Update right section

Finalize header updates
Update share button style
2026-02-22 10:15:46 +00:00 · 2025-11-18 17:31:01 -08:00 · 2025-11-18 17:27:34 -08:00 · 2025-11-18 15:49:35 -08:00 · 2025-11-18 15:47:23 -08:00 · 2025-11-18 15:35:10 -08:00
857 changed files with 51178 additions and 26364 deletions
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -0,0 +1,41 @@
+# Configuration for actionlint.
+self-hosted-runner:
+  # Labels of self-hosted runners, as an array of strings.
+  # Listing a label here makes it a known `runs-on` value for the linter.
+  labels:
+    - extras=ecr-cache
+    - extras=s3-cache
+    - hdd=256
+    - runs-on
+    - runner=1cpu-linux-arm64
+    - runner=1cpu-linux-x64
+    - runner=2cpu-linux-arm64
+    - runner=2cpu-linux-x64
+    - runner=4cpu-linux-arm64
+    - runner=4cpu-linux-x64
+    - runner=8cpu-linux-arm64
+    - runner=8cpu-linux-x64
+    - runner=16cpu-linux-arm64
+    - runner=16cpu-linux-x64
+    - ubuntu-slim  # Currently in public preview
+    - volume=40gb
+
+# Configuration variables defined in the repository or organization, as an
+# array of strings. `null` disables the configuration-variable check; an empty
+# array would mean that no configuration variable is allowed.
+config-variables: null
+
+# Per-path configuration. Keys are glob patterns matched against file paths
+# relative to the repository root (the path separator is always '/'); values
+# are the settings applied to the matching files. "ignore" is an array of
+# regular expressions: error messages matching any of them are ignored, like
+# the "-ignore" command line option.
+paths:
+  # Any YAML file under the '.github/workflows/' directory.
+  ".github/workflows/**/*.{yml,yaml}":
+    # TODO: These are real and should be fixed eventually.
+    ignore:
+      - "shellcheck reported issue in this script: SC2038:.+"
+      - "shellcheck reported issue in this script: SC2046:.+"
+      - "shellcheck reported issue in this script: SC2086:.+"
+      - "shellcheck reported issue in this script: SC2193:.+"
--- a/.github/actions/custom-build-and-push/action.yml
+++ b/.github/actions/custom-build-and-push/action.yml
@@ -59,7 +59,7 @@ runs:
  steps:
    - name: Build and push Docker image (Attempt 1 of 3)
      id: buildx1
-      uses: docker/build-push-action@v6
+      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
      continue-on-error: true
      with:
        context: ${{ inputs.context }}
@@ -86,7 +86,7 @@ runs:
    - name: Build and push Docker image (Attempt 2 of 3)
      id: buildx2
      if: steps.buildx1.outcome != 'success'
-      uses: docker/build-push-action@v6
+      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
      with:
        context: ${{ inputs.context }}
        file: ${{ inputs.file }}
@@ -112,7 +112,7 @@ runs:
    - name: Build and push Docker image (Attempt 3 of 3)
      id: buildx3
      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
-      uses: docker/build-push-action@v6
+      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
      with:
        context: ${{ inputs.context }}
        file: ${{ inputs.file }}
--- a/.github/actions/prepare-build/action.yml
+++ b/.github/actions/prepare-build/action.yml
@@ -1,25 +1,17 @@
 name: "Prepare Build (OpenAPI generation)"
 description: "Sets up Python with uv, installs deps, generates OpenAPI schema and Python client, uploads artifact"
+inputs:
+  docker-username:
+    description: "Docker Hub username used for the authenticated login before pulling the generator image"
+    required: true
+  docker-password:
+    description: "Docker Hub password or access token used for the authenticated login"
+    required: true
 runs:
  using: "composite"
  steps:
-    - name: Checkout code
-      uses: actions/checkout@v4
-
-    - name: Setup uv
-      uses: astral-sh/setup-uv@v3
-
-    - name: Setup Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: "3.11"
-
-    - name: Install Python dependencies with uv
-      shell: bash
-      run: |
-        uv pip install --system \
-          -r backend/requirements/default.txt \
-          -r backend/requirements/dev.txt
+    - name: Setup Python and Install Dependencies
+      uses: ./.github/actions/setup-python-and-install-dependencies

    - name: Generate OpenAPI schema
      shell: bash
@@ -29,6 +21,15 @@ runs:
      run: |
        python scripts/onyx_openapi_schema.py --filename generated/openapi.json

+    # needed for pulling openapitools/openapi-generator-cli
+    # otherwise, we hit the "Unauthenticated users" limit
+    # https://docs.docker.com/docker-hub/usage/
+    - name: Login to Docker Hub
+      uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+      with:
+        username: ${{ inputs['docker-username'] }}
+        password: ${{ inputs['docker-password'] }}
+
    - name: Generate OpenAPI Python client
      shell: bash
      run: |
@@ -41,10 +42,3 @@ runs:
          --package-name onyx_openapi_client \
          --skip-validate-spec \
          --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
-
-    - name: Upload OpenAPI artifacts
-      uses: actions/upload-artifact@v4
-      with:
-        name: openapi-artifacts
-        path: backend/generated/
-
--- a/.github/actions/setup-playwright/action.yml
+++ b/.github/actions/setup-playwright/action.yml
@@ -0,0 +1,19 @@
+name: "Setup Playwright"
+description: "Sets up Playwright and system deps (assumes Python and Playwright are installed)"
+runs:
+  using: "composite"
+  steps:
+    # Cache ~/.cache/ms-playwright; the key changes whenever
+    # backend/requirements/default.txt changes.
+    - name: Cache playwright cache
+      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+      with:
+        key: ${{ runner.os }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
+        restore-keys: |
+          ${{ runner.os }}-playwright-
+        path: ~/.cache/ms-playwright
+
+    # Install Chromium together with its system dependencies (--with-deps).
+    - name: Install playwright
+      run: playwright install chromium --with-deps
+      shell: bash
--- a/.github/actions/setup-python-and-install-dependencies/action.yml
+++ b/.github/actions/setup-python-and-install-dependencies/action.yml
@@ -0,0 +1,44 @@
+name: "Setup Python and Install Dependencies"
+description: "Sets up Python with uv and installs deps"
+runs:
+  using: "composite"
+  steps:
+    - name: Setup uv
+      uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
+      # TODO: Enable caching once there is a uv.lock file checked in.
+      # with:
+      #   enable-cache: true
+
+    # Cache ~/.cache/uv by hand, keyed on the requirement files and
+    # backend/pyproject.toml.
+    - name: Cache uv cache directory
+      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+      with:
+        path: ~/.cache/uv
+        key: ${{ runner.os }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
+        restore-keys: |
+          ${{ runner.os }}-uv-
+
+    - name: Setup Python
+      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
+      with:
+        python-version: "3.11"
+
+    # Create a venv under the runner temp directory and expose it to later
+    # steps: VENV_PATH through GITHUB_ENV and its bin/ directory through GITHUB_PATH.
+    - name: Create virtual environment
+      shell: bash
+      run: |
+        VENV_DIR="${{ runner.temp }}/venv"
+        # Every expansion is quoted so a path with spaces is not word-split (SC2086).
+        uv venv "$VENV_DIR"
+        echo "VENV_PATH=$VENV_DIR" >> "$GITHUB_ENV"
+        echo "$VENV_DIR/bin" >> "$GITHUB_PATH"
+
+    - name: Install Python dependencies with uv
+      shell: bash
+      run: |
+        uv pip install \
+          -r backend/requirements/default.txt \
+          -r backend/requirements/dev.txt \
+          -r backend/requirements/model_server.txt
--- a/.github/actions/slack-notify/action.yml
+++ b/.github/actions/slack-notify/action.yml
@@ -0,0 +1,105 @@
+name: "Slack Notify on Failure"
+description: "Sends a Slack notification when a workflow fails"
+inputs:
+  webhook-url:
+    description: "Slack webhook URL (can also use SLACK_WEBHOOK_URL env var)"
+    required: false
+  failed-jobs:
+    description: "List of failed job names (newline-separated)"
+    required: false
+  title:
+    description: "Title for the notification"
+    required: false
+    default: "🚨 Workflow Failed"
+  ref-name:
+    description: "Git ref name (tag/branch)"
+    required: false
+runs:
+  using: "composite"
+  steps:
+    - name: Send Slack notification
+      shell: bash
+      # Every dynamic value reaches the script through the environment instead
+      # of being expanded into the script text, so quotes or `$(...)` inside a
+      # title, ref name or job name cannot change the shell code that runs.
+      env:
+        INPUT_WEBHOOK_URL: ${{ inputs.webhook-url }}
+        FAILED_JOBS: ${{ inputs.failed-jobs }}
+        TITLE: ${{ inputs.title }}
+        REF_NAME: ${{ inputs.ref-name || github.ref_name }}
+        REPO: ${{ github.repository }}
+        RUN_NUMBER: ${{ github.run_number }}
+        RUN_ID: ${{ github.run_id }}
+        SERVER_URL: ${{ github.server_url }}
+      run: |
+        # The input wins; otherwise keep a SLACK_WEBHOOK_URL that is already
+        # present in the environment. (Mapping the input directly onto
+        # SLACK_WEBHOOK_URL in `env:` would replace that variable with an empty
+        # string whenever the input is omitted.)
+        SLACK_WEBHOOK_URL="${INPUT_WEBHOOK_URL:-${SLACK_WEBHOOK_URL:-}}"
+        if [ -z "$SLACK_WEBHOOK_URL" ]; then
+          echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
+          exit 0
+        fi
+
+        WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"
+
+        # Escape a string for use inside a JSON string literal.
+        # Two-character "\n" sequences are kept as they are (callers use them as
+        # line breaks); real newline characters are turned into "\n" as well.
+        escape_json() {
+          local input="$1"
+          local newline=$'\n'
+          # Protect \n sequences temporarily
+          input=$(printf '%s' "$input" | sed 's/\\n/\x01NL\x01/g')
+          # Escape remaining backslashes
+          input=$(printf '%s' "$input" | sed 's/\\/\\\\/g')
+          # Restore \n sequences (single backslash, will be correct in JSON)
+          input=$(printf '%s' "$input" | sed 's/\x01NL\x01/\\n/g')
+          # Escape quotes
+          input=$(printf '%s' "$input" | sed 's/"/\\"/g')
+          # A raw newline is not valid inside a JSON string; emit \n instead
+          input=${input//"$newline"/\\n}
+          printf '%s' "$input"
+        }
+
+        REF_NAME_ESC=$(escape_json "$REF_NAME")
+        FAILED_JOBS_ESC=$(escape_json "$FAILED_JOBS")
+        WORKFLOW_URL_ESC=$(escape_json "$WORKFLOW_URL")
+        TITLE_ESC=$(escape_json "$TITLE")
+
+        # Build JSON payload piece by piece
+        PAYLOAD="{"
+        PAYLOAD="${PAYLOAD}\"text\":\"${TITLE_ESC}\","
+        PAYLOAD="${PAYLOAD}\"blocks\":[{"
+        PAYLOAD="${PAYLOAD}\"type\":\"header\","
+        PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"${TITLE_ESC}\"}"
+        PAYLOAD="${PAYLOAD}},{"
+        PAYLOAD="${PAYLOAD}\"type\":\"section\","
+        PAYLOAD="${PAYLOAD}\"fields\":["
+        if [ -n "$REF_NAME" ]; then
+          PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Ref:*\\n${REF_NAME_ESC}\"},"
+        fi
+        PAYLOAD="${PAYLOAD}{\"type\":\"mrkdwn\",\"text\":\"*Run ID:*\\n#${RUN_NUMBER}\"}"
+        PAYLOAD="${PAYLOAD}]"
+        PAYLOAD="${PAYLOAD}}"
+        if [ -n "$FAILED_JOBS" ]; then
+          PAYLOAD="${PAYLOAD},{"
+          PAYLOAD="${PAYLOAD}\"type\":\"section\","
+          PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"mrkdwn\",\"text\":\"*Failed Jobs:*\\n${FAILED_JOBS_ESC}\"}"
+          PAYLOAD="${PAYLOAD}}"
+        fi
+        PAYLOAD="${PAYLOAD},{"
+        PAYLOAD="${PAYLOAD}\"type\":\"actions\","
+        PAYLOAD="${PAYLOAD}\"elements\":[{"
+        PAYLOAD="${PAYLOAD}\"type\":\"button\","
+        PAYLOAD="${PAYLOAD}\"text\":{\"type\":\"plain_text\",\"text\":\"View Workflow Run\"},"
+        PAYLOAD="${PAYLOAD}\"url\":\"${WORKFLOW_URL_ESC}\""
+        PAYLOAD="${PAYLOAD}}]"
+        PAYLOAD="${PAYLOAD}}"
+        PAYLOAD="${PAYLOAD}]"
+        PAYLOAD="${PAYLOAD}}"
+
+        curl -X POST -H 'Content-type: application/json' \
+          --data "$PAYLOAD" \
+          "$SLACK_WEBHOOK_URL"
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -0,0 +1,23 @@
+version: 2
+updates:
+  # GitHub Actions dependencies, checked weekly from the repository root.
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      interval: 'weekly'
+    assignees:
+      - 'jmelahman'
+    labels:
+      - 'dependabot:actions'
+    open-pull-requests-limit: 3
+
+  # pip dependencies under /backend, checked weekly.
+  - package-ecosystem: 'pip'
+    directory: '/backend'
+    schedule:
+      interval: 'weekly'
+    assignees:
+      - 'jmelahman'
+    labels:
+      - 'dependabot:python'
+    open-pull-requests-limit: 3
--- a/.github/runs-on.yml
+++ b/.github/runs-on.yml
@@ -0,0 +1 @@
+_extend: .github-private  # NOTE(review): presumably inherits the RunsOn config from the `.github-private` repository - confirm
--- a/.github/workflows/check-lazy-imports.yml
+++ b/.github/workflows/check-lazy-imports.yml
@@ -1,4 +1,7 @@
 name: Check Lazy Imports
+concurrency:  # a newer run in the same group cancels the run still in progress
+  group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true

 on:
  merge_group:
@@ -13,12 +16,12 @@ jobs:

    steps:
    - name: Checkout code
-      uses: actions/checkout@v4
+      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

    - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
      with:
        python-version: '3.11'

    - name: Check lazy imports
-      run: python3 backend/scripts/check_lazy_imports.py
+      run: python3 backend/scripts/check_lazy_imports.py
--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
@@ -0,0 +1,934 @@
+name: Build and Push Docker Images on Tag
+
+on:  # tag pushes matching the pattern below, or a manual workflow_dispatch
+  push:
+    tags:
+      - "*"
+  workflow_dispatch:
+
+env:  # workflow-wide flags; jobs compare them against the string 'true'
+  IS_DRY_RUN: ${{ github.event_name == 'workflow_dispatch' }}
+  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
+
+jobs:
+  # Determine which components to build based on the tag
+  determine-builds:
+    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
+    runs-on: ubuntu-slim
+    outputs:  # 'true'/'false' flags plus the sanitized tag, all taken from the `check` step
+      build-web: ${{ steps.check.outputs.build-web }}
+      build-web-cloud: ${{ steps.check.outputs.build-web-cloud }}
+      build-backend: ${{ steps.check.outputs.build-backend }}
+      build-model-server: ${{ steps.check.outputs.build-model-server }}
+      is-cloud-tag: ${{ steps.check.outputs.is-cloud-tag }}
+      is-stable: ${{ steps.check.outputs.is-stable }}
+      is-beta: ${{ steps.check.outputs.is-beta }}
+      is-stable-standalone: ${{ steps.check.outputs.is-stable-standalone }}
+      is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
+      sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
+    steps:
+      - name: Check which components to build and version info
+        id: check
+        run: |
+          TAG="${GITHUB_REF_NAME}"  # runner-provided variable; keeps the ref name out of the script text
+          # Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
+          SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
+          IS_CLOUD=false
+          BUILD_WEB=false
+          BUILD_WEB_CLOUD=false
+          BUILD_BACKEND=true  # never switched off below
+          BUILD_MODEL_SERVER=true  # never switched off below
+          IS_STABLE=false
+          IS_BETA=false
+          IS_STABLE_STANDALONE=false
+          IS_BETA_STANDALONE=false
+
+          if [[ "$TAG" == *cloud* ]]; then  # cloud tags build the cloud web image instead of the regular one
+            IS_CLOUD=true
+            BUILD_WEB_CLOUD=true
+          else
+            BUILD_WEB=true
+          fi
+
+          # Version checks (for web - any stable version)
+          if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+            IS_STABLE=true
+          fi
+          if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
+            IS_BETA=true
+          fi
+
+          # Version checks (for backend/model-server - stable version excluding cloud tags)
+          if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "$TAG" != *cloud* ]]; then
+            IS_STABLE_STANDALONE=true
+          fi
+          if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]] && [[ "$TAG" != *cloud* ]]; then
+            IS_BETA_STANDALONE=true
+          fi
+
+          {
+            echo "build-web=$BUILD_WEB"
+            echo "build-web-cloud=$BUILD_WEB_CLOUD"
+            echo "build-backend=$BUILD_BACKEND"
+            echo "build-model-server=$BUILD_MODEL_SERVER"
+            echo "is-cloud-tag=$IS_CLOUD"
+            echo "is-stable=$IS_STABLE"
+            echo "is-beta=$IS_BETA"
+            echo "is-stable-standalone=$IS_STABLE_STANDALONE"
+            echo "is-beta-standalone=$IS_BETA_STANDALONE"
+            echo "sanitized-tag=$SANITIZED_TAG"
+          } >> "$GITHUB_OUTPUT"
+
+  build-web-amd64:  # web image for linux/amd64, pushed by digest; merge-web applies the tags
+    needs: determine-builds
+    if: needs.determine-builds.outputs.build-web == 'true'
+    runs-on:  # self-hosted runner labels; the run-id label combines github.run_id with a per-job suffix
+      - runs-on
+      - runner=4cpu-linux-x64
+      - run-id=${{ github.run_id }}-web-amd64
+      - extras=ecr-cache
+    outputs:  # digest of the pushed image, read by merge-web
+      digest: ${{ steps.build.outputs.digest }}
+    env:
+      REGISTRY_IMAGE: onyxdotapp/onyx-web-server
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Docker meta  # only its `labels` output is used by the build step in this job
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:  # workflow_dispatch runs target RUNS_ON_ECR_CACHE instead of REGISTRY_IMAGE
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push AMD64
+        id: build
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:  # no `tags` input: `outputs` below sets push-by-digest=true
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: linux/amd64
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+            NODE_OPTIONS=--max-old-space-size=8192
+          cache-from: |
+            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-amd64
+          cache-to: |
+            type=inline
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-amd64,mode=max
+          outputs: type=image,name=${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+
+  build-web-arm64:  # web image for linux/arm64, pushed by digest; merge-web applies the tags
+    needs: determine-builds
+    if: needs.determine-builds.outputs.build-web == 'true'
+    runs-on:  # self-hosted runner labels; the run-id label combines github.run_id with a per-job suffix
+      - runs-on
+      - runner=4cpu-linux-arm64
+      - run-id=${{ github.run_id }}-web-arm64
+      - extras=ecr-cache
+    outputs:  # digest of the pushed image, read by merge-web
+      digest: ${{ steps.build.outputs.digest }}
+    env:
+      REGISTRY_IMAGE: onyxdotapp/onyx-web-server
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Docker meta  # only its `labels` output is used by the build step in this job
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:  # workflow_dispatch runs target RUNS_ON_ECR_CACHE instead of REGISTRY_IMAGE
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push ARM64
+        id: build
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:  # no `tags` input: `outputs` below sets push-by-digest=true
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: linux/arm64
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+            NODE_OPTIONS=--max-old-space-size=8192
+          cache-from: |
+            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-arm64
+          cache-to: |
+            type=inline
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-arm64,mode=max
+          outputs: type=image,name=${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+
+  merge-web:  # combines the amd64 and arm64 web digests under the computed tags
+    needs:
+      - determine-builds
+      - build-web-amd64
+      - build-web-arm64
+    runs-on:  # self-hosted runner labels; the run-id label combines github.run_id with a per-job suffix
+      - runs-on
+      - runner=2cpu-linux-x64
+      - run-id=${{ github.run_id }}-merge-web
+      - extras=ecr-cache
+    env:
+      REGISTRY_IMAGE: onyxdotapp/onyx-web-server
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Docker meta  # tag list: the ref name (web-<sanitized-tag> on manual runs) plus latest/edge/beta when their conditions hold
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:  # workflow_dispatch runs target RUNS_ON_ECR_CACHE instead of REGISTRY_IMAGE
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable == 'true' && 'latest' || '' }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}
+
+      - name: Create and push manifest  # one `-t` flag per tag from the meta step, followed by both per-arch digests
+        run: |
+          IMAGES="${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}@${{ needs.build-web-amd64.outputs.digest }} ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}@${{ needs.build-web-arm64.outputs.digest }}"
+          docker buildx imagetools create \
+            $(printf '%s\n' "${{ steps.meta.outputs.tags }}" | xargs -I {} echo -t {}) \
+            $IMAGES
+
+  build-web-cloud-amd64:  # cloud web image for linux/amd64, pushed by digest; merge-web-cloud applies the tag
+    needs: determine-builds
+    if: needs.determine-builds.outputs.build-web-cloud == 'true'
+    runs-on:  # self-hosted runner labels; the run-id label combines github.run_id with a per-job suffix
+      - runs-on
+      - runner=4cpu-linux-x64
+      - run-id=${{ github.run_id }}-web-cloud-amd64
+      - extras=ecr-cache
+    outputs:  # digest of the pushed image, read by merge-web-cloud
+      digest: ${{ steps.build.outputs.digest }}
+    env:
+      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Docker meta  # only its `labels` output is used by the build step in this job
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:  # workflow_dispatch runs target RUNS_ON_ECR_CACHE instead of REGISTRY_IMAGE
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push AMD64
+        id: build
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:  # same Dockerfile as the regular web build, with the cloud NEXT_PUBLIC_* build args added
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: linux/amd64
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+            NEXT_PUBLIC_CLOUD_ENABLED=true
+            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
+            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
+            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
+            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
+            NEXT_PUBLIC_GTM_ENABLED=true
+            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
+            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
+            NODE_OPTIONS=--max-old-space-size=8192
+          cache-from: |
+            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64
+          cache-to: |
+            type=inline
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64,mode=max
+          outputs: type=image,name=${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+
+  build-web-cloud-arm64:  # cloud web image for linux/arm64, pushed by digest; merge-web-cloud applies the tag
+    needs: determine-builds
+    if: needs.determine-builds.outputs.build-web-cloud == 'true'
+    runs-on:  # self-hosted runner labels; the run-id label combines github.run_id with a per-job suffix
+      - runs-on
+      - runner=4cpu-linux-arm64
+      - run-id=${{ github.run_id }}-web-cloud-arm64
+      - extras=ecr-cache
+    outputs:  # digest of the pushed image, read by merge-web-cloud
+      digest: ${{ steps.build.outputs.digest }}
+    env:
+      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Docker meta  # only its `labels` output is used by the build step in this job
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:  # workflow_dispatch runs target RUNS_ON_ECR_CACHE instead of REGISTRY_IMAGE
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push ARM64
+        id: build
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:  # same Dockerfile as the regular web build, with the cloud NEXT_PUBLIC_* build args added
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: linux/arm64
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+            NEXT_PUBLIC_CLOUD_ENABLED=true
+            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
+            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
+            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
+            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
+            NEXT_PUBLIC_GTM_ENABLED=true
+            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
+            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
+            NODE_OPTIONS=--max-old-space-size=8192
+          cache-from: |
+            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64
+          cache-to: |
+            type=inline
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64,mode=max
+          outputs: type=image,name=${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+
+  merge-web-cloud:  # combines the amd64 and arm64 cloud web digests under a single tag
+    needs:
+      - determine-builds
+      - build-web-cloud-amd64
+      - build-web-cloud-arm64
+    runs-on:  # self-hosted runner labels; the run-id label combines github.run_id with a per-job suffix
+      - runs-on
+      - runner=2cpu-linux-x64
+      - run-id=${{ github.run_id }}-merge-web-cloud
+      - extras=ecr-cache
+    env:
+      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Docker meta  # single tag: the ref name, or web-cloud-<sanitized-tag> on manual runs
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:  # workflow_dispatch runs target RUNS_ON_ECR_CACHE instead of REGISTRY_IMAGE
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('web-cloud-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
+
+      - name: Create and push manifest  # one `-t` flag per tag from the meta step, followed by both per-arch digests
+        run: |
+          IMAGES="${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}@${{ needs.build-web-cloud-amd64.outputs.digest }} ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}@${{ needs.build-web-cloud-arm64.outputs.digest }}"
+          docker buildx imagetools create \
+            $(printf '%s\n' "${{ steps.meta.outputs.tags }}" | xargs -I {} echo -t {}) \
+            $IMAGES
+
+  build-backend-amd64:  # backend image for linux/amd64, pushed by digest
+    needs: determine-builds
+    if: needs.determine-builds.outputs.build-backend == 'true'
+    runs-on:  # self-hosted runner labels; the run-id label combines github.run_id with a per-job suffix
+      - runs-on
+      - runner=2cpu-linux-x64
+      - run-id=${{ github.run_id }}-backend-amd64
+      - extras=ecr-cache
+    outputs:  # digest of the image pushed by the `build` step
+      digest: ${{ steps.build.outputs.digest }}
+    env:  # ref names containing 'cloud' publish to onyx-backend-cloud, all others to onyx-backend
+      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Docker meta  # only its `labels` output is used by the build step in this job
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:  # workflow_dispatch runs target RUNS_ON_ECR_CACHE instead of REGISTRY_IMAGE
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push AMD64
+        id: build
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:  # no `tags` input: `outputs` below sets push-by-digest=true
+          context: ./backend
+          file: ./backend/Dockerfile
+          platforms: linux/amd64
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+          cache-from: |
+            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-amd64
+          cache-to: |
+            type=inline
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-amd64,mode=max
+          outputs: type=image,name=${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+
+  # Builds the backend image for linux/arm64 and pushes it by digest only (no
+  # tags). merge-backend consumes the digest output to assemble the multi-arch
+  # manifest.
+  build-backend-arm64:
+    needs: determine-builds
+    # Skipped unless determine-builds flagged the backend image for building.
+    if: needs.determine-builds.outputs.build-backend == 'true'
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-arm64
+      - run-id=${{ github.run_id }}-backend-arm64
+      - extras=ecr-cache
+    outputs:
+      # Digest reported by the "build" step below.
+      digest: ${{ steps.build.outputs.digest }}
+    env:
+      # Refs whose name contains 'cloud' publish to the -cloud repository.
+      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
+    steps:
+      # NOTE(review): RUNS_ON_ECR_CACHE is not defined in this job; presumably
+      # it is provided by this action together with extras=ecr-cache - confirm.
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      # Only the generated labels are used by the build step; tags are applied
+      # later by merge-backend.
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:
+          # Manual (workflow_dispatch) runs target RUNS_ON_ECR_CACHE instead of
+          # the public REGISTRY_IMAGE.
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push ARM64
+        id: build
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile
+          platforms: linux/arm64
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+          # Cache is read from the published :latest image and from the
+          # per-architecture cache ref under RUNS_ON_ECR_CACHE.
+          cache-from: |
+            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-arm64
+          # Cache is written inline and, with mode=max, to the same cache ref.
+          cache-to: |
+            type=inline
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-arm64,mode=max
+          # push-by-digest=true: the image is pushed without a tag and is
+          # addressed by the digest exposed in this job's outputs.
+          outputs: type=image,name=${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+
+  # Combines the per-architecture backend images (pushed by digest by
+  # build-backend-amd64 and build-backend-arm64) into a single multi-arch
+  # manifest and applies the final tags.
+  merge-backend:
+    needs:
+      - determine-builds
+      - build-backend-amd64
+      - build-backend-arm64
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-x64
+      - run-id=${{ github.run_id }}-merge-backend
+      - extras=ecr-cache
+    env:
+      # Refs whose name contains 'cloud' publish to the -cloud repository.
+      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # Tag set:
+      #   - manual runs: "backend-<sanitized-tag>"; otherwise the ref name
+      #   - "latest" / "edge" / "beta" only on non-manual runs, each gated by
+      #     its own flag; a gate that is off yields an empty value
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('backend-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
+
+      - name: Create and push manifest
+        # Expression results reach the script as environment variables rather
+        # than being expanded into the script text, so a ref name containing
+        # shell metacharacters cannot change the command that runs.
+        env:
+          IMAGE_REPO: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          AMD64_DIGEST: ${{ needs.build-backend-amd64.outputs.digest }}
+          ARM64_DIGEST: ${{ needs.build-backend-arm64.outputs.digest }}
+          META_TAGS: ${{ steps.meta.outputs.tags }}
+        run: |
+          IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
+          docker buildx imagetools create \
+            $(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
+            $IMAGES
+
+  # Builds the model-server image for linux/amd64 and pushes it by digest only
+  # (no tags). merge-model-server consumes the digest output to assemble the
+  # multi-arch manifest.
+  build-model-server-amd64:
+    needs: determine-builds
+    # Skipped unless determine-builds flagged the model server for building.
+    if: needs.determine-builds.outputs.build-model-server == 'true'
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-x64
+      - run-id=${{ github.run_id }}-model-server-amd64
+      - volume=40gb
+      - extras=ecr-cache
+    outputs:
+      # Digest reported by the "build" step below.
+      digest: ${{ steps.build.outputs.digest }}
+    env:
+      # Refs whose name contains 'cloud' publish to the -cloud repository.
+      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
+    steps:
+      # NOTE(review): RUNS_ON_ECR_CACHE is not defined in this job; presumably
+      # it is provided by this action together with extras=ecr-cache - confirm.
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      # Only the generated labels are used by the build step; tags are applied
+      # later by merge-model-server.
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:
+          # Manual (workflow_dispatch) runs target RUNS_ON_ECR_CACHE instead of
+          # the public REGISTRY_IMAGE.
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push AMD64
+        id: build
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:
+          # Shares the ./backend build context with the backend image but uses
+          # its own Dockerfile.
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/amd64
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+          # Cache is read from the published :latest image and from the
+          # per-architecture cache ref under RUNS_ON_ECR_CACHE.
+          cache-from: |
+            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-amd64
+          # Cache is written inline and, with mode=max, to the same cache ref.
+          cache-to: |
+            type=inline
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-amd64,mode=max
+          # push-by-digest=true: the image is pushed without a tag and is
+          # addressed by the digest exposed in this job's outputs.
+          outputs: type=image,name=${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+
+  # Builds the model-server image for linux/arm64 and pushes it by digest only
+  # (no tags). merge-model-server consumes the digest output to assemble the
+  # multi-arch manifest.
+  build-model-server-arm64:
+    needs: determine-builds
+    # Skipped unless determine-builds flagged the model server for building.
+    if: needs.determine-builds.outputs.build-model-server == 'true'
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-arm64
+      - run-id=${{ github.run_id }}-model-server-arm64
+      - volume=40gb
+      - extras=ecr-cache
+    outputs:
+      # Digest reported by the "build" step below.
+      digest: ${{ steps.build.outputs.digest }}
+    env:
+      # Refs whose name contains 'cloud' publish to the -cloud repository.
+      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
+    steps:
+      # NOTE(review): RUNS_ON_ECR_CACHE is not defined in this job; presumably
+      # it is provided by this action together with extras=ecr-cache - confirm.
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      # Only the generated labels are used by the build step; tags are applied
+      # later by merge-model-server.
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:
+          # Manual (workflow_dispatch) runs target RUNS_ON_ECR_CACHE instead of
+          # the public REGISTRY_IMAGE.
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push ARM64
+        id: build
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:
+          # Shares the ./backend build context with the backend image but uses
+          # its own Dockerfile.
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/arm64
+          labels: ${{ steps.meta.outputs.labels }}
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+          # Cache is read from the published :latest image and from the
+          # per-architecture cache ref under RUNS_ON_ECR_CACHE.
+          cache-from: |
+            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-arm64
+          # Cache is written inline and, with mode=max, to the same cache ref.
+          cache-to: |
+            type=inline
+            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-arm64,mode=max
+          # push-by-digest=true: the image is pushed without a tag and is
+          # addressed by the digest exposed in this job's outputs.
+          outputs: type=image,name=${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+
+  # Combines the per-architecture model-server images (pushed by digest by
+  # build-model-server-amd64 and build-model-server-arm64) into a single
+  # multi-arch manifest and applies the final tags.
+  merge-model-server:
+    needs:
+      - determine-builds
+      - build-model-server-amd64
+      - build-model-server-arm64
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-x64
+      - run-id=${{ github.run_id }}-merge-model-server
+      - extras=ecr-cache
+    env:
+      # Refs whose name contains 'cloud' publish to the -cloud repository.
+      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # Tag set:
+      #   - manual runs: "model-server-<sanitized-tag>"; otherwise the ref name
+      #   - "latest" / "edge" / "beta" only on non-manual runs, each gated by
+      #     its own flag; a gate that is off yields an empty value
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
+        with:
+          images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.event_name == 'workflow_dispatch' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-stable-standalone == 'true' && 'latest' || '' }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && env.EDGE_TAG == 'true' && 'edge' || '' }}
+            type=raw,value=${{ github.event_name != 'workflow_dispatch' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}
+
+      - name: Create and push manifest
+        # Expression results reach the script as environment variables rather
+        # than being expanded into the script text, so a ref name containing
+        # shell metacharacters cannot change the command that runs.
+        env:
+          IMAGE_REPO: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
+          AMD64_DIGEST: ${{ needs.build-model-server-amd64.outputs.digest }}
+          ARM64_DIGEST: ${{ needs.build-model-server-arm64.outputs.digest }}
+          META_TAGS: ${{ steps.meta.outputs.tags }}
+        run: |
+          IMAGES="${IMAGE_REPO}@${AMD64_DIGEST} ${IMAGE_REPO}@${ARM64_DIGEST}"
+          docker buildx imagetools create \
+            $(printf '%s\n' "${META_TAGS}" | xargs -I {} echo -t {}) \
+            $IMAGES
+
+  # Scans the merged web image for CRITICAL/HIGH vulnerabilities with Trivy.
+  # Runs only after merge-web succeeded.
+  trivy-scan-web:
+    needs:
+      - determine-builds
+      - merge-web
+    if: needs.merge-web.result == 'success'
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-arm64
+      - run-id=${{ github.run_id }}-trivy-scan-web
+      - extras=ecr-cache
+    env:
+      REGISTRY_IMAGE: onyxdotapp/onyx-web-server
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Run Trivy vulnerability scanner
+        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
+        # Expression results and secrets are supplied through the environment
+        # instead of being expanded into the script: the ref name cannot inject
+        # shell syntax, and the registry credentials no longer appear in the
+        # docker command line ("-e NAME" forwards the variable by name).
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          # Image tagged by merge-web on manual (workflow_dispatch) runs.
+          ECR_IMAGE: ${{ env.RUNS_ON_ECR_CACHE }}:web-${{ needs.determine-builds.outputs.sanitized-tag }}
+          # Image tagged with the ref name on all other runs.
+          HUB_IMAGE: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
+          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
+        with:
+          timeout_minutes: 30
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            if [ "${EVENT_NAME}" == "workflow_dispatch" ]; then
+              SCAN_IMAGE="${ECR_IMAGE}"
+            else
+              SCAN_IMAGE="${HUB_IMAGE}"
+            fi
+            docker run --rm -v "$HOME/.cache/trivy:/root/.cache/trivy" \
+              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
+              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
+              -e TRIVY_USERNAME \
+              -e TRIVY_PASSWORD \
+              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
+              image \
+              --skip-version-check \
+              --timeout 20m \
+              --severity CRITICAL,HIGH \
+              "${SCAN_IMAGE}"
+
+  # Scans the merged cloud web image for CRITICAL/HIGH vulnerabilities with
+  # Trivy. Runs only after merge-web-cloud succeeded.
+  trivy-scan-web-cloud:
+    needs:
+      - determine-builds
+      - merge-web-cloud
+    if: needs.merge-web-cloud.result == 'success'
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-arm64
+      - run-id=${{ github.run_id }}-trivy-scan-web-cloud
+      - extras=ecr-cache
+    env:
+      REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Run Trivy vulnerability scanner
+        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
+        # Expression results and secrets are supplied through the environment
+        # instead of being expanded into the script: the ref name cannot inject
+        # shell syntax, and the registry credentials no longer appear in the
+        # docker command line ("-e NAME" forwards the variable by name).
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          # Image tagged by merge-web-cloud on manual (workflow_dispatch) runs.
+          ECR_IMAGE: ${{ env.RUNS_ON_ECR_CACHE }}:web-cloud-${{ needs.determine-builds.outputs.sanitized-tag }}
+          # Image tagged with the ref name on all other runs.
+          HUB_IMAGE: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
+          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
+        with:
+          timeout_minutes: 30
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            if [ "${EVENT_NAME}" == "workflow_dispatch" ]; then
+              SCAN_IMAGE="${ECR_IMAGE}"
+            else
+              SCAN_IMAGE="${HUB_IMAGE}"
+            fi
+            docker run --rm -v "$HOME/.cache/trivy:/root/.cache/trivy" \
+              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
+              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
+              -e TRIVY_USERNAME \
+              -e TRIVY_PASSWORD \
+              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
+              image \
+              --skip-version-check \
+              --timeout 20m \
+              --severity CRITICAL,HIGH \
+              "${SCAN_IMAGE}"
+
+  # Scans the merged backend image for CRITICAL/HIGH vulnerabilities with
+  # Trivy, honouring backend/.trivyignore. Runs only after merge-backend
+  # succeeded.
+  trivy-scan-backend:
+    needs:
+      - determine-builds
+      - merge-backend
+    if: needs.merge-backend.result == 'success'
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-arm64
+      - run-id=${{ github.run_id }}-trivy-scan-backend
+      - extras=ecr-cache
+    env:
+      # Refs whose name contains 'cloud' scan the -cloud repository.
+      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      # The checkout provides backend/.trivyignore, which is mounted into the
+      # Trivy container below.
+      - name: Checkout
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Run Trivy vulnerability scanner
+        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
+        # Expression results and secrets are supplied through the environment
+        # instead of being expanded into the script: the ref name cannot inject
+        # shell syntax, and the registry credentials no longer appear in the
+        # docker command line ("-e NAME" forwards the variable by name).
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          # Image tagged by merge-backend on manual (workflow_dispatch) runs.
+          ECR_IMAGE: ${{ env.RUNS_ON_ECR_CACHE }}:backend-${{ needs.determine-builds.outputs.sanitized-tag }}
+          # Image tagged with the ref name on all other runs.
+          HUB_IMAGE: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
+          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
+        with:
+          timeout_minutes: 30
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            if [ "${EVENT_NAME}" == "workflow_dispatch" ]; then
+              SCAN_IMAGE="${ECR_IMAGE}"
+            else
+              SCAN_IMAGE="${HUB_IMAGE}"
+            fi
+            docker run --rm -v "$HOME/.cache/trivy:/root/.cache/trivy" \
+              -v "${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro" \
+              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
+              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
+              -e TRIVY_USERNAME \
+              -e TRIVY_PASSWORD \
+              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
+              image \
+              --skip-version-check \
+              --timeout 20m \
+              --severity CRITICAL,HIGH \
+              --ignorefile /tmp/.trivyignore \
+              "${SCAN_IMAGE}"
+
+  # Scans the merged model-server image for CRITICAL/HIGH vulnerabilities with
+  # Trivy. Runs only after merge-model-server succeeded.
+  trivy-scan-model-server:
+    needs:
+      - determine-builds
+      - merge-model-server
+    if: needs.merge-model-server.result == 'success'
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-arm64
+      - run-id=${{ github.run_id }}-trivy-scan-model-server
+      - extras=ecr-cache
+    env:
+      # Refs whose name contains 'cloud' scan the -cloud repository.
+      REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Run Trivy vulnerability scanner
+        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
+        # Expression results and secrets are supplied through the environment
+        # instead of being expanded into the script: the ref name cannot inject
+        # shell syntax, and the registry credentials no longer appear in the
+        # docker command line ("-e NAME" forwards the variable by name).
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          # Image tagged by merge-model-server on manual (workflow_dispatch) runs.
+          ECR_IMAGE: ${{ env.RUNS_ON_ECR_CACHE }}:model-server-${{ needs.determine-builds.outputs.sanitized-tag }}
+          # Image tagged with the ref name on all other runs.
+          HUB_IMAGE: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
+          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
+        with:
+          timeout_minutes: 30
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            if [ "${EVENT_NAME}" == "workflow_dispatch" ]; then
+              SCAN_IMAGE="${ECR_IMAGE}"
+            else
+              SCAN_IMAGE="${HUB_IMAGE}"
+            fi
+            docker run --rm -v "$HOME/.cache/trivy:/root/.cache/trivy" \
+              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
+              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
+              -e TRIVY_USERNAME \
+              -e TRIVY_PASSWORD \
+              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
+              image \
+              --skip-version-check \
+              --timeout 20m \
+              --severity CRITICAL,HIGH \
+              "${SCAN_IMAGE}"
+
+  # Posts a Slack alert listing every build/merge job of this workflow that
+  # ended in "failure". Manually dispatched runs never notify.
+  notify-slack-on-failure:
+    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
+    runs-on: ubuntu-slim
+    needs:
+      - build-web-amd64
+      - build-web-arm64
+      - merge-web
+      - build-web-cloud-amd64
+      - build-web-cloud-arm64
+      - merge-web-cloud
+      - build-backend-amd64
+      - build-backend-arm64
+      - merge-backend
+      - build-model-server-amd64
+      - build-model-server-arm64
+      - merge-model-server
+    # always() keeps this job eligible when upstream jobs failed or were
+    # skipped; needs.*.result collects the result of every job listed above.
+    if: always() && contains(needs.*.result, 'failure') && github.event_name != 'workflow_dispatch'
+    steps:
+      # The checkout makes the local ./.github/actions/slack-notify action
+      # available to the last step.
+      - name: Checkout
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Determine failed jobs
+        id: failed-jobs
+        shell: bash
+        run: |
+          FAILED_JOBS=""
+          # Append "• <job>\n" (a literal backslash-n) when the job's result is "failure".
+          record_if_failed() {
+            if [ "$2" == "failure" ]; then
+              FAILED_JOBS="${FAILED_JOBS}• $1\\n"
+            fi
+          }
+          record_if_failed build-web-amd64 "${{ needs.build-web-amd64.result }}"
+          record_if_failed build-web-arm64 "${{ needs.build-web-arm64.result }}"
+          record_if_failed merge-web "${{ needs.merge-web.result }}"
+          record_if_failed build-web-cloud-amd64 "${{ needs.build-web-cloud-amd64.result }}"
+          record_if_failed build-web-cloud-arm64 "${{ needs.build-web-cloud-arm64.result }}"
+          record_if_failed merge-web-cloud "${{ needs.merge-web-cloud.result }}"
+          record_if_failed build-backend-amd64 "${{ needs.build-backend-amd64.result }}"
+          record_if_failed build-backend-arm64 "${{ needs.build-backend-arm64.result }}"
+          record_if_failed merge-backend "${{ needs.merge-backend.result }}"
+          record_if_failed build-model-server-amd64 "${{ needs.build-model-server-amd64.result }}"
+          record_if_failed build-model-server-arm64 "${{ needs.build-model-server-arm64.result }}"
+          record_if_failed merge-model-server "${{ needs.merge-model-server.result }}"
+          # Drop the final literal \n, then publish the list as the step output
+          FAILED_JOBS=$(printf '%s' "$FAILED_JOBS" | sed 's/\\n$//')
+          echo "jobs=$FAILED_JOBS" >> "$GITHUB_OUTPUT"
+
+      - name: Send Slack notification
+        uses: ./.github/actions/slack-notify
+        with:
+          webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
+          failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
+          title: "🚨 Deployment Workflow Failed"
+          ref-name: ${{ github.ref_name }}
--- a/.github/workflows/docker-build-push-backend-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-backend-container-on-tag.yml
@@ -1,186 +0,0 @@
-name: Build and Push Backend Image on Tag
-
-on:
-  push:
-    tags:
-      - "*"
-
-env:
-  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
-  DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
-
-  # tag nightly builds with "edge"
-  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
-
-jobs:
-  build-and-push:
-    # TODO: investigate a matrix build like the web container
-    # See https://runs-on.com/runners/linux/
-    runs-on:
-      - runs-on
-      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
-      - run-id=${{ github.run_id }}
-      - tag=platform-${{ matrix.platform }}
-    strategy:
-      fail-fast: false
-      matrix:
-        platform:
-          - linux/amd64
-          - linux/arm64
-          
-    steps:
-      - name: Prepare
-        run: |
-          platform=${{ matrix.platform }}
-          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
-
-      - name: Check if stable release version
-        id: check_version
-        run: |
-          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
-            echo "is_stable=true" >> $GITHUB_OUTPUT
-          else
-            echo "is_stable=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.REGISTRY_IMAGE }}
-          flavor: |
-            latest=false
-          tags: |
-            type=raw,value=${{ github.ref_name }}
-            type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
-            type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
-            
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Install build-essential
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y build-essential
-
-      - name: Backend Image Docker Build and Push
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile
-          platforms: ${{ matrix.platform }}
-          push: true
-          build-args: |
-            ONYX_VERSION=${{ github.ref_name }}
-          labels: ${{ steps.meta.outputs.labels }}
-          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      - name: Export digest      
-        run: |
-          mkdir -p /tmp/digests
-          digest="${{ steps.build.outputs.digest }}"
-          touch "/tmp/digests/${digest#sha256:}"
-
-      - name: Upload digest
-        uses: actions/upload-artifact@v4
-        with:
-          name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
-          path: /tmp/digests/*
-          if-no-files-found: error
-          retention-days: 1
-          
-  merge:
-    runs-on: ubuntu-latest
-    needs:
-      - build-and-push
-    steps:
-      # Needed for trivyignore
-      - name: Checkout
-        uses: actions/checkout@v4
-      
-      - name: Check if stable release version
-        id: check_version
-        run: |
-          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
-            echo "is_stable=true" >> $GITHUB_OUTPUT
-          else
-            echo "is_stable=false" >> $GITHUB_OUTPUT
-          fi
-        
-      - name: Download digests
-        uses: actions/download-artifact@v4
-        with:
-          path: /tmp/digests
-          pattern: backend-digests-*-${{ github.run_id }}
-          merge-multiple: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.REGISTRY_IMAGE }}
-          flavor: |
-            latest=false
-          tags: |
-            type=raw,value=${{ github.ref_name }}
-            type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
-            type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Create manifest list and push
-        working-directory: /tmp/digests
-        run: |
-          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
-
-      - name: Inspect image
-        run: |
-          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
-          
-      # trivy has their own rate limiting issues causing this action to flake
-      # we worked around it by hardcoding to different db repos in env
-      # can re-enable when they figure it out
-      # https://github.com/aquasecurity/trivy/discussions/7538
-      # https://github.com/aquasecurity/trivy-action/issues/389
-      # Security: Using pinned digest (0.65.0@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436)
-      # Security: No Docker socket mount needed for remote registry scanning
-      - name: Run Trivy vulnerability scanner
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 30
-          max_attempts: 3
-          retry_wait_seconds: 10
-          command: |
-            docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-              -v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
-              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-              -e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-              -e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
-              image \
-              --skip-version-check \
-              --timeout 20m \
-              --severity CRITICAL,HIGH \
-              --ignorefile /tmp/.trivyignore \
-              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
@@ -1,158 +0,0 @@
-name: Build and Push Cloud Web Image on Tag
-# Identical to the web container build, but with correct image tag and build args
-
-on:
-  push:
-    tags:
-      - "*cloud*"
-
-env:
-  REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
-  DEPLOYMENT: cloud
-  
-jobs:
-  build:
-    runs-on:
-      - runs-on
-      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
-      - run-id=${{ github.run_id }}
-      - tag=platform-${{ matrix.platform }}
-    strategy:
-      fail-fast: false
-      matrix:
-        platform:
-          - linux/amd64
-          - linux/arm64
-
-    steps:
-      - name: Prepare
-        run: |
-          platform=${{ matrix.platform }}
-          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
-
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.REGISTRY_IMAGE }}
-          flavor: |
-            latest=false
-          tags: |
-            type=raw,value=${{ github.ref_name }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: ./web
-          file: ./web/Dockerfile
-          platforms: ${{ matrix.platform }}
-          push: true
-          build-args: |
-            ONYX_VERSION=${{ github.ref_name }}
-            NEXT_PUBLIC_CLOUD_ENABLED=true
-            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
-            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
-            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
-            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
-            NEXT_PUBLIC_GTM_ENABLED=true
-            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
-            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
-            NODE_OPTIONS=--max-old-space-size=8192
-          labels: ${{ steps.meta.outputs.labels }}
-          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-          # no-cache needed due to weird interactions with the builds for different platforms
-          # NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
-
-      - name: Export digest
-        run: |
-          mkdir -p /tmp/digests
-          digest="${{ steps.build.outputs.digest }}"
-          touch "/tmp/digests/${digest#sha256:}"
-
-      - name: Upload digest
-        uses: actions/upload-artifact@v4
-        with:
-          name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
-          path: /tmp/digests/*
-          if-no-files-found: error
-          retention-days: 1
-
-  merge:
-    runs-on: ubuntu-latest
-    needs:
-      - build
-    steps:
-      - name: Download digests
-        uses: actions/download-artifact@v4
-        with:
-          path: /tmp/digests
-          pattern: cloudweb-digests-*-${{ github.run_id }}
-          merge-multiple: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.REGISTRY_IMAGE }}
-          flavor: |
-            latest=false
-          tags: |
-            type=raw,value=${{ github.ref_name }}
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Create manifest list and push
-        working-directory: /tmp/digests
-        run: |
-          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
-
-      - name: Inspect image
-        run: |
-          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
-
-      # trivy has their own rate limiting issues causing this action to flake
-      # we worked around it by hardcoding to different db repos in env
-      # can re-enable when they figure it out
-      # https://github.com/aquasecurity/trivy/discussions/7538
-      # https://github.com/aquasecurity/trivy-action/issues/389
-      - name: Run Trivy vulnerability scanner
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 30
-          max_attempts: 3
-          retry_wait_seconds: 10
-          command: |
-            docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-              -e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-              -e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
-              image \
-              --skip-version-check \
-              --timeout 20m \
-              --severity CRITICAL,HIGH \
-              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -1,197 +0,0 @@
-name: Build and Push Model Server Image on Tag
-
-on:
-  push:
-    tags:
-      - "*"
-
-env:
-  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
-  DOCKER_BUILDKIT: 1
-  BUILDKIT_PROGRESS: plain
-  DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
-
-  # tag nightly builds with "edge"
-  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
-  
-jobs:
-
-#   Bypassing this for now as the idea of not building is glitching
-#   releases and builds that depends on everything being tagged in docker
-#   1) Preliminary job to check if the changed files are relevant
-#   check_model_server_changes:
-#     runs-on: ubuntu-latest
-#     outputs:
-#       changed: ${{ steps.check.outputs.changed }}
-#     steps:
-#       - name: Checkout code
-#         uses: actions/checkout@v4
-# 
-#       - name: Check if relevant files changed
-#         id: check
-#         run: |
-#           # Default to "false"
-#           echo "changed=false" >> $GITHUB_OUTPUT
-# 
-#           # Compare the previous commit (github.event.before) to the current one (github.sha)
-#           # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
-#           # set changed=true
-#           if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
-#              | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
-#             echo "changed=true" >> $GITHUB_OUTPUT
-#           fi
-
-  check_model_server_changes:
-    runs-on: ubuntu-latest
-    outputs:
-      changed: "true"
-    steps:
-      - name: Bypass check and set output
-        run: echo "changed=true" >> $GITHUB_OUTPUT
-        
-  build-amd64:
-    needs: [check_model_server_changes]
-    if: needs.check_model_server_changes.outputs.changed == 'true'
-    runs-on:
-      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
-    env:
-      PLATFORM_PAIR: linux-amd64
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: System Info
-        run: |
-          df -h
-          free -h
-          docker system prune -af --volumes
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-        with:
-          driver-opts: |
-            image=moby/buildkit:latest
-            network=host
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Build and Push AMD64
-        uses: docker/build-push-action@v6
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile.model_server
-          platforms: linux/amd64
-          push: true
-          tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64
-          build-args: |
-            ONYX_VERSION=${{ github.ref_name }}
-          outputs: type=registry
-          provenance: false
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-#           no-cache: true
-
-  build-arm64:
-    needs: [check_model_server_changes]
-    if: needs.check_model_server_changes.outputs.changed == 'true'
-    runs-on:
-      [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-arm64"]
-    env:
-      PLATFORM_PAIR: linux-arm64
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: System Info
-        run: |
-          df -h
-          free -h
-          docker system prune -af --volumes
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-        with:
-          driver-opts: |
-            image=moby/buildkit:latest
-            network=host
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Build and Push ARM64
-        uses: docker/build-push-action@v6
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile.model_server
-          platforms: linux/arm64
-          push: true
-          tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
-          build-args: |
-            ONYX_VERSION=${{ github.ref_name }}
-          outputs: type=registry
-          provenance: false
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-  merge-and-scan:
-    needs: [build-amd64, build-arm64, check_model_server_changes]
-    if: needs.check_model_server_changes.outputs.changed == 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check if stable release version
-        id: check_version
-        run: |
-          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
-            echo "is_stable=true" >> $GITHUB_OUTPUT
-          else
-            echo "is_stable=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Create and Push Multi-arch Manifest
-        run: |
-          docker buildx create --use
-          docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
-            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
-            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
-          if [[ "${{ steps.check_version.outputs.is_stable }}" == "true" ]]; then
-            docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
-              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
-              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
-          fi
-          if [[ "${{ env.EDGE_TAG }}" == "true" ]]; then
-            docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:edge \
-              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
-              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
-          fi
-
-      - name: Run Trivy vulnerability scanner
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 30
-          max_attempts: 3
-          retry_wait_seconds: 10
-          command: |
-            docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-              -e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-              -e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
-              image \
-              --skip-version-check \
-              --timeout 20m \
-              --severity CRITICAL,HIGH \
-              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/docker-build-push-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-web-container-on-tag.yml
@@ -1,192 +0,0 @@
-name: Build and Push Web Image on Tag
-
-on:
-  push:
-    tags:
-      - "*"
-
-env:
-  REGISTRY_IMAGE: onyxdotapp/onyx-web-server
-
-  # tag nightly builds with "edge"
-  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
-
-  DEPLOYMENT: standalone
-
-jobs:
-  precheck:
-    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
-    outputs:
-      should-run: ${{ steps.set-output.outputs.should-run }}
-    steps:
-      - name: Check if tag contains "cloud"
-        id: set-output
-        run: |
-          if [[ "${{ github.ref_name }}" == *cloud* ]]; then
-            echo "should-run=false" >> "$GITHUB_OUTPUT"
-          else
-            echo "should-run=true" >> "$GITHUB_OUTPUT"
-          fi
-  build:
-    needs: precheck
-    if: needs.precheck.outputs.should-run == 'true'
-    runs-on:
-      - runs-on
-      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
-      - run-id=${{ github.run_id }}
-      - tag=platform-${{ matrix.platform }}
-    strategy:
-      fail-fast: false
-      matrix:
-        platform:
-          - linux/amd64
-          - linux/arm64
-
-    steps:
-      - name: Prepare
-        run: |
-          platform=${{ matrix.platform }}
-          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
-
-      - name: Check if stable release version
-        id: check_version
-        run: |
-          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
-            echo "is_stable=true" >> $GITHUB_OUTPUT
-          else
-            echo "is_stable=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.REGISTRY_IMAGE }}
-          flavor: |
-            latest=false
-          tags: |
-            type=raw,value=${{ github.ref_name }}
-            type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
-            type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: ./web
-          file: ./web/Dockerfile
-          platforms: ${{ matrix.platform }}
-          push: true
-          build-args: |
-            ONYX_VERSION=${{ github.ref_name }}
-            NODE_OPTIONS=--max-old-space-size=8192
-
-          labels: ${{ steps.meta.outputs.labels }}
-          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-          # no-cache needed due to weird interactions with the builds for different platforms
-          # NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
-          
-      - name: Export digest
-        run: |
-          mkdir -p /tmp/digests
-          digest="${{ steps.build.outputs.digest }}"
-          touch "/tmp/digests/${digest#sha256:}"
-
-      - name: Upload digest
-        uses: actions/upload-artifact@v4
-        with:
-          name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
-          path: /tmp/digests/*
-          if-no-files-found: error
-          retention-days: 1
-
-  merge:
-    needs:
-      - build
-    if: needs.precheck.outputs.should-run == 'true'
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check if stable release version
-        id: check_version
-        run: |
-          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
-            echo "is_stable=true" >> $GITHUB_OUTPUT
-          else
-            echo "is_stable=false" >> $GITHUB_OUTPUT
-          fi
-        
-      - name: Download digests
-        uses: actions/download-artifact@v4
-        with:
-          path: /tmp/digests
-          pattern: web-digests-*-${{ github.run_id }}
-          merge-multiple: true
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.REGISTRY_IMAGE }}
-          flavor: |
-            latest=false
-          tags: |
-            type=raw,value=${{ github.ref_name }}
-            type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
-            type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Create manifest list and push
-        working-directory: /tmp/digests
-        run: |
-          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
-
-      - name: Inspect image
-        run: |
-          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
-
-      # trivy has their own rate limiting issues causing this action to flake
-      # we worked around it by hardcoding to different db repos in env
-      # can re-enable when they figure it out
-      # https://github.com/aquasecurity/trivy/discussions/7538
-      # https://github.com/aquasecurity/trivy-action/issues/389
-      - name: Run Trivy vulnerability scanner
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 30
-          max_attempts: 3
-          retry_wait_seconds: 10
-          command: |
-            docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-              -e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-              -e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
-              image \
-              --skip-version-check \
-              --timeout 20m \
-              --severity CRITICAL,HIGH \
-              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/docker-tag-beta.yml
+++ b/.github/workflows/docker-tag-beta.yml
@@ -0,0 +1,46 @@
+# This workflow is manually triggered from the GitHub Actions tab.
+# Given a version, it tags that version's web server, backend (API server),
+# and model server images as "beta".
+
+name: Tag Beta Version
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: "The version (ie v1.0.0-beta.0) to tag as beta"
+        required: true
+
+jobs:
+  tag:
+    # See https://runs-on.com/runners/linux/
+    # use a lower powered instance since this just does i/o to docker hub
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
+    env:
+      # Hand the user-supplied version to the scripts through the environment
+      # rather than expanding it inside them, so its value is never parsed as shell.
+      VERSION: ${{ github.event.inputs.version }}
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Enable Docker CLI experimental features
+        run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> "$GITHUB_ENV"
+
+      - name: Pull, Tag and Push Web Server Image
+        run: |
+          docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta "onyxdotapp/onyx-web-server:${VERSION}"
+
+      - name: Pull, Tag and Push API Server Image
+        run: |
+          docker buildx imagetools create -t onyxdotapp/onyx-backend:beta "onyxdotapp/onyx-backend:${VERSION}"
+
+      - name: Pull, Tag and Push Model Server Image
+        run: |
+          docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta "onyxdotapp/onyx-model-server:${VERSION}"
--- a/.github/workflows/docker-tag-latest.yml
+++ b/.github/workflows/docker-tag-latest.yml
@@ -14,13 +14,13 @@ jobs:
  tag:
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
-    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
    steps:
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3

      - name: Login to Docker Hub
-        uses: docker/login-action@v1
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
--- a/.github/workflows/helm-chart-releases.yml
+++ b/.github/workflows/helm-chart-releases.yml
@@ -14,12 +14,12 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Helm CLI
-        uses: azure/setup-helm@v4
+        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4
        with:
          version: v3.12.1

@@ -43,7 +43,7 @@ jobs:
          done

      - name: Publish Helm charts to gh-pages
-        uses: stefanprodan/helm-gh-pages@v1.7.0
+        uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # ratchet:stefanprodan/helm-gh-pages@v1.7.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: deployment/helm/charts
--- a/.github/workflows/nightly-close-stale-issues.yml
+++ b/.github/workflows/nightly-close-stale-issues.yml
@@ -7,12 +7,12 @@ permissions:
  # contents: write # only for delete-branch option
  issues: write
  pull-requests: write
-  
+
 jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/stale@v9
+      - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # ratchet:actions/stale@v9
        with:
          stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
          stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
@@ -20,4 +20,3 @@ jobs:
          close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'
          days-before-stale: 75
 #           days-before-close: 90  # uncomment after we test stale behavior
-          
--- a/.github/workflows/nightly-scan-licenses.yml
+++ b/.github/workflows/nightly-scan-licenses.yml
@@ -16,18 +16,18 @@ permissions:
  actions: read
  contents: read
  security-events: write
-  
+
 jobs:
  scan-licenses:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
-        
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
        with:
          python-version: '3.11'
          cache: 'pip'
@@ -35,7 +35,7 @@ jobs:
            backend/requirements/default.txt
            backend/requirements/dev.txt
            backend/requirements/model_server.txt
-      
+
      - name: Get explicit and transitive dependencies
        run: |
          python -m pip install --upgrade pip
@@ -43,28 +43,28 @@ jobs:
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
          pip freeze > requirements-all.txt
-                    
+
      - name: Check python
        id: license_check_report
-        uses: pilosus/action-pip-license-checker@v2
+        uses: pilosus/action-pip-license-checker@e909b0226ff49d3235c99c4585bc617f49fff16a # ratchet:pilosus/action-pip-license-checker@v3
        with:
          requirements: 'requirements-all.txt'
          fail: 'Copyleft'
          exclude: '(?i)^(pylint|aio[-_]*).*'
-          
+
      - name: Print report
        if: always()
        run: echo "${{ steps.license_check_report.outputs.report }}"
-      
+
      - name: Install npm dependencies
        working-directory: ./web
        run: npm ci

        # be careful enabling the sarif and upload as it may spam the security tab
-        # with a huge amount of items. Work out the issues before enabling upload.       
+        # with a huge amount of items. Work out the issues before enabling upload.
 #       - name: Run Trivy vulnerability scanner in repo mode
 #         if: always()
-#         uses: aquasecurity/trivy-action@0.29.0
+#         uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
 #         with:
 #           scan-type: fs
 #           scan-ref: .
@@ -73,7 +73,7 @@ jobs:
 #           severity: HIGH,CRITICAL
 # #           format: sarif
 # #           output: trivy-results.sarif
-# 
+#
 # #       - name: Upload Trivy scan results to GitHub Security tab
 # #         uses: github/codeql-action/upload-sarif@v3
 # #         with:
@@ -81,14 +81,14 @@ jobs:

  scan-trivy:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
-      
+    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
+
    steps:
    - name: Set up Docker Buildx
-      uses: docker/setup-buildx-action@v3
+      uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3

    - name: Login to Docker Hub
-      uses: docker/login-action@v3
+      uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_TOKEN }}
@@ -98,7 +98,7 @@ jobs:
      run: docker pull onyxdotapp/onyx-backend:latest

    - name: Run Trivy vulnerability scanner on backend
-      uses: aquasecurity/trivy-action@0.29.0
+      uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
      env:
        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -112,9 +112,9 @@ jobs:
    # Web server
    - name: Pull web server docker image
      run: docker pull onyxdotapp/onyx-web-server:latest
-          
+
    - name: Run Trivy vulnerability scanner on web server
-      uses: aquasecurity/trivy-action@0.29.0
+      uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
      env:
        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -130,7 +130,7 @@ jobs:
      run: docker pull onyxdotapp/onyx-model-server:latest

    - name: Run Trivy vulnerability scanner
-      uses: aquasecurity/trivy-action@0.29.0
+      uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
      env:
        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
@@ -139,4 +139,4 @@ jobs:
        scanners: license
        severity: HIGH,CRITICAL
        vuln-type: library
-        exit-code: 0
+        exit-code: 0
--- a/.github/workflows/pr-external-dependency-unit-tests.yml
+++ b/.github/workflows/pr-external-dependency-unit-tests.yml
@@ -1,4 +1,7 @@
 name: External Dependency Unit Tests
+concurrency:
+  group: External-Dependency-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true

 on:
  merge_group:
@@ -27,13 +30,14 @@ env:

 jobs:
  discover-test-dirs:
-    runs-on: ubuntu-latest
+    # NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
+    runs-on: ubuntu-slim
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
-      
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
      - name: Discover test directories
        id: set-matrix
        run: |
@@ -44,8 +48,11 @@ jobs:
  external-dependency-unit-tests:
    needs: discover-test-dirs
    # Use larger runner with more resources for Vespa
-    runs-on: [runs-on, runner=16cpu-linux-x64, "run-id=${{ github.run_id }}"]
-    
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-arm64
+      - ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
+      - extras=s3-cache
    strategy:
      fail-fast: false
      matrix:
@@ -56,42 +63,31 @@ jobs:
      MODEL_SERVER_HOST: "disabled"

    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - name: Setup Python and Install Dependencies
+        uses: ./.github/actions/setup-python-and-install-dependencies
+
+      - name: Setup Playwright
+        uses: ./.github/actions/setup-playwright
+
+      # needed for pulling Vespa, Redis, Postgres, and Minio images
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
-          python-version: "3.11"
-          cache: "pip"
-          cache-dependency-path: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt
-
-      - name: Install Dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-          playwright install chromium
-          playwright install-deps chromium
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Set up Standard Dependencies
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index

-      - name: Wait for services
-        run: |
-          echo "Waiting for services to be ready..."
-          sleep 30
-          
-          # Wait for Vespa specifically
-          echo "Waiting for Vespa to be ready..."
-          timeout 300 bash -c 'until curl -f -s http://localhost:8081/ApplicationStatus > /dev/null 2>&1; do echo "Vespa not ready, waiting..."; sleep 10; done' || echo "Vespa timeout - continuing anyway"
-          
-          echo "Services should be ready now"
-
      - name: Run migrations
        run: |
          cd backend
--- a/.github/workflows/pr-helm-chart-testing.yml
+++ b/.github/workflows/pr-helm-chart-testing.yml
@@ -1,30 +1,33 @@
 name: Helm - Lint and Test Charts
+concurrency:
+  group: Helm-Lint-and-Test-Charts-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true

 on:
  merge_group:
  pull_request:
    branches: [ main ]
  workflow_dispatch:  # Allows manual triggering
-  
+
 jobs:
  helm-chart-check:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]

    # fetch-depth 0 is required for helm/chart-testing-action
    steps:
    - name: Checkout code
-      uses: actions/checkout@v4
+      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
      with:
        fetch-depth: 0
-        
+
    - name: Set up Helm
-      uses: azure/setup-helm@v4.2.0
+      uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
      with:
-        version: v3.17.0
-      
+        version: v3.19.0
+
    - name: Set up chart-testing
-      uses: helm/chart-testing-action@v2.7.0
+      uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0

    # even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
    - name: Run chart-testing (list-changed)
@@ -41,7 +44,7 @@ jobs:
 #     - name: Force run chart-testing (list-changed)
 #       id: list-changed
 #       run: echo "changed=true" >> $GITHUB_OUTPUT
-        
+
    # lint all charts if any changes were detected
    - name: Run chart-testing (lint)
      if: steps.list-changed.outputs.changed == 'true'
@@ -51,7 +54,7 @@ jobs:

    - name: Create kind cluster
      if: steps.list-changed.outputs.changed == 'true'
-      uses: helm/kind-action@v1.12.0
+      uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0

    - name: Pre-install cluster status check
      if: steps.list-changed.outputs.changed == 'true'
@@ -118,7 +121,7 @@ jobs:
      if: steps.list-changed.outputs.changed == 'true'
      run: |
        echo "=== Starting chart installation with monitoring ==="
-        
+
        # Function to monitor cluster state
        monitor_cluster() {
          while true; do
@@ -140,11 +143,11 @@ jobs:
            sleep 60
          done
        }
-        
+
        # Start monitoring in background
        monitor_cluster &
        MONITOR_PID=$!
-        
+
        # Set up cleanup
        cleanup() {
          echo "=== Cleaning up monitoring process ==="
@@ -153,10 +156,10 @@ jobs:
          kubectl get pods --all-namespaces
          kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
        }
-        
+
        # Trap cleanup on exit
        trap cleanup EXIT
-        
+
        # Run the actual installation with detailed logging
        echo "=== Starting ct install ==="
        set +e
@@ -214,15 +217,15 @@ jobs:
        echo "=== Final cluster state ==="
        kubectl get pods --all-namespaces
        kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
-        
+
        echo "=== Pod descriptions for debugging ==="
        kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
-        
+
        echo "=== Recent logs for debugging ==="
        kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"

        echo "=== Helm releases ==="
        helm list --all-namespaces
-      # the following would install only changed charts, but we only have one chart so 
+      # the following would install only changed charts, but we only have one chart so
      # don't worry about that for now
      # run: ct install --target-branch ${{ github.event.repository.default_branch }}
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -11,11 +11,6 @@ on:
      - "release/**"

 env:
-  # Private Registry Configuration
-  PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
-  PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
-  PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
-
  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -31,15 +26,17 @@ env:
  PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
+  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}

 jobs:
  discover-test-dirs:
-    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+    # NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
+    runs-on: ubuntu-slim
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Discover test directories
        id: set-matrix
@@ -61,105 +58,92 @@ jobs:
          all_dirs="[${all_dirs%,}]"
          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT

-  prepare-build:
-    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Prepare build
-        uses: ./.github/actions/prepare-build

  build-backend-image:
-    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      # needed for authenticated Docker Hub pulls during the image build (e.g. base images)
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Backend Docker image
-        uses: useblacksmith/build-push-action@v2
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
-          platforms: linux/arm64
-          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
          push: true
-          outputs: type=registry
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}


  build-model-server-image:
-    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      # needed for authenticated Docker Hub pulls during the image build (e.g. base images)
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Model Server Docker image
-        uses: useblacksmith/build-push-action@v2
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
-          platforms: linux/arm64
-          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
          push: true
-          outputs: type=registry
-          provenance: false
-          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max


  build-integration-image:
-    needs: prepare-build
-    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
-
-      - name: Download OpenAPI artifacts
-        uses: actions/download-artifact@v4
-        with:
-          name: openapi-artifacts
-          path: backend/generated/
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3

-      - name: Build and push integration test Docker image
-        uses: useblacksmith/build-push-action@v2
+      # needed for pulling openapitools/openapi-generator-cli
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
-          context: ./backend
-          file: ./backend/tests/integration/Dockerfile
-          platforms: linux/arm64
-          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
-          push: true
-          outputs: type=registry
-          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push integration test image with Docker Bake
+        env:
+          REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
+          TAG: integration-test-${{ github.run_id }}
+        run: cd backend && docker buildx bake --push integration

  integration-tests:
    needs:
@@ -169,7 +153,11 @@ jobs:
        build-model-server-image,
        build-integration-image,
      ]
-    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+    runs-on:
+      - runs-on
+      - runner=4cpu-linux-arm64
+      - ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
+      - extras=ecr-cache

    strategy:
      fail-fast: false
@@ -177,43 +165,19 @@ jobs:
        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}

    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Pull Docker images
-        run: |
-          # Pull all images from registry in parallel
-          echo "Pulling Docker images in parallel..."
-          # Pull images from private registry
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
-
-          # Wait for all background jobs to complete
-          wait
-          echo "All Docker images pulled successfully"
-
-          # Re-tag to remove registry prefix for docker-compose
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
-
      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
      # NOTE: don't need web server for integration tests
      - name: Start Docker containers
@@ -225,7 +189,8 @@ jobs:
          POSTGRES_USE_NULL_POOL=true \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=test \
+          ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
+          ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
          INTEGRATION_TESTS_MODE=true \
          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
@@ -252,15 +217,15 @@ jobs:
          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))
-            
+
            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Service did not become ready in 5 minutes."
              exit 1
            fi
-            
+
            # Use curl with error handling to ignore specific exit code 56
            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
-            
+
            if [ "$response" = "200" ]; then
              echo "Service is ready!"
              break
@@ -269,7 +234,7 @@ jobs:
            else
              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
            fi
-            
+
            sleep 5
          done
          echo "Finished waiting for service."
@@ -281,7 +246,7 @@ jobs:
            -p mock-it-services-stack up -d

      - name: Run Integration Tests for ${{ matrix.test-dir.name }}
-        uses: nick-fields/retry@v3
+        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
        with:
          timeout_minutes: 20
          max_attempts: 3
@@ -318,7 +283,7 @@ jobs:
              -e TEST_WEB_HOSTNAME=test-runner \
              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
              -e MOCK_CONNECTOR_SERVER_PORT=8001 \
-              onyxdotapp/onyx-integration:test \
+              ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
              /app/tests/integration/${{ matrix.test-dir.path }}

      # ------------------------------------------------------------
@@ -337,18 +302,12 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
        with:
          name: docker-all-logs-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

-      - name: Stop Docker containers
-        if: always()
-        run: |
-          cd deployment/docker_compose
-          docker compose down -v
-

  multitenant-tests:
    needs:
@@ -357,35 +316,19 @@ jobs:
        build-model-server-image,
        build-integration-image,
      ]
-    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+    runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]

    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Pull Docker images
-        run: |
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
-          wait
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
-
      - name: Start Docker containers for multi-tenant tests
        run: |
          cd deployment/docker_compose
@@ -394,7 +337,8 @@ jobs:
          AUTH_TYPE=cloud \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=test \
+          ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
+          ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
          DEV_MODE=true \
          docker compose -f docker-compose.multitenant-dev.yml up \
            relational_db \
@@ -457,9 +401,8 @@ jobs:
            -e SKIP_RESET=true \
            -e REQUIRE_EMAIL_VERIFICATION=false \
            -e DISABLE_TELEMETRY=true \
-            -e IMAGE_TAG=test \
            -e DEV_MODE=true \
-            onyxdotapp/onyx-integration:test \
+            ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
            /app/tests/integration/multitenant_tests

      - name: Dump API server logs (multi-tenant)
@@ -476,7 +419,7 @@ jobs:

      - name: Upload logs (multi-tenant)
        if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
        with:
          name: docker-all-logs-multitenant
          path: ${{ github.workspace }}/docker-compose-multitenant.log
@@ -487,12 +430,13 @@ jobs:
          cd deployment/docker_compose
          docker compose -f docker-compose.multitenant-dev.yml down -v

-  required: 
-    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+  required:
+    # NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
+    runs-on: ubuntu-slim
    needs: [integration-tests, multitenant-tests]
    if: ${{ always() }}
    steps:
-      - uses: actions/github-script@v7
+      - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
        with:
          script: |
            const needs = ${{ toJSON(needs) }};
--- a/.github/workflows/pr-jest-tests.yml
+++ b/.github/workflows/pr-jest-tests.yml
@@ -11,12 +11,14 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
        with:
          node-version: 22
+          cache: 'npm'
+          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
        working-directory: ./web
@@ -28,7 +30,7 @@ jobs:

      - name: Upload coverage reports
        if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
        with:
          name: jest-coverage-${{ github.run_id }}
          path: ./web/coverage
--- a/.github/workflows/pr-labeler.yml
+++ b/.github/workflows/pr-labeler.yml
@@ -27,7 +27,7 @@ jobs:
            echo "::error::❌ Your PR title does not follow the Conventional Commits format.
              This check ensures that all pull requests use clear, consistent titles that help automate changelogs and improve project history.

-              Please update your PR title to follow the Conventional Commits style.  
+              Please update your PR title to follow the Conventional Commits style.
              Here is a link to a blog explaining the reason why we've included the Conventional Commits style into our PR titles: https://xfuture-blog.com/working-with-conventional-commits

              **Here are some examples of valid PR titles:**
--- a/.github/workflows/pr-linear-check.yml
+++ b/.github/workflows/pr-linear-check.yml
@@ -1,4 +1,7 @@
 name: Ensure PR references Linear
+concurrency:
+  group: Ensure-PR-references-Linear-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true

 on:
  pull_request:
--- a/.github/workflows/pr-mit-integration-tests.yml
+++ b/.github/workflows/pr-mit-integration-tests.yml
@@ -8,11 +8,6 @@ on:
    types: [checks_requested]

 env:
-  # Private Registry Configuration
-  PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
-  PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
-  PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
-
  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -31,12 +26,13 @@ env:

 jobs:
  discover-test-dirs:
-    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+    # NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
+    runs-on: ubuntu-slim
    outputs:
      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Discover test directories
        id: set-matrix
@@ -58,106 +54,89 @@ jobs:
          all_dirs="[${all_dirs%,}]"
          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT

-  prepare-build:
-    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Prepare build
-        uses: ./.github/actions/prepare-build
-
  build-backend-image:
-    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      # needed for pulling external images during the build;
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Backend Docker image
-        uses: useblacksmith/build-push-action@v2
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile
-          platforms: linux/arm64
-          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
          push: true
-          outputs: type=registry
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

-
  build-model-server-image:
-    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      # needed for pulling external images during the build;
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push Model Server Docker image
-        uses: useblacksmith/build-push-action@v2
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
-          platforms: linux/arm64
-          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
          push: true
-          outputs: type=registry
-          provenance: false
-          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
-
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max

  build-integration-image:
-    needs: prepare-build
-    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
-
-      - name: Download OpenAPI artifacts
-        uses: actions/download-artifact@v4
-        with:
-          name: openapi-artifacts
-          path: backend/generated/
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3

-      - name: Build and push integration test Docker image
-        uses: useblacksmith/build-push-action@v2
+      # needed for pulling openapitools/openapi-generator-cli
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
-          context: ./backend
-          file: ./backend/tests/integration/Dockerfile
-          platforms: linux/arm64
-          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
-          push: true
-          outputs: type=registry
-          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

+      - name: Build and push integration test image with Docker Bake
+        env:
+          REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
+          TAG: integration-test-${{ github.run_id }}
+        run: cd backend && docker buildx bake --push integration

  integration-tests-mit:
    needs:
@@ -167,8 +146,11 @@ jobs:
        build-model-server-image,
        build-integration-image,
      ]
-    # See https://docs.blacksmith.sh/blacksmith-runners/overview
-    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+    runs-on:
+      - runs-on
+      - runner=4cpu-linux-arm64
+      - ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
+      - extras=ecr-cache

    strategy:
      fail-fast: false
@@ -176,43 +158,19 @@ jobs:
        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}

    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Login to Private Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PRIVATE_REGISTRY }}
-          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
-          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      # needed for pulling Vespa, Redis, Postgres, and Minio images
      # otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Pull Docker images
-        run: |
-          # Pull all images from registry in parallel
-          echo "Pulling Docker images in parallel..."
-          # Pull images from private registry
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
-          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
-
-          # Wait for all background jobs to complete
-          wait
-          echo "All Docker images pulled successfully"
-
-          # Re-tag to remove registry prefix for docker-compose
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
-          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
-
      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
      # NOTE: don't need web server for integration tests
      - name: Start Docker containers
@@ -223,7 +181,8 @@ jobs:
          POSTGRES_USE_NULL_POOL=true \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=test \
+          ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
+          ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
          INTEGRATION_TESTS_MODE=true \
          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
            relational_db \
@@ -249,15 +208,15 @@ jobs:
          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))
-            
+
            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Service did not become ready in 5 minutes."
              exit 1
            fi
-            
+
            # Use curl with error handling to ignore specific exit code 56
            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
-            
+
            if [ "$response" = "200" ]; then
              echo "Service is ready!"
              break
@@ -266,7 +225,7 @@ jobs:
            else
              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
            fi
-            
+
            sleep 5
          done
          echo "Finished waiting for service."
@@ -279,7 +238,7 @@ jobs:

      # NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
      - name: Run Integration Tests for ${{ matrix.test-dir.name }}
-        uses: nick-fields/retry@v3
+        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
        with:
          timeout_minutes: 20
          max_attempts: 3
@@ -316,7 +275,7 @@ jobs:
              -e TEST_WEB_HOSTNAME=test-runner \
              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
              -e MOCK_CONNECTOR_SERVER_PORT=8001 \
-              onyxdotapp/onyx-integration:test \
+              ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
              /app/tests/integration/${{ matrix.test-dir.path }}

      # ------------------------------------------------------------
@@ -335,25 +294,20 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
        with:
          name: docker-all-logs-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

-      - name: Stop Docker containers
-        if: always()
-        run: |
-          cd deployment/docker_compose
-          docker compose down -v

-  
-  required: 
-    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+  required:
+    # NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
+    runs-on: ubuntu-slim
    needs: [integration-tests-mit]
    if: ${{ always() }}
    steps:
-      - uses: actions/github-script@v7
+      - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
        with:
          script: |
            const needs = ${{ toJSON(needs) }};
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -6,13 +6,6 @@ concurrency:
 on: push

 env:
-  # AWS ECR Configuration
-  AWS_REGION: ${{ secrets.AWS_REGION || 'us-west-2' }}
-  ECR_REGISTRY: ${{ secrets.ECR_REGISTRY }}
-  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_ECR }}
-  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_ECR }}
-  BUILDX_NO_DEFAULT_ATTESTATIONS: 1
-  
  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -23,164 +16,153 @@ env:
  SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
  SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}

+  # for MCP OAuth tests
+  MCP_OAUTH_CLIENT_ID: ${{ secrets.MCP_OAUTH_CLIENT_ID }}
+  MCP_OAUTH_CLIENT_SECRET: ${{ secrets.MCP_OAUTH_CLIENT_SECRET }}
+  MCP_OAUTH_ISSUER: ${{ secrets.MCP_OAUTH_ISSUER }}
+  MCP_OAUTH_JWKS_URI: ${{ secrets.MCP_OAUTH_JWKS_URI }}
+  MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
+  MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}
+
  MOCK_LLM_RESPONSE: true
+  MCP_TEST_SERVER_PORT: 8004
+  MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
+  MCP_TEST_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp
+  MCP_TEST_SERVER_BIND_HOST: 0.0.0.0
+  MCP_TEST_SERVER_PUBLIC_HOST: host.docker.internal
+  MCP_SERVER_HOST: 0.0.0.0
+  MCP_SERVER_PUBLIC_HOST: host.docker.internal
+  MCP_SERVER_PUBLIC_URL: http://host.docker.internal:8004/mcp

 jobs:
  build-web-image:
-    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+    runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ env.AWS_REGION }}
-
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v2
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3

-      - name: Build and push Web Docker image
-        uses: useblacksmith/build-push-action@v2
-        with:
-          context: ./web
-          file: ./web/Dockerfile
-          platforms: linux/arm64
-          tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}
-          provenance: false
-          sbom: false
-          push: true
-          outputs: type=registry
-          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
-
-  build-backend-image:
-    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ env.AWS_REGION }}
-
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v2
-
-      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
-
-      - name: Build and push Backend Docker image
-        uses: useblacksmith/build-push-action@v2
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile
-          platforms: linux/arm64
-          tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}
-          provenance: false
-          sbom: false
-          push: true
-          outputs: type=registry
-          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
-
-  build-model-server-image:
-    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ env.AWS_REGION }}
-
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v2
-
-      - name: Set up Docker Buildx
-        uses: useblacksmith/setup-docker-builder@v1
-
-      - name: Build and push Model Server Docker image
-        uses: useblacksmith/build-push-action@v2
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile.model_server
-          platforms: linux/arm64
-          tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}
-          provenance: false
-          sbom: false
-          push: true
-          outputs: type=registry
-          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
-
-  playwright-tests:
-    needs: [build-web-image, build-backend-image, build-model-server-image]
-    name: Playwright Tests
-    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ env.AWS_REGION }}
-
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v2
-
-      # needed for pulling Vespa, Redis, Postgres, and Minio images
-      # otherwise, we hit the "Unauthenticated users" limit
+      # needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Pull Docker images
-        run: |
-          # Pull all images from ECR in parallel
-          echo "Pulling Docker images in parallel..."
-          (docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}) &
-          (docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}) &
-          (docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}) &
+      - name: Build and push Web Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: linux/arm64
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
+          push: true
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache,mode=max
+          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

-          # Wait for all background jobs to complete
-          wait
-          echo "All Docker images pulled successfully"
+  build-backend-image:
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

-          # Re-tag with expected names for docker-compose
-          docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-web-server:test
-          docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }} onyxdotapp/onyx-backend:test
-          docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
+      - name: Checkout code
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      # needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push Backend Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile
+          platforms: linux/arm64
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
+          push: true
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache,mode=max
+          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
+
+  build-model-server-image:
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
+
+      # needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push Model Server Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/arm64
+          tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
+          push: true
+          cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache
+          cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache,mode=max
+          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
+
+  playwright-tests:
+    needs: [build-web-image, build-backend-image, build-model-server-image]
+    name: Playwright Tests (${{ matrix.project }})
+    runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}", "extras=ecr-cache"]
+    strategy:
+      fail-fast: false
+      matrix:
+        project: [admin, no-auth, exclusive]
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+        with:
+          fetch-depth: 0

      - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
        with:
          node-version: 22
+          cache: 'npm'
+          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
        working-directory: ./web
        run: npm ci

+      - name: Cache playwright cache
+        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+        with:
+          path: ~/.cache/ms-playwright
+          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
+          restore-keys: |
+            ${{ runner.os }}-playwright-npm-
+
      - name: Install playwright browsers
        working-directory: ./web
        run: npx playwright install --with-deps
@@ -194,13 +176,24 @@ jobs:
          EXA_API_KEY=${{ env.EXA_API_KEY }}
          REQUIRE_EMAIL_VERIFICATION=false
          DISABLE_TELEMETRY=true
-          IMAGE_TAG=test
+          ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
+          ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
+          ONYX_WEB_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
          EOF

+      # needed for pulling Vespa, Redis, Postgres, and Minio images
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml up -d
        id: start_docker

      - name: Wait for service to be ready
@@ -215,15 +208,15 @@ jobs:
          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))
-            
+
            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Service did not become ready in 5 minutes."
              exit 1
            fi
-            
+
            # Use curl with error handling to ignore specific exit code 56
            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
-            
+
            if [ "$response" = "200" ]; then
              echo "Service is ready!"
              break
@@ -232,24 +225,47 @@ jobs:
            else
              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
            fi
-            
+
            sleep 5
          done
          echo "Finished waiting for service."

+      - name: Wait for MCP OAuth mock server
+        run: |
+          echo "Waiting for MCP OAuth mock server on port ${MCP_TEST_SERVER_PORT:-8004}..."
+          start_time=$(date +%s)
+          timeout=120
+
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+
+            if [ $elapsed_time -ge $timeout ]; then
+              echo "Timeout reached. MCP OAuth mock server did not become ready in ${timeout}s."
+              exit 1
+            fi
+
+            if curl -sf "http://localhost:${MCP_TEST_SERVER_PORT:-8004}/healthz" > /dev/null; then
+              echo "MCP OAuth mock server is ready!"
+              break
+            fi
+
+            sleep 3
+          done
+
      - name: Run Playwright tests
        working-directory: ./web
        run: |
          # Create test-results directory to ensure it exists for artifact upload
          mkdir -p test-results
-          npx playwright test
+          npx playwright test --project ${{ matrix.project }}

-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
        if: always()
        with:
-          # Includes test results and debug screenshots
-          name: playwright-test-results-${{ github.run_id }}
-          path: ./web/test-results
+          # Includes test results and trace.zip files
+          name: playwright-test-results-${{ matrix.project }}-${{ github.run_id }}
+          path: ./web/test-results/
          retention-days: 30

      # save before stopping the containers so the logs can be captured
@@ -262,15 +278,11 @@ jobs:

      - name: Upload logs
        if: success() || failure()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
        with:
-          name: docker-logs
+          name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log

-      - name: Stop Docker containers
-        run: |
-          cd deployment/docker_compose
-          docker compose down -v

 # NOTE: Chromatic UI diff testing is currently disabled.
 # We are using Playwright for local and CI testing without visual regression checks.
@@ -289,12 +301,12 @@ jobs:
 #     ]
 #   steps:
 #     - name: Checkout code
-#       uses: actions/checkout@v4
+#       uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
 #       with:
 #         fetch-depth: 0

 #     - name: Setup node
-#       uses: actions/setup-node@v4
+#       uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
 #       with:
 #         node-version: 22

@@ -303,7 +315,7 @@ jobs:
 #       run: npm ci

 #     - name: Download Playwright test results
-#       uses: actions/download-artifact@v4
+#       uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # ratchet:actions/download-artifact@v4
 #       with:
 #         name: test-results
 #         path: ./web/test-results
--- a/.github/workflows/pr-python-checks.yml
+++ b/.github/workflows/pr-python-checks.yml
@@ -1,4 +1,7 @@
 name: Python Checks
+concurrency:
+  group: Python-Checks-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true

 on:
  merge_group:
@@ -10,58 +13,51 @@ on:
 jobs:
  mypy-check:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    # Note: Mypy seems quite optimized for x64 compared to arm64.
+    # Mypy is also single-threaded and incremental, so 2cpu is sufficient.
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]

    steps:
-    - name: Checkout code
-      uses: actions/checkout@v4
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+      - name: Checkout code
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.11'
-        cache: 'pip'
-        cache-dependency-path: |
-          backend/requirements/default.txt
-          backend/requirements/dev.txt
-          backend/requirements/model_server.txt
-    - run: |
-        python -m pip install --upgrade pip
-        pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
+      # needed for pulling openapitools/openapi-generator-cli
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

-    - name: Generate OpenAPI schema
-      working-directory: ./backend
-      env:
-        PYTHONPATH: "."
-      run: |
-        python scripts/onyx_openapi_schema.py --filename generated/openapi.json
+      - name: Prepare build
+        uses: ./.github/actions/prepare-build
+        with:
+          docker-username: ${{ secrets.DOCKER_USERNAME }}
+          docker-password: ${{ secrets.DOCKER_TOKEN }}

-    - name: Generate OpenAPI Python client
-      working-directory: ./backend
-      run: |
-        docker run --rm \
-          -v "${{ github.workspace }}/backend/generated:/local" \
-          openapitools/openapi-generator-cli generate \
-          -i /local/openapi.json \
-          -g python \
-          -o /local/onyx_openapi_client \
-          --package-name onyx_openapi_client \
-          --skip-validate-spec \
-          --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
-            
-    - name: Run MyPy
-      run: |
-        cd backend
-        mypy .
+      - name: Cache mypy cache
+        if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
+        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+        with:
+          path: backend/.mypy_cache
+          key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
+          restore-keys: |
+            mypy-${{ runner.os }}-

-    - name: Check import order with reorder-python-imports
-      run: |
-        cd backend
-        find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
+      - name: Run MyPy
+        working-directory: ./backend
+        env:
+          MYPY_FORCE_COLOR: 1
+          TERM: xterm-256color
+        run: mypy .

-    - name: Check code formatting with Black
-      run: |
-        cd backend
-        black --check .
+      - name: Check import order with reorder-python-imports
+        working-directory: ./backend
+        run: |
+          find ./onyx -name "*.py" -print0 | xargs -0 reorder-python-imports --py311-plus  # NUL-delimited: safe for paths with whitespace (SC2038)
+
+      - name: Check code formatting with Black
+        working-directory: ./backend
+        run: black --check .
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -1,4 +1,7 @@
 name: Connector Tests
+concurrency:
+  group: Connector-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true

 on:
  merge_group:
@@ -119,35 +122,26 @@ env:
 jobs:
  connectors-check:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-connectors-check", "extras=s3-cache"]

    env:
      PYTHONPATH: ./backend

    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-          cache: "pip"
-          cache-dependency-path: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt
+      - name: Setup Python and Install Dependencies
+        uses: ./.github/actions/setup-python-and-install-dependencies

-      - name: Install Dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-          playwright install chromium
-          playwright install-deps chromium
+      - name: Setup Playwright
+        uses: ./.github/actions/setup-playwright

      - name: Detect Connector changes
        id: changes
-        uses: dorny/paths-filter@v3
+        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
        with:
          filters: |
            hubspot:
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -10,7 +10,7 @@ on:
        description: 'Branch to run the workflow on'
        required: false
        default: 'main'
-        
+
 env:
  # Bedrock
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -28,17 +28,17 @@ env:
 jobs:
  model-check:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]

    env:
      PYTHONPATH: ./backend

    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
@@ -53,9 +53,9 @@ jobs:
        run: |
          docker pull onyxdotapp/onyx-model-server:latest
          docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
-          
+
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: "pip"
@@ -90,15 +90,15 @@ jobs:
          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))
-            
+
            if [ $elapsed_time -ge $timeout ]; then
              echo "Timeout reached. Service did not become ready in 5 minutes."
              exit 1
            fi
-            
+
            # Use curl with error handling to ignore specific exit code 56
            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
-            
+
            if [ "$response" = "200" ]; then
              echo "Service is ready!"
              break
@@ -107,11 +107,11 @@ jobs:
            else
              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
            fi
-            
+
            sleep 5
          done
          echo "Finished waiting for service."
-          
+
      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
@@ -127,7 +127,7 @@ jobs:
            -H 'Content-type: application/json' \
            --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
            $SLACK_WEBHOOK
-            
+
      - name: Dump all-container logs (optional)
        if: always()
        run: |
@@ -136,14 +136,7 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
        with:
          name: docker-all-logs
          path: ${{ github.workspace }}/docker-compose.log
-          
-      - name: Stop Docker containers
-        if: always()
-        run: |
-          cd deployment/docker_compose
-          docker compose -f docker-compose.model-server-test.yml down -v
-          
--- a/.github/workflows/pr-python-tests.yml
+++ b/.github/workflows/pr-python-tests.yml
@@ -1,4 +1,7 @@
 name: Python Unit Tests
+concurrency:
+  group: Python-Unit-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true

 on:
  merge_group:
@@ -10,7 +13,8 @@ on:
 jobs:
  backend-check:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
+

    env:
      PYTHONPATH: ./backend
@@ -18,27 +22,15 @@ jobs:
      SF_USERNAME: ${{ secrets.SF_USERNAME }}
      SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
      SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
-      
+
    steps:
+    - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
    - name: Checkout code
-      uses: actions/checkout@v4
+      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4

-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.11'
-        cache: 'pip'
-        cache-dependency-path: |
-          backend/requirements/default.txt
-          backend/requirements/dev.txt
-          backend/requirements/model_server.txt
-
-    - name: Install Dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
+    - name: Setup Python and Install Dependencies
+      uses: ./.github/actions/setup-python-and-install-dependencies

    - name: Run Tests
      shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
--- a/.github/workflows/pr-quality-checks.yml
+++ b/.github/workflows/pr-quality-checks.yml
@@ -10,14 +10,17 @@ on:
 jobs:
  quality-checks:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-quality-checks"]
    steps:
-      - uses: actions/checkout@v4
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
        with:
          fetch-depth: 0
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
        with:
          python-version: "3.11"
-      - uses: pre-commit/action@v3.0.1
+      - name: Setup Terraform
+        uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
+      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
        with:
          extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
--- a/.github/workflows/sync_foss.yml
+++ b/.github/workflows/sync_foss.yml
@@ -0,0 +1,47 @@
+name: Sync FOSS Repo
+
+on:
+  schedule:
+    # Run daily at 11:00 UTC (3am PST / 4am PDT)
+    - cron: '0 11 * * *'
+  workflow_dispatch:
+
+jobs:
+  sync-foss:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout main Onyx repo
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install git-filter-repo
+        run: |
+          sudo apt-get update && sudo apt-get install -y git-filter-repo
+
+      - name: Configure SSH for deploy key
+        env:
+          FOSS_REPO_DEPLOY_KEY: ${{ secrets.FOSS_REPO_DEPLOY_KEY }}
+        run: |
+          mkdir -p ~/.ssh
+          echo "$FOSS_REPO_DEPLOY_KEY" > ~/.ssh/id_ed25519
+          chmod 600 ~/.ssh/id_ed25519
+          ssh-keyscan github.com >> ~/.ssh/known_hosts
+
+      - name: Set Git config
+        run: |
+          git config --global user.name "onyx-bot"
+          git config --global user.email "bot@onyx.app"
+
+      - name: Build FOSS version
+        run: bash backend/scripts/make_foss_repo.sh
+
+      - name: Push to FOSS repo
+        env:
+          FOSS_REPO_URL: git@github.com:onyx-dot-app/onyx-foss.git
+        run: |
+          cd /tmp/foss_repo
+          git remote add public "$FOSS_REPO_URL"
+          git push --force public main
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -9,7 +9,7 @@ permissions:

 jobs:
  create-and-push-tag:
-    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-create-and-push-tag"]

    steps:
      # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
@@ -19,7 +19,7 @@ jobs:
      # Additional NOTE: even though this is named "rkuo", the actual key is tied to the onyx repo
      # and not rkuo's personal account. It is fine to leave this key as is!
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
        with:
          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"

--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@ backend/tests/regression/search_quality/eval-*
 backend/tests/regression/search_quality/search_eval_config.yaml
 backend/tests/regression/search_quality/*.json
 backend/onyx/evals/data/
+backend/onyx/evals/one_off/*.json
 *.log

 # secret files
@@ -31,6 +32,10 @@ settings.json
 /backend/tests/regression/answer_quality/search_test_config.yaml
 *.egg-info

+# Claude
+AGENTS.md
+CLAUDE.md
+
 # Local .terraform directories
 **/.terraform/*

@@ -40,3 +45,6 @@ settings.json

 # Local .terraform.lock.hcl file
 .terraform.lock.hcl
+
+# MCP configs
+.playwright-mcp
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,4 +1,15 @@
 repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: check-yaml
+        files: ^\.github/  # regex: escape the dot so only the .github/ directory matches
+
+  - repo: https://github.com/rhysd/actionlint
+    rev: v1.7.8
+    hooks:
+      - id: actionlint
+
  - repo: https://github.com/psf/black
    rev: 25.1.0
    hooks:
@@ -29,6 +40,7 @@ repos:
    rev: v0.11.4
    hooks:
      - id: ruff
+
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v3.1.0
    hooks:
@@ -47,12 +59,18 @@ repos:

  - repo: local
    hooks:
+      - id: terraform-fmt
+        name: terraform fmt
+        entry: terraform fmt -recursive
+        language: system
+        pass_filenames: false
+        files: \.tf$
+
      - id: check-lazy-imports
-        name: Check lazy imports are not directly imported
+        name: Check lazy imports
        entry: python3 backend/scripts/check_lazy_imports.py
        language: system
-        files: ^backend/.*\.py$
-        pass_filenames: false
+        files: ^backend/(?!\.venv/).*\.py$

  # We would like to have a mypy pre-commit hook, but due to the fact that
  # pre-commit runs in it's own isolated environment, we would need to install
--- a/.vscode/env_template.txt
+++ b/.vscode/env_template.txt
@@ -1,6 +1,6 @@
 # Copy this file to .env in the .vscode folder
 # Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
-# Also check out danswer/backend/scripts/restart_containers.sh for a script to restart the containers which Danswer relies on outside of VSCode/Cursor processes
+# Also check out onyx/backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes

 # For local dev, often user Authentication is not needed
 AUTH_TYPE=disabled
@@ -37,8 +37,8 @@ OPENAI_API_KEY=<REPLACE THIS>
 GEN_AI_MODEL_VERSION=gpt-4o
 FAST_GEN_AI_MODEL_VERSION=gpt-4o

-# For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time
-# Only needed if using DanswerBot
+# For Onyx Slack Bot, overrides the UI values so no need to set this up via UI every time
+# Only needed if using OnyxBot
 #ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
 #ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>

@@ -75,4 +75,9 @@ SHOW_EXTRA_CONNECTORS=True
 LANGSMITH_TRACING="true"
 LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
 LANGSMITH_API_KEY=<REPLACE_THIS>
-LANGSMITH_PROJECT=<REPLACE_THIS>
+LANGSMITH_PROJECT=<REPLACE_THIS>
+
+# Local Confluence OAuth testing
+# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
+# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
+# NEXT_PUBLIC_TEST_ENV=True
--- a/AGENTS.md.template
+++ b/AGENTS.md.template
@@ -194,13 +194,15 @@ alembic -n schema_private upgrade head

 ### Creating Migrations
 ```bash
-# Auto-generate migration
-alembic revision --autogenerate -m "description"
+# Create migration
+alembic revision -m "description"

 # Multi-tenant migration
-alembic -n schema_private revision --autogenerate -m "description"
+alembic -n schema_private revision -m "description"
 ```

+Write the migration manually and place it in the file that alembic creates when running the above command.
+
 ## Testing Strategy

 There are 4 main types of tests within Onyx:
--- a/CLAUDE.md.template
+++ b/CLAUDE.md.template
@@ -197,15 +197,19 @@ alembic -n schema_private upgrade head

 ### Creating Migrations
 ```bash
-# Auto-generate migration
-alembic revision --autogenerate -m "description"
+# Create migration
+alembic revision -m "description"

 # Multi-tenant migration
-alembic -n schema_private revision --autogenerate -m "description"
+alembic -n schema_private revision -m "description"
 ```

+Write the migration manually and place it in the file that alembic creates when running the above command.
+
 ## Testing Strategy

+First, activate the virtual environment with `source .venv/bin/activate`.
+
 There are 4 main types of tests within Onyx:

 ### Unit Tests
@@ -216,7 +220,7 @@ write these for complex, isolated modules e.g. `citation_processing.py`.
 To run them:

 ```bash
-python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
+pytest -xv backend/tests/unit
 ```

 ### External Dependency Unit Tests
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -94,6 +94,12 @@ If using PowerShell, the command slightly differs:

 Install the required python dependencies:

+```bash
+pip install -r backend/requirements/combined.txt
+```
+
+or
+
 ```bash
 pip install -r backend/requirements/default.txt
 pip install -r backend/requirements/dev.txt
@@ -122,7 +128,7 @@ Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)
 to manage your Node installations. Once installed, you can run

 ```bash
-nvm install 22 && nvm use 22`
+nvm install 22 && nvm use 22
 node -v # verify your active version
 ``` 

--- a/README.md
+++ b/README.md
@@ -1,29 +1,34 @@
 <a name="readme-top"></a>

 <h2 align="center">
-    <a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
+    <a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
 </h2>

 <p align="center">Open Source AI Platform</p>

 <p align="center">
    <a href="https://discord.gg/TDJ59cGV2X" target="_blank">
-        <img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
+        <img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord" />
    </a>
-    <a href="https://docs.onyx.app/" target="_blank">
-        <img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
+    <a href="https://docs.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
+        <img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation" />
    </a>
-    <a href="https://docs.onyx.app/" target="_blank">
-        <img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation">
+    <a href="https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme" target="_blank">
+        <img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Website" />
    </a>
    <a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
-        <img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
+        <img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License" />
    </a>
 </p>

+<p align="center">
+  <a href="https://trendshift.io/repositories/12516" target="_blank">
+    <img src="https://trendshift.io/api/badge/repositories/12516" alt="onyx-dot-app/onyx | Trendshift" style="width: 250px; height: 55px;" />
+  </a>
+</p>


-**[Onyx](https://www.onyx.app/)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
+**[Onyx](https://www.onyx.app/?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.

 Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep Research, Connectors to 40+ knowledge sources, and more.

@@ -52,7 +57,7 @@ Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep

 Onyx works with all LLMs (like OpenAI, Anthropic, Gemini, etc.) and self-hosted LLMs (like Ollama, vLLM, etc.)

-To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome)!
+To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)!



@@ -60,13 +65,13 @@ To learn more about the features, check out our [documentation](https://docs.ony
 Onyx supports deployments in Docker, Kubernetes, Terraform, along with guides for major cloud providers.

 See guides below:
- [Docker](https://docs.onyx.app/deployment/local/docker) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart) (best for most users)
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes) (best for large teams)
- [Terraform](https://docs.onyx.app/deployment/local/terraform) (best for teams already using Terraform)
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure), etc.)
+- [Docker](https://docs.onyx.app/deployment/local/docker?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for most users)
+- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for large teams)
+- [Terraform](https://docs.onyx.app/deployment/local/terraform?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme) (best for teams already using Terraform)
+- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme), etc.)

 > [!TIP]  
-> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
+> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme)**.



@@ -90,7 +95,7 @@ There are two editions of Onyx:

 - Onyx Community Edition (CE) is available freely under the MIT license.
 - Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
-For feature details, check out [our website](https://www.onyx.app/pricing).
+For feature details, check out [our website](https://www.onyx.app/pricing?utm_source=onyx_repo&utm_medium=github&utm_campaign=readme).



--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -7,15 +7,12 @@ have a contract or agreement with DanswerAI, you are not permitted to use the En
 Edition features outside of personal development or testing purposes. Please reach out to \
 founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"

-# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
-ARG ONYX_VERSION=0.0.0-dev
 # DO_NOT_TRACK is used to disable telemetry for Unstructured
-ENV ONYX_VERSION=${ONYX_VERSION} \
-    DANSWER_RUNNING_IN_DOCKER="true" \
+ENV DANSWER_RUNNING_IN_DOCKER="true" \
    DO_NOT_TRACK="true" \
    PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"

-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/

 # Install system dependencies
 # cmake needed for psycopg (postgres)
@@ -90,6 +87,10 @@ nltk.download('stopwords', quiet=True); \
 nltk.download('punkt_tab', quiet=True);"
 # nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed

+# Pre-downloading tiktoken for setups with limited egress
+RUN python -c "import tiktoken; \
+tiktoken.get_encoding('cl100k_base')"
+
 # Set up application files
 WORKDIR /app

@@ -124,6 +125,10 @@ COPY --chown=onyx:onyx ./assets /app/assets

 ENV PYTHONPATH=/app

+# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
+ARG ONYX_VERSION=0.0.0-dev
+ENV ONYX_VERSION=${ONYX_VERSION}
+
 # Default command which does nothing
 # This container is used by api server and background which specify their own CMD
 CMD ["tail", "-f", "/dev/null"]
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -6,13 +6,10 @@ AI models for Onyx. This container and all the code is MIT Licensed and free for
 You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
 visit https://github.com/onyx-dot-app/onyx."

-# Default ONYX_VERSION, typically overriden during builds by GitHub Actions.
-ARG ONYX_VERSION=0.0.0-dev
-ENV ONYX_VERSION=${ONYX_VERSION} \
-    DANSWER_RUNNING_IN_DOCKER="true" \
+ENV DANSWER_RUNNING_IN_DOCKER="true" \
    HF_HOME=/app/.cache/huggingface

-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/

 # Create non-root user for security best practices
 RUN mkdir -p /app && \
@@ -23,24 +20,6 @@ RUN mkdir -p /app && \
    chmod 755 /var/log/onyx && \
    chown onyx:onyx /var/log/onyx

-# --- add toolchain needed for Rust/Python builds (fastuuid) ---
-ENV RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    PATH=/usr/local/cargo/bin:$PATH
-
-RUN set -eux; \
-    apt-get update && apt-get install -y --no-install-recommends \
-        build-essential \
-        pkg-config \
-        curl \
-        ca-certificates \
-    # Install latest stable Rust (supports Cargo.lock v4)
-    && curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable \
-    && rustc --version && cargo --version \
-    && apt-get remove -y --allow-remove-essential perl-base \
-    && apt-get autoremove -y \
-    && rm -rf /var/lib/apt/lists/*
-
 COPY ./requirements/model_server.txt /tmp/requirements.txt
 RUN uv pip install --system --no-cache-dir --upgrade \
        -r /tmp/requirements.txt && \
@@ -83,4 +62,8 @@ COPY ./model_server /app/model_server

 ENV PYTHONPATH=/app

+# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
+ARG ONYX_VERSION=0.0.0-dev
+ENV ONYX_VERSION=${ONYX_VERSION}
+
 CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
--- a/backend/alembic/versions/09995b8811eb_add_theme_preference_to_user.py
+++ b/backend/alembic/versions/09995b8811eb_add_theme_preference_to_user.py
@@ -0,0 +1,33 @@
+"""add theme_preference to user
+
+Revision ID: 09995b8811eb
+Revises: 3d1cca026fe8
+Create Date: 2025-10-24 08:58:50.246949
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from onyx.db.enums import ThemePreference
+
+
+# revision identifiers, used by Alembic.
+revision = "09995b8811eb"
+down_revision = "3d1cca026fe8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add the nullable ``theme_preference`` column to the ``user`` table."""
+    # native_enum=False: SQLAlchemy uses a string column rather than a database ENUM type.
+    theme_preference = sa.Column(
+        "theme_preference",
+        sa.Enum(ThemePreference, native_enum=False),
+        nullable=True,
+    )
+    op.add_column("user", theme_preference)
+
+
+def downgrade() -> None:  # undo upgrade(): drop the column it added
+    op.drop_column("user", "theme_preference")
--- a/backend/alembic/versions/2acdef638fc2_add_switchover_type_field.py
+++ b/backend/alembic/versions/2acdef638fc2_add_switchover_type_field.py
@@ -0,0 +1,72 @@
+"""add switchover_type field and remove background_reindex_enabled
+
+Revision ID: 2acdef638fc2
+Revises: a4f23d6b71c8
+Create Date: 2025-01-XX XX:XX:XX.XXXXXX
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+from onyx.db.enums import SwitchoverType
+
+
+# revision identifiers, used by Alembic.
+revision = "2acdef638fc2"
+down_revision = "a4f23d6b71c8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Replace ``background_reindex_enabled`` with ``switchover_type``."""
+    # sa.Enum persists member names by default, so the server default uses .name
+    op.add_column(
+        "search_settings",
+        sa.Column(
+            "switchover_type",
+            sa.Enum(SwitchoverType, native_enum=False),
+            nullable=False,
+            server_default=SwitchoverType.REINDEX.name,
+        ),
+    )
+
+    # Backfill: background_reindex_enabled=true -> REINDEX, otherwise INSTANT
+    op.execute(
+        """
+        UPDATE search_settings
+        SET switchover_type = CASE
+            WHEN background_reindex_enabled = true THEN 'REINDEX'
+            ELSE 'INSTANT'
+        END
+        """
+    )
+
+    # Remove the background_reindex_enabled column (replaced by switchover_type)
+    op.drop_column("search_settings", "background_reindex_enabled")
+
+
+def downgrade() -> None:
+    # Re-add the background_reindex_enabled column with default value of True
+    op.add_column(
+        "search_settings",
+        sa.Column(
+            "background_reindex_enabled",
+            sa.Boolean(),
+            nullable=False,
+            server_default="true",
+        ),
+    )
+    # Set background_reindex_enabled based on switchover_type
+    op.execute(
+        """
+        UPDATE search_settings
+        SET background_reindex_enabled = CASE
+            WHEN switchover_type = 'INSTANT' THEN false
+            ELSE true
+        END
+        """
+    )
+    # Remove the switchover_type column
+    op.drop_column("search_settings", "switchover_type")
--- a/backend/alembic/versions/2b75d0a8ffcb_user_file_schema_cleanup.py
+++ b/backend/alembic/versions/2b75d0a8ffcb_user_file_schema_cleanup.py
@@ -12,6 +12,7 @@ from alembic import op
 import sqlalchemy as sa
 from sqlalchemy import text
 import logging
+import fastapi_users_db_sqlalchemy

 logger = logging.getLogger("alembic.runtime.migration")

@@ -58,6 +59,9 @@ def upgrade() -> None:
        logger.info("Dropping chat_session.folder_id...")

        # Drop foreign key constraint first
+        op.execute(
+            "ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_chat_folder_fk"
+        )
        op.execute(
            "ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_folder_fk"
        )
@@ -172,20 +176,6 @@ def downgrade() -> None:
                "user_file", sa.Column("folder_id", sa.Integer(), nullable=True)
            )

-    # Recreate chat_folder table
-    if "chat_folder" not in inspector.get_table_names():
-        op.create_table(
-            "chat_folder",
-            sa.Column("id", sa.Integer(), nullable=False),
-            sa.Column("user_id", sa.UUID(), nullable=False),
-            sa.Column("name", sa.String(), nullable=False),
-            sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
-            sa.PrimaryKeyConstraint("id"),
-            sa.ForeignKeyConstraint(
-                ["user_id"], ["user.id"], name="chat_folder_user_fk"
-            ),
-        )
-
    # Recreate persona__user_folder table
    if "persona__user_folder" not in inspector.get_table_names():
        op.create_table(
@@ -197,6 +187,26 @@ def downgrade() -> None:
            sa.ForeignKeyConstraint(["user_folder_id"], ["user_project.id"]),
        )

+    # Recreate chat_folder table and related structures
+    if "chat_folder" not in inspector.get_table_names():
+        op.create_table(
+            "chat_folder",
+            sa.Column("id", sa.Integer(), nullable=False),
+            sa.Column(
+                "user_id",
+                fastapi_users_db_sqlalchemy.generics.GUID(),
+                nullable=True,
+            ),
+            sa.Column("name", sa.String(), nullable=True),
+            sa.Column("display_priority", sa.Integer(), nullable=False),
+            sa.ForeignKeyConstraint(
+                ["user_id"],
+                ["user.id"],
+                name="chat_folder_user_id_fkey",
+            ),
+            sa.PrimaryKeyConstraint("id"),
+        )
+
    # Add folder_id back to chat_session
    if "chat_session" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("chat_session")]
@@ -208,7 +218,7 @@ def downgrade() -> None:
            # Add foreign key if chat_folder exists
            if "chat_folder" in inspector.get_table_names():
                op.create_foreign_key(
-                    "chat_session_folder_fk",
+                    "chat_session_chat_folder_fk",
                    "chat_session",
                    "chat_folder",
                    ["folder_id"],
--- a/backend/alembic/versions/3a78dba1080a_user_file_legacy_data_cleanup.py
+++ b/backend/alembic/versions/3a78dba1080a_user_file_legacy_data_cleanup.py
@@ -292,7 +292,7 @@ def downgrade() -> None:
    logger.error("CRITICAL: Downgrading data cleanup cannot restore deleted data!")
    logger.error("Data restoration requires backup files or database backup.")

-    raise NotImplementedError(
-        "Downgrade of legacy data cleanup is not supported. "
-        "Deleted data must be restored from backups."
-    )
+    # raise NotImplementedError(
+    #     "Downgrade of legacy data cleanup is not supported. "
+    #     "Deleted data must be restored from backups."
+    # )
--- a/backend/alembic/versions/3d1cca026fe8_add_oauth_config_and_user_tokens.py
+++ b/backend/alembic/versions/3d1cca026fe8_add_oauth_config_and_user_tokens.py
@@ -0,0 +1,121 @@
+"""add_oauth_config_and_user_tokens
+
+Revision ID: 3d1cca026fe8
+Revises: c8a93a2af083
+Create Date: 2025-10-21 13:27:34.274721
+
+"""
+
+from alembic import op
+import fastapi_users_db_sqlalchemy
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "3d1cca026fe8"
+down_revision = "c8a93a2af083"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Create oauth_config table
+    op.create_table(
+        "oauth_config",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("authorization_url", sa.Text(), nullable=False),
+        sa.Column("token_url", sa.Text(), nullable=False),
+        sa.Column("client_id", sa.LargeBinary(), nullable=False),
+        sa.Column("client_secret", sa.LargeBinary(), nullable=False),
+        sa.Column("scopes", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column(
+            "additional_params",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+        ),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("name"),
+    )
+
+    # Create oauth_user_token table
+    op.create_table(
+        "oauth_user_token",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("oauth_config_id", sa.Integer(), nullable=False),
+        sa.Column(
+            "user_id",
+            fastapi_users_db_sqlalchemy.generics.GUID(),
+            nullable=False,
+        ),
+        sa.Column("token_data", sa.LargeBinary(), nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["oauth_config_id"], ["oauth_config.id"], ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("oauth_config_id", "user_id", name="uq_oauth_user_token"),
+    )
+
+    # Create index on user_id for efficient user-based token lookups
+    # Note: unique constraint on (oauth_config_id, user_id) already creates
+    # an index for config-based lookups
+    op.create_index(
+        "ix_oauth_user_token_user_id",
+        "oauth_user_token",
+        ["user_id"],
+    )
+
+    # Add oauth_config_id column to tool table
+    op.add_column("tool", sa.Column("oauth_config_id", sa.Integer(), nullable=True))
+
+    # Create foreign key from tool to oauth_config
+    op.create_foreign_key(
+        "tool_oauth_config_fk",
+        "tool",
+        "oauth_config",
+        ["oauth_config_id"],
+        ["id"],
+        ondelete="SET NULL",
+    )
+
+
+def downgrade() -> None:
+    # Drop foreign key from tool to oauth_config
+    op.drop_constraint("tool_oauth_config_fk", "tool", type_="foreignkey")
+
+    # Drop oauth_config_id column from tool table
+    op.drop_column("tool", "oauth_config_id")
+
+    # Drop index on user_id
+    op.drop_index("ix_oauth_user_token_user_id", table_name="oauth_user_token")
+
+    # Drop oauth_user_token table (will cascade delete tokens)
+    op.drop_table("oauth_user_token")
+
+    # Drop oauth_config table
+    op.drop_table("oauth_config")
--- a/backend/alembic/versions/5e1c073d48a3_add_personal_access_token_table.py
+++ b/backend/alembic/versions/5e1c073d48a3_add_personal_access_token_table.py
@@ -0,0 +1,88 @@
+"""add_personal_access_token_table
+
+Revision ID: 5e1c073d48a3
+Revises: 09995b8811eb
+Create Date: 2025-10-30 17:30:24.308521
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+
+# revision identifiers, used by Alembic.
+revision = "5e1c073d48a3"
+down_revision = "09995b8811eb"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Create personal_access_token table
+    op.create_table(
+        "personal_access_token",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("hashed_token", sa.String(length=64), nullable=False),
+        sa.Column("token_display", sa.String(), nullable=False),
+        sa.Column(
+            "user_id",
+            postgresql.UUID(as_uuid=True),
+            nullable=False,
+        ),
+        sa.Column(
+            "expires_at",
+            sa.DateTime(timezone=True),
+            nullable=True,
+        ),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "last_used_at",
+            sa.DateTime(timezone=True),
+            nullable=True,
+        ),
+        sa.Column(
+            "is_revoked",
+            sa.Boolean(),
+            server_default=sa.text("false"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["user.id"],
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("hashed_token"),
+    )
+
+    # Create indexes
+    op.create_index(
+        "ix_personal_access_token_expires_at",
+        "personal_access_token",
+        ["expires_at"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_pat_user_created",
+        "personal_access_token",
+        ["user_id", sa.text("created_at DESC")],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    """Remove the ``personal_access_token`` table and its two indexes."""
+    pat_table = "personal_access_token"
+
+    # Secondary indexes go first, mirroring upgrade() in reverse.
+    for pat_index in ("ix_pat_user_created", "ix_personal_access_token_expires_at"):
+        op.drop_index(pat_index, table_name=pat_table)
+
+    op.drop_table(pat_table)
--- a/backend/alembic/versions/7547d982db8f_chat_folders.py
+++ b/backend/alembic/versions/7547d982db8f_chat_folders.py
@@ -45,8 +45,23 @@ def upgrade() -> None:


 def downgrade() -> None:
-    op.drop_constraint(
-        "chat_session_chat_folder_fk", "chat_session", type_="foreignkey"
-    )
-    op.drop_column("chat_session", "folder_id")
-    op.drop_table("chat_folder")
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    if "chat_session" in inspector.get_table_names():
+        chat_session_fks = {
+            fk.get("name") for fk in inspector.get_foreign_keys("chat_session")
+        }
+        if "chat_session_chat_folder_fk" in chat_session_fks:
+            op.drop_constraint(
+                "chat_session_chat_folder_fk", "chat_session", type_="foreignkey"
+            )
+
+        chat_session_columns = {
+            col["name"] for col in inspector.get_columns("chat_session")
+        }
+        if "folder_id" in chat_session_columns:
+            op.drop_column("chat_session", "folder_id")
+
+    if "chat_folder" in inspector.get_table_names():
+        op.drop_table("chat_folder")
--- a/backend/alembic/versions/7cc3fcc116c1_user_file_uuid_primary_key_swap.py
+++ b/backend/alembic/versions/7cc3fcc116c1_user_file_uuid_primary_key_swap.py
@@ -180,14 +180,162 @@ def downgrade() -> None:
    )
    logger.error("Only proceed if absolutely necessary and have backups.")

-    # The downgrade would need to:
-    # 1. Add back integer columns
-    # 2. Generate new sequential IDs
-    # 3. Update all foreign key references
-    # 4. Swap primary keys back
-    # This is complex and risky, so we raise an error instead
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)

-    raise NotImplementedError(
-        "Downgrade of UUID primary key swap is not supported due to data loss risk. "
-        "Manual intervention with data backup/restore is required."
+    # Capture existing primary key definitions so we can restore them after swaps
+    persona_pk = inspector.get_pk_constraint("persona__user_file") or {}
+    persona_pk_name = persona_pk.get("name")
+    persona_pk_cols = persona_pk.get("constrained_columns") or []
+
+    project_pk = inspector.get_pk_constraint("project__user_file") or {}
+    project_pk_name = project_pk.get("name")
+    project_pk_cols = project_pk.get("constrained_columns") or []
+
+    # Drop foreign keys that reference the UUID primary key
+    op.drop_constraint(
+        "persona__user_file_user_file_id_fkey",
+        "persona__user_file",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "fk_project__user_file_user_file_id",
+        "project__user_file",
+        type_="foreignkey",
+    )
+
+    # Drop primary keys that rely on the UUID column so we can replace it
+    if persona_pk_name:
+        op.drop_constraint(persona_pk_name, "persona__user_file", type_="primary")
+    if project_pk_name:
+        op.drop_constraint(project_pk_name, "project__user_file", type_="primary")
+
+    # Rebuild integer IDs on user_file using a sequence-backed column
+    op.execute("CREATE SEQUENCE IF NOT EXISTS user_file_id_seq")
+    op.add_column(
+        "user_file",
+        sa.Column(
+            "id_int",
+            sa.Integer(),
+            server_default=sa.text("nextval('user_file_id_seq')"),
+            nullable=False,
+        ),
+    )
+    op.execute("ALTER SEQUENCE user_file_id_seq OWNED BY user_file.id_int")
+
+    # Prepare integer foreign key columns on referencing tables
+    op.add_column(
+        "persona__user_file",
+        sa.Column("user_file_id_int", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "project__user_file",
+        sa.Column("user_file_id_int", sa.Integer(), nullable=True),
+    )
+
+    # Populate the new integer foreign key columns by mapping from the UUID IDs
+    op.execute(
+        """
+        UPDATE persona__user_file AS p
+        SET user_file_id_int = uf.id_int
+        FROM user_file AS uf
+        WHERE p.user_file_id = uf.id
+        """
+    )
+    op.execute(
+        """
+        UPDATE project__user_file AS p
+        SET user_file_id_int = uf.id_int
+        FROM user_file AS uf
+        WHERE p.user_file_id = uf.id
+        """
+    )
+
+    op.alter_column(
+        "persona__user_file",
+        "user_file_id_int",
+        existing_type=sa.Integer(),
+        nullable=False,
+    )
+    op.alter_column(
+        "project__user_file",
+        "user_file_id_int",
+        existing_type=sa.Integer(),
+        nullable=False,
+    )
+
+    # Remove the UUID foreign key columns and rename the integer replacements
+    op.drop_column("persona__user_file", "user_file_id")
+    op.alter_column(
+        "persona__user_file",
+        "user_file_id_int",
+        new_column_name="user_file_id",
+        existing_type=sa.Integer(),
+        nullable=False,
+    )
+
+    op.drop_column("project__user_file", "user_file_id")
+    op.alter_column(
+        "project__user_file",
+        "user_file_id_int",
+        new_column_name="user_file_id",
+        existing_type=sa.Integer(),
+        nullable=False,
+    )
+
+    # Swap the user_file primary key back to the integer column
+    op.drop_constraint("user_file_pkey", "user_file", type_="primary")
+    op.drop_column("user_file", "id")
+    op.alter_column(
+        "user_file",
+        "id_int",
+        new_column_name="id",
+        existing_type=sa.Integer(),
+    )
+    op.alter_column(
+        "user_file",
+        "id",
+        existing_type=sa.Integer(),
+        nullable=False,
+        server_default=sa.text("nextval('user_file_id_seq')"),
+    )
+    op.execute("ALTER SEQUENCE user_file_id_seq OWNED BY user_file.id")
+    op.execute(
+        """
+        SELECT setval(
+            'user_file_id_seq',
+            GREATEST(COALESCE(MAX(id), 1), 1),
+            MAX(id) IS NOT NULL
+        )
+        FROM user_file
+        """
+    )
+    op.create_primary_key("user_file_pkey", "user_file", ["id"])
+
+    # Restore primary keys on referencing tables
+    if persona_pk_cols:
+        op.create_primary_key(
+            "persona__user_file_pkey", "persona__user_file", persona_pk_cols
+        )
+    if project_pk_cols:
+        op.create_primary_key(
+            "project__user_file_pkey",
+            "project__user_file",
+            project_pk_cols,
+        )
+
+    # Recreate foreign keys pointing at the integer primary key
+    op.create_foreign_key(
+        "persona__user_file_user_file_id_fkey",
+        "persona__user_file",
+        "user_file",
+        ["user_file_id"],
+        ["id"],
+    )
+    op.create_foreign_key(
+        "fk_project__user_file_user_file_id",
+        "project__user_file",
+        "user_file",
+        ["user_file_id"],
+        ["id"],
    )
--- a/backend/alembic/versions/9b66d3156fc6_user_file_schema_additions.py
+++ b/backend/alembic/versions/9b66d3156fc6_user_file_schema_additions.py
@@ -181,12 +181,21 @@ def upgrade() -> None:
            sa.Column("user_file_id", psql.UUID(as_uuid=True), nullable=False),
            sa.PrimaryKeyConstraint("project_id", "user_file_id"),
        )
+        logger.info("Created project__user_file table")
+
+    # Only create the index if it doesn't exist
+    existing_indexes = [
+        ix["name"] for ix in inspector.get_indexes("project__user_file")
+    ]
+    if "idx_project__user_file_user_file_id" not in existing_indexes:
        op.create_index(
            "idx_project__user_file_user_file_id",
            "project__user_file",
            ["user_file_id"],
        )
-        logger.info("Created project__user_file table")
+        logger.info(
+            "Created index idx_project__user_file_user_file_id on project__user_file"
+        )

    logger.info("Migration 1 (schema additions) completed successfully")

@@ -201,7 +210,7 @@ def downgrade() -> None:

    # Drop project__user_file table
    if "project__user_file" in inspector.get_table_names():
-        op.drop_index("idx_project__user_file_user_file_id", "project__user_file")
+        # No explicit drop_index needed: DROP TABLE also removes the table's indexes.
        op.drop_table("project__user_file")
        logger.info("Dropped project__user_file table")

--- a/backend/alembic/versions/9drpiiw74ljy_add_config_to_federated_connector.py
+++ b/backend/alembic/versions/9drpiiw74ljy_add_config_to_federated_connector.py
@@ -0,0 +1,97 @@
+"""add config to federated_connector
+
+Revision ID: 9drpiiw74ljy
+Revises: 2acdef638fc2
+Create Date: 2025-11-03 12:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "9drpiiw74ljy"
+down_revision = "2acdef638fc2"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    connection = op.get_bind()
+
+    # Check if column already exists in current schema
+    result = connection.execute(
+        sa.text(
+            """
+            SELECT column_name
+            FROM information_schema.columns
+            WHERE table_schema = current_schema()
+            AND table_name = 'federated_connector'
+            AND column_name = 'config'
+            """
+        )
+    )
+    column_exists = result.fetchone() is not None
+
+    # Add config column with default empty object (only if it doesn't exist)
+    if not column_exists:
+        op.add_column(
+            "federated_connector",
+            sa.Column(
+                "config", postgresql.JSONB(), nullable=False, server_default="{}"
+            ),
+        )
+
+    # Data migration: Single bulk update for all Slack connectors
+    connection.execute(
+        sa.text(
+            """
+            WITH connector_configs AS (
+                SELECT
+                    fc.id as connector_id,
+                    CASE
+                        WHEN fcds.entities->'channels' IS NOT NULL
+                            AND jsonb_typeof(fcds.entities->'channels') = 'array'
+                            AND jsonb_array_length(fcds.entities->'channels') > 0
+                        THEN
+                            jsonb_build_object(
+                                'channels', fcds.entities->'channels',
+                                'search_all_channels', false
+                            ) ||
+                            CASE
+                                WHEN fcds.entities->'include_dm' IS NOT NULL
+                                THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
+                                ELSE '{}'::jsonb
+                            END
+                        ELSE
+                            jsonb_build_object('search_all_channels', true) ||
+                            CASE
+                                WHEN fcds.entities->'include_dm' IS NOT NULL
+                                THEN jsonb_build_object('include_dm', fcds.entities->'include_dm')
+                                ELSE '{}'::jsonb
+                            END
+                    END as config
+                FROM federated_connector fc
+                LEFT JOIN LATERAL (
+                    SELECT entities
+                    FROM federated_connector__document_set
+                    WHERE federated_connector_id = fc.id
+                    AND entities IS NOT NULL
+                    ORDER BY id
+                    LIMIT 1
+                ) fcds ON true
+                WHERE fc.source = 'FEDERATED_SLACK'
+                AND fcds.entities IS NOT NULL
+            )
+            UPDATE federated_connector fc
+            SET config = cc.config
+            FROM connector_configs cc
+            WHERE fc.id = cc.connector_id
+            """
+        )
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("federated_connector", "config")
--- a/backend/alembic/versions/a4f23d6b71c8_add_llm_provider_persona_restrictions.py
+++ b/backend/alembic/versions/a4f23d6b71c8_add_llm_provider_persona_restrictions.py
@@ -0,0 +1,61 @@
+"""add llm provider persona restrictions
+
+Revision ID: a4f23d6b71c8
+Revises: 5e1c073d48a3
+Create Date: 2025-10-21 00:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "a4f23d6b71c8"
+down_revision = "5e1c073d48a3"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "llm_provider__persona",
+        sa.Column("llm_provider_id", sa.Integer(), nullable=False),
+        sa.Column("persona_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["llm_provider_id"], ["llm_provider.id"], ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(["persona_id"], ["persona.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("llm_provider_id", "persona_id"),
+    )
+    op.create_index(
+        "ix_llm_provider__persona_llm_provider_id",
+        "llm_provider__persona",
+        ["llm_provider_id"],
+    )
+    op.create_index(
+        "ix_llm_provider__persona_persona_id",
+        "llm_provider__persona",
+        ["persona_id"],
+    )
+    op.create_index(
+        "ix_llm_provider__persona_composite",
+        "llm_provider__persona",
+        ["persona_id", "llm_provider_id"],
+    )
+
+
+def downgrade() -> None:
+    """Undo ``upgrade``: drop the indexes, then the association table."""
+    table_name = "llm_provider__persona"
+
+    # Indexes are removed in the reverse of their creation order.
+    for index_name in (
+        "ix_llm_provider__persona_composite",
+        "ix_llm_provider__persona_persona_id",
+        "ix_llm_provider__persona_llm_provider_id",
+    ):
+        op.drop_index(index_name, table_name=table_name)
+
+    # With the indexes gone, drop the table itself.
+    op.drop_table(table_name)
--- a/backend/docker-bake.hcl
+++ b/backend/docker-bake.hcl
@@ -0,0 +1,27 @@
+variable "REPOSITORY" {
+  default = "onyxdotapp/onyx-integration"
+}
+
+variable "TAG" {
+  default = "latest"
+}
+
+target "backend" {
+  context    = "."
+  dockerfile = "Dockerfile"
+}
+
+target "integration" {
+  context    = "."
+  dockerfile = "tests/integration/Dockerfile"
+
+  // Provide the base image via build context from the backend target
+  contexts = {
+    base = "target:backend"
+  }
+
+  cache-from = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache"]
+  cache-to   = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache,mode=max"]
+
+  tags      = ["${REPOSITORY}:${TAG}"]
+}
--- a/backend/ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja
+++ b/backend/ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja
@@ -18,7 +18,7 @@
            <!-- <document type="danswer_chunk" mode="index" /> -->
 {{ document_elements }}
        </documents>
-        <nodes count="60">
+        <nodes count="50">
            <resources vcpu="8.0" memory="128.0Gb" architecture="arm64" storage-type="local"
                disk="475.0Gb" />
        </nodes>
--- a/backend/ee/onyx/external_permissions/confluence/space_access.py
+++ b/backend/ee/onyx/external_permissions/confluence/space_access.py
@@ -139,19 +139,13 @@ def get_all_space_permissions(
 ) -> dict[str, ExternalAccess]:
    logger.debug("Getting space permissions")
    # Gets all the spaces in the Confluence instance
-    all_space_keys = []
-    start = 0
-    while True:
-        spaces_batch = confluence_client.get_all_spaces(
-            start=start, limit=REQUEST_PAGINATION_LIMIT
+    all_space_keys = [
+        key
+        for space in confluence_client.retrieve_confluence_spaces(
+            limit=REQUEST_PAGINATION_LIMIT,
        )
-        for space in spaces_batch.get("results", []):
-            all_space_keys.append(space.get("key"))
-
-        if len(spaces_batch.get("results", [])) < REQUEST_PAGINATION_LIMIT:
-            break
-
-        start += len(spaces_batch.get("results", []))
+        if (key := space.get("key"))
+    ]

    # Gets the permissions for each space
    logger.debug(f"Got {len(all_space_keys)} spaces from confluence")
--- a/backend/ee/onyx/server/middleware/tenant_tracking.py
+++ b/backend/ee/onyx/server/middleware/tenant_tracking.py
@@ -8,7 +8,7 @@ from fastapi import Request
 from fastapi import Response

 from ee.onyx.auth.users import decode_anonymous_user_jwt_token
-from onyx.auth.api_key import extract_tenant_from_api_key_header
+from onyx.auth.utils import extract_tenant_from_auth_header
 from onyx.configs.constants import ANONYMOUS_USER_COOKIE_NAME
 from onyx.configs.constants import TENANT_ID_COOKIE_NAME
 from onyx.db.engine.sql_engine import is_valid_schema_name
@@ -49,13 +49,13 @@ async def _get_tenant_id_from_request(
 ) -> str:
    """
    Attempt to extract tenant_id from:
-    1) The API key header
+    1) The API key or PAT (Personal Access Token) header
    2) The Redis-based token (stored in Cookie: fastapiusersauth)
    3) The anonymous user cookie
    Fallback: POSTGRES_DEFAULT_SCHEMA
    """
-    # Check for API key
-    tenant_id = extract_tenant_from_api_key_header(request)
+    # Check for API key or PAT in Authorization header
+    tenant_id = extract_tenant_from_auth_header(request)
    if tenant_id is not None:
        return tenant_id

--- a/backend/ee/onyx/server/oauth/confluence_cloud.py
+++ b/backend/ee/onyx/server/oauth/confluence_cloud.py
@@ -76,6 +76,7 @@ class ConfluenceCloudOAuth:
        "read:confluence-content.permission%20"
        "read:confluence-user%20"
        "read:confluence-groups%20"
+        "read:space:confluence%20"
        "readonly:content.attachment:confluence%20"
        "search:confluence%20"
        # granular scope
--- a/backend/ee/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/chat_backend.py
@@ -161,7 +161,7 @@ def handle_send_message_simple_with_history(
        persona_id=req.persona_id,
    )

-    llm, _ = get_llms_for_persona(persona=chat_session.persona)
+    llm, _ = get_llms_for_persona(persona=chat_session.persona, user=user)

    llm_tokenizer = get_tokenizer(
        model_name=llm.config.model_name,
--- a/backend/ee/onyx/server/query_and_chat/query_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/query_backend.py
@@ -24,6 +24,7 @@ from onyx.chat.models import PersonaOverrideConfig
 from onyx.chat.models import QADocsResponse
 from onyx.chat.process_message import gather_stream
 from onyx.chat.process_message import stream_chat_message_objects
+from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.onyxbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
 from onyx.context.search.models import SavedSearchDocWithContent
 from onyx.context.search.models import SearchRequest
@@ -48,9 +49,42 @@ logger = setup_logger()
 basic_router = APIRouter(prefix="/query")


+class DocumentSearchPagination(BaseModel):
+    offset: int
+    limit: int
+    returned_count: int
+    has_more: bool
+    next_offset: int | None = None
+
+
 class DocumentSearchResponse(BaseModel):
    top_documents: list[SavedSearchDocWithContent]
    llm_indices: list[int]
+    pagination: DocumentSearchPagination
+
+
+def _normalize_pagination(limit: int | None, offset: int | None) -> tuple[int, int]:
+    """Resolve optional pagination inputs to concrete ``(limit, offset)`` values.
+
+    A missing ``limit`` falls back to ``NUM_RETURNED_HITS`` and a missing
+    ``offset`` falls back to ``0``.
+
+    Raises:
+        HTTPException: 400 if the resolved limit is not positive or the
+            resolved offset is negative.
+    """
+    page_size = NUM_RETURNED_HITS if limit is None else limit
+    if page_size <= 0:
+        raise HTTPException(
+            status_code=400, detail="retrieval_options.limit must be positive"
+        )
+
+    start = 0 if offset is None else offset
+    if start < 0:
+        raise HTTPException(
+            status_code=400, detail="retrieval_options.offset cannot be negative"
+        )
+    return page_size, start


@basic_router.post("/document-search")
@@ -64,6 +98,10 @@ def handle_search_request(
    logger.notice(f"Received document search query: {query}")

    llm, fast_llm = get_default_llms()
+    pagination_limit, pagination_offset = _normalize_pagination(
+        limit=search_request.retrieval_options.limit,
+        offset=search_request.retrieval_options.offset,
+    )

    search_pipeline = SearchPipeline(
        search_request=SearchRequest(
@@ -72,8 +110,8 @@ def handle_search_request(
            human_selected_filters=search_request.retrieval_options.filters,
            enable_auto_detect_filters=search_request.retrieval_options.enable_auto_detect_filters,
            persona=None,  # For simplicity, default settings should be good for this search
-            offset=search_request.retrieval_options.offset,
-            limit=search_request.retrieval_options.limit,
+            offset=pagination_offset,
+            limit=pagination_limit + 1,
            rerank_settings=search_request.rerank_settings,
            evaluation_type=search_request.evaluation_type,
            chunks_above=search_request.chunks_above,
@@ -116,6 +154,9 @@ def handle_search_request(
        for section in top_sections
    ]

+    # Track whether the underlying retrieval produced more items than requested
+    has_more_results = len(top_docs) > pagination_limit
+
    # Deduping happens at the last step to avoid harming quality by dropping content early on
    deduped_docs = top_docs
    dropped_inds = None
@@ -134,7 +175,22 @@ def handle_search_request(
            dropped_indices=dropped_inds,
        )

-    return DocumentSearchResponse(top_documents=deduped_docs, llm_indices=llm_indices)
+    paginated_docs = deduped_docs[:pagination_limit]
+    llm_indices = [index for index in llm_indices if index < len(paginated_docs)]
+    has_more = has_more_results
+    pagination = DocumentSearchPagination(
+        offset=pagination_offset,
+        limit=pagination_limit,
+        returned_count=len(paginated_docs),
+        has_more=has_more,
+        next_offset=(pagination_offset + pagination_limit) if has_more else None,
+    )
+
+    return DocumentSearchResponse(
+        top_documents=paginated_docs,
+        llm_indices=llm_indices,
+        pagination=pagination,
+    )


 def get_answer_stream(
@@ -162,7 +218,7 @@ def get_answer_stream(
            is_for_edit=False,
        )

-    llm = get_main_llm_from_tuple(get_llms_for_persona(persona_info))
+    llm = get_main_llm_from_tuple(get_llms_for_persona(persona=persona_info, user=user))

    llm_tokenizer = get_tokenizer(
        model_name=llm.config.model_name,
--- a/backend/model_server/custom_models.py
+++ b/backend/model_server/custom_models.py
@@ -517,7 +517,7 @@ def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
    try:
        keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
    except Exception as e:
-        logger.error(
+        logger.warning(
            f"Failed to extract keywords for query: {intent_req.query} due to {e}"
        )
        # Fallback to keeping all words
--- a/backend/onyx/agents/agent_framework/models.py
+++ b/backend/onyx/agents/agent_framework/models.py
@@ -0,0 +1,47 @@
+from typing import Any
+from typing import Literal
+from typing import TypeAlias
+
+from pydantic import BaseModel
+
+from onyx.llm.model_response import ModelResponseStream
+
+
+class ToolCallStreamItem(BaseModel):
+    # Details payload describing a tool call requested by the model.
+    # Every field except `type` is optional and defaults to None.
+
+    # Identifier of the tool call this item describes.
+    call_id: str | None = None
+
+    id: str | None = None
+
+    # Name of the tool being invoked.
+    name: str | None = None
+
+    # Tool arguments as a raw, unparsed string.
+    arguments: str | None = None
+
+    # Discriminator; fixed to "function_call".
+    type: Literal["function_call"] = "function_call"
+
+    index: int | None = None
+
+
+class ToolCallOutputStreamItem(BaseModel):
+    # Details payload carrying the result of an executed tool call.
+
+    # Identifier of the tool call that produced this output.
+    call_id: str | None = None
+
+    # The tool's result; typed Any, so no particular shape is enforced here.
+    output: Any
+
+    # Discriminator; fixed to "function_call_output".
+    type: Literal["function_call_output"] = "function_call_output"
+
+
+RunItemStreamEventDetails: TypeAlias = ToolCallStreamItem | ToolCallOutputStreamItem
+
+
+class RunItemStreamEvent(BaseModel):
+    # Marker event for a boundary or step in a run: the start/end of a message
+    # or reasoning segment, a tool call, or a tool call's output.
+
+    # Which boundary or step this event marks.
+    type: Literal[
+        "message_start",
+        "message_done",
+        "reasoning_start",
+        "reasoning_done",
+        "tool_call",
+        "tool_call_output",
+    ]
+    # Optional extra data for the event; defaults to None.
+    # NOTE(review): looks like only "tool_call" / "tool_call_output" events
+    # populate this - confirm against producers.
+    details: RunItemStreamEventDetails | None = None
+
+
+StreamEvent: TypeAlias = ModelResponseStream | RunItemStreamEvent
--- a/backend/onyx/agents/agent_framework/query.py
+++ b/backend/onyx/agents/agent_framework/query.py
@@ -0,0 +1,215 @@
+import json
+from collections.abc import Iterator
+from collections.abc import Sequence
+from dataclasses import dataclass
+from typing import Any
+
+from onyx.agents.agent_framework.models import RunItemStreamEvent
+from onyx.agents.agent_framework.models import StreamEvent
+from onyx.agents.agent_framework.models import ToolCallOutputStreamItem
+from onyx.agents.agent_framework.models import ToolCallStreamItem
+from onyx.llm.interfaces import LanguageModelInput
+from onyx.llm.interfaces import LLM
+from onyx.llm.interfaces import ToolChoiceOptions
+from onyx.llm.message_types import ChatCompletionMessage
+from onyx.llm.message_types import ToolCall
+from onyx.llm.model_response import ModelResponseStream
+from onyx.tools.tool import RunContextWrapper
+from onyx.tools.tool import Tool
+
+
+@dataclass
+class QueryResult:
+    """Return value of ``query``: an event stream plus the messages it produces.
+
+    ``new_messages_stateful`` is the same list object that the stream generator
+    appends to, so it starts out empty and is only filled in while ``stream``
+    is being consumed.
+    """
+
+    stream: Iterator[StreamEvent]
+    new_messages_stateful: list[ChatCompletionMessage]
+
+
+def _serialize_tool_output(output: Any) -> str:
+    """Convert a tool's return value into the string stored in a tool message.
+
+    Strings pass through unchanged. Any other value is JSON-encoded; values
+    that ``json.dumps`` cannot encode fall back to ``str(output)``.
+    """
+    if isinstance(output, str):
+        return output
+    try:
+        return json.dumps(output)
+    except (TypeError, ValueError):
+        # TypeError: unsupported type. ValueError: e.g. a circular reference.
+        # In both cases degrade to the plain string form rather than abort.
+        return str(output)
+
+
+def _update_tool_call_with_delta(
+    tool_calls_in_progress: dict[int, dict[str, Any]],
+    tool_call_delta: Any,
+) -> None:
+    """Fold one streamed tool-call delta into the per-index accumulator.
+
+    The entry for ``tool_call_delta.index`` is created on first sight with an
+    empty argument string. A truthy ``id`` or function ``name`` overwrites the
+    stored value; truthy function ``arguments`` text is appended to what has
+    been collected so far. ``tool_calls_in_progress`` is mutated in place.
+    """
+    entry = tool_calls_in_progress.setdefault(
+        tool_call_delta.index,
+        {"id": None, "name": None, "arguments": ""},
+    )
+
+    if tool_call_delta.id:
+        entry["id"] = tool_call_delta.id
+
+    function_delta = tool_call_delta.function
+    if not function_delta:
+        return
+
+    if function_delta.name:
+        entry["name"] = function_delta.name
+
+    if function_delta.arguments:
+        entry["arguments"] += function_delta.arguments
+
+
+def query(
+    llm_with_default_settings: LLM,
+    messages: LanguageModelInput,
+    tools: Sequence[Tool],
+    context: Any,
+    tool_choice: ToolChoiceOptions | None = None,
+) -> QueryResult:
+    """Run one streamed LLM turn, executing any tool calls the model requests.
+
+    The returned ``QueryResult.stream`` is a lazy generator. It yields every
+    raw ``ModelResponseStream`` chunk from ``llm_with_default_settings.stream``
+    and interleaves ``RunItemStreamEvent`` markers for reasoning/message
+    boundaries and for tool calls and their outputs. Tools are executed
+    sequentially via ``tool.run_v2`` once a chunk reports
+    ``finish_reason == "tool_calls"``.
+
+    ``QueryResult.new_messages_stateful`` is appended to only while the stream
+    is being consumed, so it stays empty until the caller iterates the stream.
+
+    Args:
+        llm_with_default_settings: Model used to stream the completion.
+        messages: Prompt passed to the model.
+        tools: Tools offered to the model; looked up by ``tool.name`` when the
+            model calls them.
+        context: Opaque value wrapped in ``RunContextWrapper`` and handed to
+            each tool invocation.
+        tool_choice: Optional tool-choice setting forwarded to the model.
+
+    Returns:
+        A ``QueryResult`` holding the event stream and the message list.
+
+    Raises:
+        json.JSONDecodeError: While iterating the stream, if a tool call's
+            non-empty argument text is not valid JSON.
+    """
+    tool_definitions = [tool.tool_definition() for tool in tools]
+    tools_by_name = {tool.name: tool for tool in tools}
+
+    # Shared with the generator below; filled in as the stream is consumed.
+    new_messages_stateful: list[ChatCompletionMessage] = []
+
+    def stream_generator() -> Iterator[StreamEvent]:
+        reasoning_started = False
+        message_started = False
+
+        # Tool calls arrive as partial deltas keyed by index; accumulate here.
+        tool_calls_in_progress: dict[int, dict[str, Any]] = {}
+
+        content_parts: list[str] = []
+        reasoning_parts: list[str] = []
+
+        for chunk in llm_with_default_settings.stream(
+            prompt=messages,
+            tools=tool_definitions,
+            tool_choice=tool_choice,
+        ):
+            assert isinstance(chunk, ModelResponseStream)
+
+            delta = chunk.choice.delta
+            finish_reason = chunk.choice.finish_reason
+
+            if delta.reasoning_content:
+                reasoning_parts.append(delta.reasoning_content)
+                if not reasoning_started:
+                    yield RunItemStreamEvent(type="reasoning_start")
+                    reasoning_started = True
+
+            if delta.content:
+                content_parts.append(delta.content)
+                # The first answer text closes any open reasoning segment.
+                if reasoning_started:
+                    yield RunItemStreamEvent(type="reasoning_done")
+                    reasoning_started = False
+                if not message_started:
+                    yield RunItemStreamEvent(type="message_start")
+                    message_started = True
+
+            if delta.tool_calls:
+                if reasoning_started and not message_started:
+                    yield RunItemStreamEvent(type="reasoning_done")
+                    reasoning_started = False
+                if message_started:
+                    yield RunItemStreamEvent(type="message_done")
+                    message_started = False
+
+                for tool_call_delta in delta.tool_calls:
+                    _update_tool_call_with_delta(
+                        tool_calls_in_progress, tool_call_delta
+                    )
+
+            # Boundary markers above are emitted before the chunk that
+            # triggered them; the raw chunk itself is always forwarded.
+            yield chunk
+
+            if not finish_reason:
+                continue
+
+            # The completion is over: close whatever segment is still open so
+            # every *_start event has a matching *_done event, even when the
+            # model stopped right after reasoning.
+            if reasoning_started:
+                yield RunItemStreamEvent(type="reasoning_done")
+                reasoning_started = False
+            if message_started:
+                yield RunItemStreamEvent(type="message_done")
+                message_started = False
+
+            if finish_reason == "tool_calls" and tool_calls_in_progress:
+                sorted_tool_calls = sorted(tool_calls_in_progress.items())
+
+                # Build tool calls for the message and execute tools
+                assistant_tool_calls: list[ToolCall] = []
+                tool_outputs: dict[str, str] = {}
+
+                for _, tool_call_data in sorted_tool_calls:
+                    call_id = tool_call_data["id"]
+                    name = tool_call_data["name"]
+                    arguments_str = tool_call_data["arguments"]
+
+                    # Incomplete tool calls (no id or no name) are dropped.
+                    if call_id is None or name is None:
+                        continue
+
+                    assistant_tool_calls.append(
+                        {
+                            "id": call_id,
+                            "type": "function",
+                            "function": {
+                                "name": name,
+                                "arguments": arguments_str,
+                            },
+                        }
+                    )
+
+                    yield RunItemStreamEvent(
+                        type="tool_call",
+                        details=ToolCallStreamItem(
+                            call_id=call_id,
+                            name=name,
+                            arguments=arguments_str,
+                        ),
+                    )
+
+                    # NOTE(review): a call to a tool name that is not in
+                    # `tools` is recorded on the assistant message but gets no
+                    # tool output/message - confirm downstream tolerates that.
+                    if name in tools_by_name:
+                        tool = tools_by_name[name]
+                        # A tool call may stream no argument text at all (e.g.
+                        # a tool without parameters); treat that as "no
+                        # arguments" instead of letting json.loads("") raise.
+                        arguments = (
+                            json.loads(arguments_str) if arguments_str.strip() else {}
+                        )
+
+                        run_context = RunContextWrapper(context=context)
+
+                        # TODO: Instead of executing sequentially, execute in parallel
+                        # In practice, it's not a must right now since we don't use parallel
+                        # tool calls, so kicking the can down the road for now.
+                        output = tool.run_v2(run_context, **arguments)
+                        tool_outputs[call_id] = _serialize_tool_output(output)
+
+                        # The event carries the raw output; the message list
+                        # stores its serialized string form.
+                        yield RunItemStreamEvent(
+                            type="tool_call_output",
+                            details=ToolCallOutputStreamItem(
+                                call_id=call_id,
+                                output=output,
+                            ),
+                        )
+
+                new_messages_stateful.append(
+                    {
+                        "role": "assistant",
+                        "content": None,
+                        "tool_calls": assistant_tool_calls,
+                    }
+                )
+
+                # Tool results follow the assistant message, in index order.
+                for _, tool_call_data in sorted_tool_calls:
+                    call_id = tool_call_data["id"]
+
+                    if call_id in tool_outputs:
+                        new_messages_stateful.append(
+                            {
+                                "role": "tool",
+                                "content": tool_outputs[call_id],
+                                "tool_call_id": call_id,
+                            }
+                        )
+
+            elif finish_reason == "stop" and content_parts:
+                new_messages_stateful.append(
+                    {
+                        "role": "assistant",
+                        "content": "".join(content_parts),
+                    }
+                )
+
+    return QueryResult(
+        stream=stream_generator(),
+        new_messages_stateful=new_messages_stateful,
+    )
--- a/backend/onyx/agents/agent_sdk/message_format.py
+++ b/backend/onyx/agents/agent_sdk/message_format.py
@@ -2,15 +2,27 @@ from collections.abc import Sequence

 from langchain.schema.messages import BaseMessage

+from onyx.agents.agent_sdk.message_types import AgentSDKMessage
+from onyx.agents.agent_sdk.message_types import AssistantMessageWithContent
+from onyx.agents.agent_sdk.message_types import ImageContent
+from onyx.agents.agent_sdk.message_types import InputTextContent
+from onyx.agents.agent_sdk.message_types import SystemMessage
+from onyx.agents.agent_sdk.message_types import UserMessage
+

 # TODO: Currently, we only support native API input for images. For other
 # files, we process the content and share it as text in the message. In
 # the future, we might support native file uploads for other types of files.
-def base_messages_to_agent_sdk_msgs(msgs: Sequence[BaseMessage]) -> list[dict]:
-    return [_base_message_to_agent_sdk_msg(msg) for msg in msgs]
+def base_messages_to_agent_sdk_msgs(
+    msgs: Sequence[BaseMessage],
+    is_responses_api: bool,
+) -> list[AgentSDKMessage]:
+    """Convert each message in ``msgs`` to its Agent SDK form, keeping order.
+
+    ``is_responses_api`` is forwarded unchanged to the per-message converter.
+    """
+    converted: list[AgentSDKMessage] = []
+    for msg in msgs:
+        converted.append(_base_message_to_agent_sdk_msg(msg, is_responses_api))
+    return converted


-def _base_message_to_agent_sdk_msg(msg: BaseMessage) -> dict:
+def _base_message_to_agent_sdk_msg(
+    msg: BaseMessage, is_responses_api: bool
+) -> AgentSDKMessage:
    message_type_to_agent_sdk_role = {
        "human": "user",
        "system": "system",
@@ -20,59 +32,136 @@ def _base_message_to_agent_sdk_msg(msg: BaseMessage) -> dict:

    # Convert content to Agent SDK format
    content = msg.content
-    if isinstance(content, str):
-        # Convert string to structured text format
-        structured_content = [
-            {
-                "type": "input_text",
-                "text": content,
-            }
-        ]
-    elif isinstance(content, list):
-        # Content is already a list, process each item
-        structured_content = []
-        for item in content:
-            if isinstance(item, str):
-                structured_content.append(
-                    {
-                        "type": "input_text",
-                        "text": item,
-                    }
-                )
-            elif isinstance(item, dict):
-                # Handle different item types
-                item_type = item.get("type")

-                if item_type == "text":
-                    # Convert text type to input_text
-                    structured_content.append(
-                        {
-                            "type": "input_text",
-                            "text": item.get("text", ""),
-                        }
-                    )
-                elif item_type == "image_url":
-                    # Convert image_url to input_image format
-                    image_url = item.get("image_url", {})
-                    if isinstance(image_url, dict):
-                        url = image_url.get("url", "")
-                    else:
-                        url = image_url
-                    structured_content.append(
-                        {
-                            "type": "input_image",
-                            "image_url": url,
-                            "detail": "auto",
-                        }
-                    )
+    if isinstance(content, str):
+        # For system/user/assistant messages, use InputTextContent
+        if role in ("system", "user"):
+            input_text_content: list[InputTextContent | ImageContent] = [
+                InputTextContent(type="input_text", text=content)
+            ]
+            if role == "system":
+                # SystemMessage only accepts InputTextContent
+                system_msg: SystemMessage = {
+                    "role": "system",
+                    "content": [InputTextContent(type="input_text", text=content)],
+                }
+                return system_msg
+            else:  # user
+                user_msg: UserMessage = {
+                    "role": "user",
+                    "content": input_text_content,
+                }
+                return user_msg
+        else:  # assistant
+            assistant_msg: AssistantMessageWithContent
+            if is_responses_api:
+                from onyx.agents.agent_sdk.message_types import OutputTextContent
+
+                assistant_msg = {
+                    "role": "assistant",
+                    "content": [OutputTextContent(type="output_text", text=content)],
+                }
            else:
-                raise ValueError(f"Unexpected item type: {type(item)}. Item: {item}")
+                assistant_msg = {
+                    "role": "assistant",
+                    "content": [InputTextContent(type="input_text", text=content)],
+                }
+            return assistant_msg
+    elif isinstance(content, list):
+        # For lists, we need to process based on the role
+        if role == "assistant":
+            # For responses API, use OutputTextContent; otherwise use InputTextContent
+            assistant_content: list[InputTextContent | OutputTextContent] = []
+
+            if is_responses_api:
+                from onyx.agents.agent_sdk.message_types import OutputTextContent
+
+                for item in content:
+                    if isinstance(item, str):
+                        assistant_content.append(
+                            OutputTextContent(type="output_text", text=item)
+                        )
+                    elif isinstance(item, dict) and item.get("type") == "text":
+                        assistant_content.append(
+                            OutputTextContent(
+                                type="output_text", text=item.get("text", "")
+                            )
+                        )
+                    else:
+                        raise ValueError(
+                            f"Unexpected item type for assistant message: {type(item)}. Item: {item}"
+                        )
+            else:
+                for item in content:
+                    if isinstance(item, str):
+                        assistant_content.append(
+                            InputTextContent(type="input_text", text=item)
+                        )
+                    elif isinstance(item, dict) and item.get("type") == "text":
+                        assistant_content.append(
+                            InputTextContent(
+                                type="input_text", text=item.get("text", "")
+                            )
+                        )
+                    else:
+                        raise ValueError(
+                            f"Unexpected item type for assistant message: {type(item)}. Item: {item}"
+                        )
+
+            assistant_msg_list: AssistantMessageWithContent = {
+                "role": "assistant",
+                "content": assistant_content,
+            }
+            return assistant_msg_list
+        else:  # system or user - use InputTextContent
+            input_content: list[InputTextContent | ImageContent] = []
+            for item in content:
+                if isinstance(item, str):
+                    input_content.append(InputTextContent(type="input_text", text=item))
+                elif isinstance(item, dict):
+                    item_type = item.get("type")
+                    if item_type == "text":
+                        input_content.append(
+                            InputTextContent(
+                                type="input_text", text=item.get("text", "")
+                            )
+                        )
+                    elif item_type == "image_url":
+                        # Convert image_url to input_image format
+                        image_url = item.get("image_url", {})
+                        if isinstance(image_url, dict):
+                            url = image_url.get("url", "")
+                        else:
+                            url = image_url
+                        input_content.append(
+                            ImageContent(
+                                type="input_image", image_url=url, detail="auto"
+                            )
+                        )
+                    else:
+                        raise ValueError(f"Unexpected item type: {item_type}")
+                else:
+                    raise ValueError(
+                        f"Unexpected item type: {type(item)}. Item: {item}"
+                    )
+
+            if role == "system":
+                # SystemMessage only accepts InputTextContent (no images)
+                text_only_content = [
+                    c for c in input_content if c["type"] == "input_text"
+                ]
+                system_msg_list: SystemMessage = {
+                    "role": "system",
+                    "content": text_only_content,  # type: ignore[typeddict-item]
+                }
+                return system_msg_list
+            else:  # user
+                user_msg_list: UserMessage = {
+                    "role": "user",
+                    "content": input_content,
+                }
+                return user_msg_list
    else:
        raise ValueError(
            f"Unexpected content type: {type(content)}. Content: {content}"
        )
-
-    return {
-        "role": role,
-        "content": structured_content,
-    }
--- a/backend/onyx/agents/agent_sdk/message_types.py
+++ b/backend/onyx/agents/agent_sdk/message_types.py
@@ -0,0 +1,125 @@
+"""Strongly typed message structures for Agent SDK messages."""
+
+from typing import Literal
+from typing import NotRequired
+
+from typing_extensions import TypedDict
+
+
+class InputTextContent(TypedDict):
+    """Text content part tagged ``"input_text"``."""
+
+    type: Literal["input_text"]
+    text: str
+
+
+class OutputTextContent(TypedDict):
+    """Text content part tagged ``"output_text"``."""
+
+    type: Literal["output_text"]
+    text: str
+
+
+TextContent = InputTextContent | OutputTextContent
+
+
+class ImageContent(TypedDict):
+    """Image content part tagged ``"input_image"``."""
+
+    type: Literal["input_image"]
+    # Image reference as a string (presumably a URL - confirm with producers).
+    image_url: str
+    # Detail setting; typed as a free-form string here.
+    detail: str
+
+
+# Tool call structures
+class ToolCallFunction(TypedDict):
+    """Function name and argument string of a tool call."""
+
+    name: str
+    # Arguments as one string - presumably JSON-encoded; confirm with producers.
+    arguments: str
+
+
+class ToolCall(TypedDict):
+    """Tool call entry: an id, the fixed type ``"function"``, and the function payload."""
+
+    id: str
+    type: Literal["function"]
+    function: ToolCallFunction
+
+
+# Message types
+class SystemMessage(TypedDict):
+    """Message with role ``"system"``; content is limited to input-text parts."""
+
+    role: Literal["system"]
+    content: list[InputTextContent]  # System messages use input text
+
+
+class UserMessage(TypedDict):
+    """Message with role ``"user"``; content may mix input-text and image parts."""
+
+    role: Literal["user"]
+    content: list[
+        InputTextContent | ImageContent
+    ]  # User messages use input text or images
+
+
+class AssistantMessageWithContent(TypedDict):
+    """Message with role ``"assistant"`` carrying text content parts.
+
+    Both ``input_text`` and ``output_text`` parts are accepted by the type.
+    """
+
+    role: Literal["assistant"]
+    content: list[
+        InputTextContent | OutputTextContent
+    ]  # Assistant messages use output_text for responses API compatibility
+
+
+class AssistantMessageWithToolCalls(TypedDict):
+    """Message with role ``"assistant"`` that carries tool calls and declares no ``content`` key."""
+
+    role: Literal["assistant"]
+    tool_calls: list[ToolCall]
+
+
+class AssistantMessageDuringAgentRun(TypedDict):
+    """Assistant message shape that additionally carries ``id``, ``status`` and ``type``.
+
+    ``content`` is either a list of text parts or a list of tool calls.
+    """
+
+    role: Literal["assistant"]
+    id: str
+    content: (
+        list[InputTextContent | OutputTextContent] | list[ToolCall]
+    )  # Assistant runtime messages receive output_text from agents SDK for responses API compatibility
+    # One of "completed", "failed" or "in_progress".
+    status: Literal["completed", "failed", "in_progress"]
+    # Discriminator; fixed to "message".
+    type: Literal["message"]
+
+
+class ToolMessage(TypedDict):
+    """Message with role ``"tool"``: string content plus the ``tool_call_id`` it belongs to."""
+
+    role: Literal["tool"]
+    content: str
+    tool_call_id: str
+
+
+class FunctionCallMessage(TypedDict):
+    """Agent SDK function call message format.
+
+    Every key is required except ``id``, which may be omitted.
+    """
+
+    type: Literal["function_call"]
+    id: NotRequired[str]
+    call_id: str
+    name: str
+    # Arguments as one string - presumably JSON-encoded; confirm with producers.
+    arguments: str
+
+
+class FunctionCallOutputMessage(TypedDict):
+    """Agent SDK function call output message format.
+
+    ``output`` is a string. ``call_id`` presumably matches the ``call_id`` of
+    the corresponding function call message - confirm with producers.
+    """
+
+    type: Literal["function_call_output"]
+    call_id: str
+    output: str
+
+
+class SummaryText(TypedDict):
+    """Summary text item in reasoning messages.
+
+    Holds the text itself plus the fixed type tag ``"summary_text"``.
+    """
+
+    text: str
+    type: Literal["summary_text"]
+
+
+class ReasoningMessage(TypedDict):
+    """Agent SDK reasoning message format.
+
+    Carries an ``id``, the fixed type tag ``"reasoning"``, and a list of
+    ``SummaryText`` items.
+    """
+
+    id: str
+    type: Literal["reasoning"]
+    summary: list[SummaryText]
+
+
+# Union type for all Agent SDK messages
+AgentSDKMessage = (
+    SystemMessage
+    | UserMessage
+    | AssistantMessageWithContent
+    | AssistantMessageWithToolCalls
+    | AssistantMessageDuringAgentRun
+    | ToolMessage
+    | FunctionCallMessage
+    | FunctionCallOutputMessage
+    | ReasoningMessage
+)
--- a/backend/onyx/agents/agent_sdk/monkey_patches.py
+++ b/backend/onyx/agents/agent_sdk/monkey_patches.py
@@ -0,0 +1,36 @@
+from typing import Any
+
+from agents.models.openai_responses import Converter as OpenAIResponsesConverter
+
+
+# TODO: I am very sad that I have to monkey patch this :(
+# Basically, OpenAI agents sdk doesn't convert the tool choice correctly
+# when they have a built-in tool in their framework, like they do for web_search
+# and image_generation.
+# Going to open up a thread with OpenAI agents team to see what they recommend
+# or what we can fix.
+# A discussion is warranted, but we likely want to just write our own LitellmModel for
+# the OpenAI agents SDK since they probably don't really care about Litellm and will
+# prioritize functionality for their own models.
+def monkey_patch_convert_tool_choice_to_ignore_openai_hosted_web_search() -> None:
+    """Wrap ``OpenAIResponsesConverter.convert_tool_choice`` in place.
+
+    After patching, the tool choices ``"web_search"`` and ``"image_generation"``
+    are converted to plain function-style choices
+    (``{"type": "function", "name": ...}``); every other value is delegated to
+    the original implementation. Calling this function again once the patch is
+    installed is a no-op.
+    """
+    # Idempotency guard: the installed wrapper is recognised by its function
+    # name, so the inner function below must keep exactly this name.
+    if (
+        getattr(OpenAIResponsesConverter.convert_tool_choice, "__name__", "")
+        == "_patched_convert_tool_choice"
+    ):
+        return
+
+    # Grab the plain function behind the classmethod so it can be called with
+    # an explicit `cls` from inside the wrapper.
+    orig_func = OpenAIResponsesConverter.convert_tool_choice.__func__  # type: ignore[attr-defined]
+
+    def _patched_convert_tool_choice(cls: type, tool_choice: Any) -> Any:
+        # Handle OpenAI hosted tools that we have custom implementations for
+        # Without this patch, the library uses special formatting that breaks our custom tools
+        # See: https://platform.openai.com/docs/api-reference/responses/create#responses_create-tool_choice-hosted_tool-type
+        if tool_choice == "web_search":
+            return {"type": "function", "name": "web_search"}
+        if tool_choice == "image_generation":
+            return {"type": "function", "name": "image_generation"}
+        # Anything else keeps the library's original conversion.
+        return orig_func(cls, tool_choice)
+
+    # Re-wrap as a classmethod so callers keep invoking it on the class.
+    OpenAIResponsesConverter.convert_tool_choice = classmethod(  # type: ignore[method-assign, assignment]
+        _patched_convert_tool_choice
+    )
--- a/backend/onyx/agents/agent_sdk/sync_agent_stream_adapter.py
+++ b/backend/onyx/agents/agent_sdk/sync_agent_stream_adapter.py
@@ -2,6 +2,7 @@ import asyncio
 import queue
 import threading
 from collections.abc import Iterator
+from collections.abc import Sequence
 from typing import Generic
 from typing import Optional
 from typing import TypeVar
@@ -11,6 +12,7 @@ from agents import RunResultStreaming
 from agents import TContext
 from agents.run import Runner

+from onyx.agents.agent_sdk.message_types import AgentSDKMessage
 from onyx.utils.threadpool_concurrency import run_in_background

 T = TypeVar("T")
@@ -41,7 +43,7 @@ class SyncAgentStream(Generic[T]):
        self,
        *,
        agent: Agent,
-        input: list[dict],
+        input: Sequence[AgentSDKMessage],
        context: TContext | None = None,
        max_turns: int = 100,
        queue_maxsize: int = 0,
--- a/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a1_search_objects.py
+++ b/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a1_search_objects.py
@@ -14,10 +14,10 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
-from onyx.configs.constants import DocumentSource
 from onyx.prompts.agents.dc_prompts import DC_OBJECT_NO_BASE_DATA_EXTRACTION_PROMPT
 from onyx.prompts.agents.dc_prompts import DC_OBJECT_SEPARATOR
 from onyx.prompts.agents.dc_prompts import DC_OBJECT_WITH_BASE_DATA_EXTRACTION_PROMPT
+from onyx.secondary_llm_flows.source_filter import strings_to_document_sources
 from onyx.utils.logger import setup_logger
 from onyx.utils.threadpool_concurrency import run_with_timeout

@@ -61,10 +61,12 @@ def search_objects(
        if agent_1_independent_sources_str is None:
            raise ValueError("Agent 1 Independent Research Sources not found")

-        document_sources = [
-            DocumentSource(x.strip().lower())
-            for x in agent_1_independent_sources_str.split(DC_OBJECT_SEPARATOR)
-        ]
+        document_sources = strings_to_document_sources(
+            [
+                x.strip().lower()
+                for x in agent_1_independent_sources_str.split(DC_OBJECT_SEPARATOR)
+            ]
+        )

        agent_1_output_objective = extract_section(
            agent_1_instructions, "Output Objective:"
@@ -119,7 +121,7 @@ def search_objects(
    try:
        llm_response = run_with_timeout(
            30,
-            primary_llm.invoke,
+            primary_llm.invoke_langchain,
            prompt=msg,
            timeout_override=30,
            max_tokens=300,
--- a/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a2_research_object_source.py
+++ b/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a2_research_object_source.py
@@ -155,7 +155,7 @@ def research_object_source(
    try:
        llm_response = run_with_timeout(
            30,
-            primary_llm.invoke,
+            primary_llm.invoke_langchain,
            prompt=msg,
            timeout_override=30,
            max_tokens=300,
--- a/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a4_consolidate_object_research.py
+++ b/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a4_consolidate_object_research.py
@@ -76,7 +76,7 @@ def consolidate_object_research(
    try:
        llm_response = run_with_timeout(
            30,
-            primary_llm.invoke,
+            primary_llm.invoke_langchain,
            prompt=msg,
            timeout_override=30,
            max_tokens=300,
--- a/backend/onyx/agents/agent_search/dr/models.py
+++ b/backend/onyx/agents/agent_search/dr/models.py
@@ -1,6 +1,7 @@
 from enum import Enum

 from pydantic import BaseModel
+from pydantic import ConfigDict

 from onyx.agents.agent_search.dr.enums import DRPath
 from onyx.agents.agent_search.dr.sub_agents.image_generation.models import (
@@ -74,8 +75,7 @@ class OrchestratorTool(BaseModel):
    cost: float
    tool_object: Tool | None = None  # None for CLOSER

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)


 class IterationInstructions(BaseModel):
--- a/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py
+++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py
@@ -41,7 +41,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.agents.agent_search.utils import create_question_prompt
 from onyx.chat.chat_utils import build_citation_map_from_numbers
 from onyx.chat.chat_utils import saved_search_docs_from_llm_docs
-from onyx.chat.memories import make_memories_callback
+from onyx.chat.memories import get_memories
 from onyx.chat.models import PromptConfig
 from onyx.chat.prompt_builder.citations_prompt import build_citations_system_message
 from onyx.chat.prompt_builder.citations_prompt import build_citations_user_message
@@ -98,14 +98,6 @@ from onyx.utils.logger import setup_logger
 logger = setup_logger()


-def _format_tool_name(tool_name: str) -> str:
-    """Convert tool name to LLM-friendly format."""
-    name = tool_name.replace(" ", "_")
-    # take care of camel case like GetAPIKey -> GET_API_KEY for LLM readability
-    name = re.sub(r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])", "_", name)
-    return name.upper()
-
-
 def _get_available_tools(
    db_session: Session,
    graph_config: GraphConfig,
@@ -498,11 +490,9 @@ def clarifier(
        if graph_config.tooling.search_tool
        else None
    )
-    memories_callback = make_memories_callback(user, db_session)
+    memories = get_memories(user, db_session)
    assistant_system_prompt = handle_company_awareness(assistant_system_prompt)
-    assistant_system_prompt = handle_memories(
-        assistant_system_prompt, memories_callback
-    )
+    assistant_system_prompt = handle_memories(assistant_system_prompt, memories)

    chat_history_string = (
        get_chat_history_string(
@@ -562,7 +552,7 @@ def clarifier(
                # if there is only one tool (Closer), we don't need to decide. It's an LLM answer
                llm_decision = DecisionResponse(decision="LLM", reasoning="")

-            if llm_decision.decision == "LLM":
+            if llm_decision.decision == "LLM" and research_type != ResearchType.DEEP:

                write_custom_event(
                    current_step_nr,
@@ -654,11 +644,14 @@ def clarifier(
            if context_llm_docs:
                persona = graph_config.inputs.persona
                if persona is not None:
-                    prompt_config = PromptConfig.from_model(persona)
+                    prompt_config = PromptConfig.from_model(
+                        persona, db_session=graph_config.persistence.db_session
+                    )
                else:
                    prompt_config = PromptConfig(
-                        system_prompt=assistant_system_prompt,
-                        task_prompt="",
+                        default_behavior_system_prompt=assistant_system_prompt,
+                        custom_instructions=None,
+                        reminder="",
                        datetime_aware=True,
                    )

@@ -687,7 +680,7 @@ def clarifier(

            @traced(name="clarifier stream and process", type="llm")
            def stream_and_process() -> BasicSearchProcessedStreamResults:
-                stream = graph_config.tooling.primary_llm.stream(
+                stream = graph_config.tooling.primary_llm.stream_langchain(
                    prompt=create_question_prompt(
                        cast(str, system_prompt_to_use),
                        cast(str, user_prompt_to_use),
@@ -702,55 +695,58 @@ def clarifier(
                    should_stream_answer=True,
                    writer=writer,
                    ind=0,
-                    final_search_results=context_llm_docs,
-                    displayed_search_results=context_llm_docs,
+                    search_results=context_llm_docs,
                    generate_final_answer=True,
                    chat_message_id=str(graph_config.persistence.chat_session_id),
                )

-            full_response = stream_and_process()
-            if len(full_response.ai_message_chunk.tool_calls) == 0:
+            # Deep research always continues to clarification or search
+            if research_type != ResearchType.DEEP:
+                full_response = stream_and_process()
+                if len(full_response.ai_message_chunk.tool_calls) == 0:

-                if isinstance(full_response.full_answer, str):
-                    full_answer = (
-                        normalize_square_bracket_citations_to_double_with_links(
-                            full_response.full_answer
+                    if isinstance(full_response.full_answer, str):
+                        full_answer = (
+                            normalize_square_bracket_citations_to_double_with_links(
+                                full_response.full_answer
+                            )
+                        )
+                    else:
+                        full_answer = None
+
+                    # Persist final documents and derive citations when using in-context docs
+                    final_documents_db, citations_map = (
+                        _persist_final_docs_and_citations(
+                            db_session=db_session,
+                            context_llm_docs=context_llm_docs,
+                            full_answer=full_answer,
                        )
                    )
-                else:
-                    full_answer = None

-                # Persist final documents and derive citations when using in-context docs
-                final_documents_db, citations_map = _persist_final_docs_and_citations(
-                    db_session=db_session,
-                    context_llm_docs=context_llm_docs,
-                    full_answer=full_answer,
-                )
+                    update_db_session_with_messages(
+                        db_session=db_session,
+                        chat_message_id=message_id,
+                        chat_session_id=graph_config.persistence.chat_session_id,
+                        is_agentic=graph_config.behavior.use_agentic_search,
+                        message=full_answer,
+                        token_count=len(llm_tokenizer.encode(full_answer or "")),
+                        citations=citations_map,
+                        final_documents=final_documents_db or None,
+                        update_parent_message=True,
+                        research_answer_purpose=ResearchAnswerPurpose.ANSWER,
+                    )

-                update_db_session_with_messages(
-                    db_session=db_session,
-                    chat_message_id=message_id,
-                    chat_session_id=graph_config.persistence.chat_session_id,
-                    is_agentic=graph_config.behavior.use_agentic_search,
-                    message=full_answer,
-                    token_count=len(llm_tokenizer.encode(full_answer or "")),
-                    citations=citations_map,
-                    final_documents=final_documents_db or None,
-                    update_parent_message=True,
-                    research_answer_purpose=ResearchAnswerPurpose.ANSWER,
-                )
+                    db_session.commit()

-                db_session.commit()
-
-                return OrchestrationSetup(
-                    original_question=original_question,
-                    chat_history_string="",
-                    tools_used=[DRPath.END.value],
-                    query_list=[],
-                    available_tools=available_tools,
-                    assistant_system_prompt=assistant_system_prompt,
-                    assistant_task_prompt=assistant_task_prompt,
-                )
+                    return OrchestrationSetup(
+                        original_question=original_question,
+                        chat_history_string="",
+                        tools_used=[DRPath.END.value],
+                        query_list=[],
+                        available_tools=available_tools,
+                        assistant_system_prompt=assistant_system_prompt,
+                        assistant_task_prompt=assistant_task_prompt,
+                    )

        # Continue, as external knowledge is required.

--- a/backend/onyx/agents/agent_search/dr/process_llm_stream.py
+++ b/backend/onyx/agents/agent_search/dr/process_llm_stream.py
@@ -41,18 +41,16 @@ def process_llm_stream(
    should_stream_answer: bool,
    writer: StreamWriter,
    ind: int,
-    final_search_results: list[LlmDoc] | None = None,
-    displayed_search_results: list[LlmDoc] | None = None,
+    search_results: list[LlmDoc] | None = None,
    generate_final_answer: bool = False,
    chat_message_id: str | None = None,
 ) -> BasicSearchProcessedStreamResults:
    tool_call_chunk = AIMessageChunk(content="")

-    if final_search_results and displayed_search_results:
+    if search_results:
        answer_handler: AnswerResponseHandler = CitationResponseHandler(
-            context_docs=final_search_results,
-            final_doc_id_to_rank_map=map_document_id_order(final_search_results),
-            display_doc_id_to_rank_map=map_document_id_order(displayed_search_results),
+            context_docs=search_results,
+            doc_id_to_rank_map=map_document_id_order(search_results),
        )
    else:
        answer_handler = PassThroughAnswerResponseHandler()
@@ -78,7 +76,7 @@ def process_llm_stream(
        ):
            tool_call_chunk += message  # type: ignore
        elif should_stream_answer:
-            for response_part in answer_handler.handle_response_part(message, []):
+            for response_part in answer_handler.handle_response_part(message):

                # only stream out answer parts
                if (
@@ -94,7 +92,7 @@ def process_llm_stream(
                    if not start_final_answer_streaming_set:
                        # Convert LlmDocs to SavedSearchDocs
                        saved_search_docs = saved_search_docs_from_llm_docs(
-                            final_search_results
+                            search_results
                        )
                        write_custom_event(
                            ind,
--- a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py
@@ -30,6 +30,7 @@ from onyx.db.connector import DocumentSource
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.prompts.dr_prompts import BASE_SEARCH_PROCESSING_PROMPT
 from onyx.prompts.dr_prompts import INTERNAL_SEARCH_PROMPTS
+from onyx.secondary_llm_flows.source_filter import strings_to_document_sources
 from onyx.server.query_and_chat.streaming_models import SearchToolDelta
 from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.tool_implementations.search.search_tool import (
@@ -128,10 +129,11 @@ def basic_search(
        if re.match(date_pattern, implied_start_date):
            implied_time_filter = datetime.strptime(implied_start_date, "%Y-%m-%d")

-    specified_source_types: list[DocumentSource] | None = [
-        DocumentSource(source_type)
-        for source_type in search_processing.specified_source_types
-    ]
+    specified_source_types: list[DocumentSource] | None = (
+        strings_to_document_sources(search_processing.specified_source_types)
+        if search_processing.specified_source_types
+        else None
+    )

    if specified_source_types is not None and len(specified_source_types) == 0:
        specified_source_types = None
--- a/backend/onyx/agents/agent_search/dr/sub_agents/custom_tool/dr_custom_tool_2_act.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/custom_tool/dr_custom_tool_2_act.py
@@ -66,7 +66,7 @@ def custom_tool_act(
            base_question=base_question,
            tool_description=custom_tool_info.description,
        )
-        tool_calling_msg = graph_config.tooling.primary_llm.invoke(
+        tool_calling_msg = graph_config.tooling.primary_llm.invoke_langchain(
            tool_use_prompt,
            tools=[custom_tool.tool_definition()],
            tool_choice="required",
@@ -125,7 +125,7 @@ def custom_tool_act(
        query=branch_query, base_question=base_question, tool_response=tool_str
    )
    answer_string = str(
-        graph_config.tooling.primary_llm.invoke(
+        graph_config.tooling.primary_llm.invoke_langchain(
            tool_summary_prompt, timeout_override=TF_DR_TIMEOUT_SHORT
        ).content
    ).strip()
--- a/backend/onyx/agents/agent_search/dr/sub_agents/generic_internal_tool/dr_generic_internal_tool_2_act.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/generic_internal_tool/dr_generic_internal_tool_2_act.py
@@ -65,7 +65,7 @@ def generic_internal_tool_act(
            base_question=base_question,
            tool_description=generic_internal_tool_info.description,
        )
-        tool_calling_msg = graph_config.tooling.primary_llm.invoke(
+        tool_calling_msg = graph_config.tooling.primary_llm.invoke_langchain(
            tool_use_prompt,
            tools=[generic_internal_tool.tool_definition()],
            tool_choice="required",
@@ -113,7 +113,7 @@ def generic_internal_tool_act(
        query=branch_query, base_question=base_question, tool_response=tool_str
    )
    answer_string = str(
-        graph_config.tooling.primary_llm.invoke(
+        graph_config.tooling.primary_llm.invoke_langchain(
            tool_summary_prompt, timeout_override=TF_DR_TIMEOUT_SHORT
        ).content
    ).strip()
--- a/backend/onyx/agents/agent_search/dr/sub_agents/image_generation/dr_image_generation_2_act.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/image_generation/dr_image_generation_2_act.py
@@ -117,10 +117,8 @@ def image_generation(

    # save images to file store
    file_ids = save_files(
-        urls=[img.url for img in image_generation_responses if img.url],
-        base64_files=[
-            img.image_data for img in image_generation_responses if img.image_data
-        ],
+        urls=[],
+        base64_files=[img.image_data for img in image_generation_responses],
    )

    final_generated_images = [
--- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/clients/exa_client.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/clients/exa_client.py
@@ -1,3 +1,5 @@
+from collections.abc import Sequence
+
 from exa_py import Exa
 from exa_py.api import HighlightsContentsOptions

@@ -47,9 +49,9 @@ class ExaClient(WebSearchProvider):
        ]

    @retry_builder(tries=3, delay=1, backoff=2)
-    def contents(self, urls: list[str]) -> list[WebContent]:
+    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        response = self.exa.get_contents(
-            urls=urls,
+            urls=list(urls),
            text=True,
            livecrawl="preferred",
        )
--- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/clients/serper_client.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/clients/serper_client.py
@@ -1,4 +1,5 @@
 import json
+from collections.abc import Sequence
 from concurrent.futures import ThreadPoolExecutor

 import requests
@@ -55,7 +56,7 @@ class SerperClient(WebSearchProvider):
            for result in organic_results
        ]

-    def contents(self, urls: list[str]) -> list[WebContent]:
+    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        if not urls:
            return []

--- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_2_search.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_2_search.py
@@ -78,7 +78,7 @@ def web_search(
    def _search(search_query: str) -> list[WebSearchResult]:
        search_results: list[WebSearchResult] = []
        try:
-            search_results = provider.search(search_query)
+            search_results = list(provider.search(search_query))
        except Exception as e:
            logger.error(f"Error performing search: {e}")
        return search_results
--- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_6_summarize.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_6_summarize.py
@@ -21,6 +21,7 @@ from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
 from onyx.context.search.models import InferenceSection
 from onyx.prompts.dr_prompts import INTERNAL_SEARCH_PROMPTS
 from onyx.utils.logger import setup_logger
+from onyx.utils.url import normalize_url


 logger = setup_logger()
@@ -38,12 +39,17 @@ def is_summarize(
    node_start_time = datetime.now()

    # build branch iterations from fetch inputs
+    # Normalize URLs to handle mismatches from query parameters (e.g., ?activeTab=explore)
    url_to_raw_document: dict[str, InferenceSection] = {}
    for raw_document in state.raw_documents:
-        url_to_raw_document[raw_document.center_chunk.semantic_identifier] = (
-            raw_document
-        )
-    urls = state.branch_questions_to_urls[state.branch_question]
+        normalized_url = normalize_url(raw_document.center_chunk.semantic_identifier)
+        url_to_raw_document[normalized_url] = raw_document
+
+    # Normalize the URLs from branch_questions_to_urls as well
+    urls = [
+        normalize_url(url)
+        for url in state.branch_questions_to_urls[state.branch_question]
+    ]
    current_iteration = state.iteration_nr
    graph_config = cast(GraphConfig, config["metadata"]["config"])
    research_type = graph_config.behavior.research_type
--- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/models.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/models.py
@@ -1,9 +1,13 @@
 from abc import ABC
 from abc import abstractmethod
+from collections.abc import Sequence
 from datetime import datetime
 from enum import Enum

 from pydantic import BaseModel
+from pydantic import field_validator
+
+from onyx.utils.url import normalize_url


 class ProviderType(Enum):
@@ -16,9 +20,14 @@ class ProviderType(Enum):
 class WebSearchResult(BaseModel):
    title: str
    link: str
+    snippet: str | None = None
    author: str | None = None
    published_date: datetime | None = None
-    snippet: str | None = None
+
+    @field_validator("link")
+    @classmethod
+    def normalize_link(cls, v: str) -> str:
+        return normalize_url(v)


 class WebContent(BaseModel):
@@ -28,12 +37,17 @@ class WebContent(BaseModel):
    published_date: datetime | None = None
    scrape_successful: bool = True

+    @field_validator("link")
+    @classmethod
+    def normalize_link(cls, v: str) -> str:
+        return normalize_url(v)
+

 class WebSearchProvider(ABC):
    @abstractmethod
-    def search(self, query: str) -> list[WebSearchResult]:
+    def search(self, query: str) -> Sequence[WebSearchResult]:
        pass

    @abstractmethod
-    def contents(self, urls: list[str]) -> list[WebContent]:
+    def contents(self, urls: Sequence[str]) -> list[WebContent]:
        pass
--- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/utils.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/utils.py
@@ -4,6 +4,8 @@ from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
 from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
    WebSearchResult,
 )
+from onyx.chat.models import DOCUMENT_CITATION_NUMBER_EMPTY_VALUE
+from onyx.chat.models import LlmDoc
 from onyx.configs.constants import DocumentSource
 from onyx.context.search.models import InferenceChunk
 from onyx.context.search.models import InferenceSection
@@ -75,3 +77,23 @@ def dummy_inference_section_from_internet_search_result(
        chunks=[],
        combined_content="",
    )
+
+
+def llm_doc_from_web_content(web_content: WebContent) -> LlmDoc:
+    """Create an LlmDoc from WebContent with the INTERNET_SEARCH_DOC_ prefix"""
+    return LlmDoc(
+        # TODO: Decide whether document_id should be used this way. The field is overloaded here:
+        # it should ideally correspond to a document in the database, but callers of this function
+        # already know that a web result will not be found there.
+        document_id="INTERNET_SEARCH_DOC_" + web_content.link,
+        content=truncate_search_result_content(web_content.full_content),
+        blurb=web_content.link,
+        semantic_identifier=web_content.link,
+        source_type=DocumentSource.WEB,
+        metadata={},
+        link=web_content.link,
+        document_citation_number=DOCUMENT_CITATION_NUMBER_EMPTY_VALUE,
+        updated_at=web_content.published_date,
+        source_links={},
+        match_highlights=[],
+    )
--- a/backend/onyx/agents/agent_search/kb_search/nodes/a1_extract_ert.py
+++ b/backend/onyx/agents/agent_search/kb_search/nodes/a1_extract_ert.py
@@ -106,7 +106,7 @@ def extract_ert(
    try:
        llm_response = run_with_timeout(
            KG_ENTITY_EXTRACTION_TIMEOUT,
-            primary_llm.invoke,
+            primary_llm.invoke_langchain,
            prompt=msg,
            timeout_override=15,
            max_tokens=300,
@@ -176,7 +176,7 @@ def extract_ert(
    try:
        llm_response = run_with_timeout(
            KG_RELATIONSHIP_EXTRACTION_TIMEOUT,
-            primary_llm.invoke,
+            primary_llm.invoke_langchain,
            prompt=msg,
            timeout_override=15,
            max_tokens=300,
--- a/backend/onyx/agents/agent_search/kb_search/nodes/a2_analyze.py
+++ b/backend/onyx/agents/agent_search/kb_search/nodes/a2_analyze.py
@@ -202,7 +202,7 @@ def analyze(
        llm_response = run_with_timeout(
            KG_STRATEGY_GENERATION_TIMEOUT,
            # fast_llm.invoke,
-            primary_llm.invoke,
+            primary_llm.invoke_langchain,
            prompt=msg,
            timeout_override=5,
            max_tokens=100,
--- a/backend/onyx/agents/agent_search/kb_search/nodes/a3_generate_simple_sql.py
+++ b/backend/onyx/agents/agent_search/kb_search/nodes/a3_generate_simple_sql.py
@@ -169,7 +169,7 @@ def _get_source_documents(
    try:
        llm_response = run_with_timeout(
            KG_SQL_GENERATION_TIMEOUT,
-            llm.invoke,
+            llm.invoke_langchain,
            prompt=msg,
            timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
            max_tokens=KG_SQL_GENERATION_MAX_TOKENS,
@@ -321,7 +321,7 @@ def generate_simple_sql(
        try:
            llm_response = run_with_timeout(
                KG_SQL_GENERATION_TIMEOUT,
-                primary_llm.invoke,
+                primary_llm.invoke_langchain,
                prompt=msg,
                timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
                max_tokens=KG_SQL_GENERATION_MAX_TOKENS,
@@ -451,7 +451,7 @@ def generate_simple_sql(
            try:
                llm_response = run_with_timeout(
                    KG_SQL_GENERATION_TIMEOUT,
-                    primary_llm.invoke,
+                    primary_llm.invoke_langchain,
                    prompt=msg,
                    timeout_override=KG_SQL_GENERATION_TIMEOUT_OVERRIDE,
                    max_tokens=KG_SQL_GENERATION_MAX_TOKENS,
--- a/backend/onyx/agents/agent_search/kb_search/nodes/b1_construct_deep_search_filters.py
+++ b/backend/onyx/agents/agent_search/kb_search/nodes/b1_construct_deep_search_filters.py
@@ -94,7 +94,7 @@ def construct_deep_search_filters(
    try:
        llm_response = run_with_timeout(
            KG_FILTER_CONSTRUCTION_TIMEOUT,
-            llm.invoke,
+            llm.invoke_langchain,
            prompt=msg,
            timeout_override=15,
            max_tokens=1400,
--- a/backend/onyx/agents/agent_search/kb_search/nodes/b2p_process_individual_deep_search.py
+++ b/backend/onyx/agents/agent_search/kb_search/nodes/b2p_process_individual_deep_search.py
@@ -137,7 +137,7 @@ def process_individual_deep_search(
    try:
        llm_response = run_with_timeout(
            KG_OBJECT_SOURCE_RESEARCH_TIMEOUT,
-            primary_llm.invoke,
+            primary_llm.invoke_langchain,
            prompt=msg,
            timeout_override=KG_OBJECT_SOURCE_RESEARCH_TIMEOUT,
            max_tokens=300,
--- a/backend/onyx/agents/agent_search/kb_search/nodes/b2s_filtered_search.py
+++ b/backend/onyx/agents/agent_search/kb_search/nodes/b2s_filtered_search.py
@@ -127,7 +127,7 @@ def filtered_search(
    try:
        llm_response = run_with_timeout(
            KG_FILTERED_SEARCH_TIMEOUT,
-            llm.invoke,
+            llm.invoke_langchain,
            prompt=msg,
            timeout_override=30,
            max_tokens=300,
--- a/backend/onyx/agents/agent_search/models.py
+++ b/backend/onyx/agents/agent_search/models.py
@@ -1,6 +1,7 @@
 from uuid import UUID

 from pydantic import BaseModel
+from pydantic import ConfigDict
 from sqlalchemy.orm import Session

 from onyx.agents.agent_search.dr.enums import ResearchType
@@ -25,8 +26,7 @@ class GraphInputs(BaseModel):
    structured_response_format: dict | None = None
    project_instructions: str | None = None

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)


 class GraphTooling(BaseModel):
@@ -41,8 +41,7 @@ class GraphTooling(BaseModel):
    force_use_tool: ForceUseTool
    using_tool_calling_llm: bool = False

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)


 class GraphPersistence(BaseModel):
@@ -57,8 +56,7 @@ class GraphPersistence(BaseModel):
    # message were flushed to; only needed for agentic search
    db_session: Session

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)


 class GraphSearchConfig(BaseModel):
@@ -87,5 +85,4 @@ class GraphConfig(BaseModel):
    # Only needed for agentic search
    persistence: GraphPersistence

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)
--- a/backend/onyx/agents/agent_search/orchestration/states.py
+++ b/backend/onyx/agents/agent_search/orchestration/states.py
@@ -1,4 +1,5 @@
 from pydantic import BaseModel
+from pydantic import ConfigDict

 from onyx.chat.prompt_builder.schemas import PromptSnapshot
 from onyx.tools.message import ToolCallSummary
@@ -38,8 +39,7 @@ class ToolChoice(BaseModel):
    id: str | None
    search_tool_override_kwargs: SearchToolOverrideKwargs = SearchToolOverrideKwargs()

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)


 class ToolChoiceUpdate(BaseModel):
--- a/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
@@ -82,7 +82,7 @@ def trim_prompt_piece(config: LLMConfig, prompt_piece: str, reserved_str: str) -
 def build_history_prompt(config: GraphConfig, question: str) -> str:
    prompt_builder = config.inputs.prompt_builder
    persona_base = get_persona_agent_prompt_expressions(
-        config.inputs.persona
+        config.inputs.persona, db_session=config.persistence.db_session
    ).base_prompt

    if prompt_builder is None:
@@ -126,7 +126,9 @@ def build_history_prompt(config: GraphConfig, question: str) -> str:
 def get_prompt_enrichment_components(
    config: GraphConfig,
 ) -> AgentPromptEnrichmentComponents:
-    persona_prompts = get_persona_agent_prompt_expressions(config.inputs.persona)
+    persona_prompts = get_persona_agent_prompt_expressions(
+        config.inputs.persona, db_session=config.persistence.db_session
+    )

    history = build_history_prompt(config, config.inputs.prompt_builder.raw_user_query)

--- a/backend/onyx/agents/agent_search/shared_graph_utils/llm.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/llm.py
@@ -76,7 +76,7 @@ def stream_llm_answer(
    else:
        citation_processor = None

-    for message in llm.stream(
+    for message in llm.stream_langchain(
        prompt,
        timeout_override=timeout_override,
        max_tokens=max_tokens,
@@ -156,7 +156,7 @@ def invoke_llm_json(
    ) and supports_response_schema(llm.config.model_name, llm.config.model_provider)

    response_content = str(
-        llm.invoke(
+        llm.invoke_langchain(
            prompt,
            tools=tools,
            tool_choice=tool_choice,
@@ -224,7 +224,7 @@ def get_answer_from_llm(
    else:
        llm_response = run_with_timeout(
            timeout,
-            llm.invoke,
+            llm.invoke_langchain,
            prompt=msg,
            timeout_override=timeout_override,
            max_tokens=max_tokens,
--- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
@@ -10,6 +10,7 @@ from typing import TypedDict

 from langchain_core.messages import BaseMessage
 from langgraph.types import StreamWriter
+from sqlalchemy.orm import Session

 from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
 from onyx.agents.agent_search.shared_graph_utils.models import (
@@ -134,18 +135,24 @@ def format_entity_term_extraction(

 def get_persona_agent_prompt_expressions(
    persona: Persona | None,
+    db_session: Session,
 ) -> PersonaPromptExpressions:
    if persona is None:
        return PersonaPromptExpressions(
            contextualized_prompt=ASSISTANT_SYSTEM_PROMPT_DEFAULT, base_prompt=""
        )

-    # Prompts are now embedded directly on the Persona model
-    prompt_config = PromptConfig.from_model(persona)
+    # Backwards compatibility: use custom instructions if set, else the default behavior prompt
+    prompt_config = PromptConfig.from_model(persona, db_session=db_session)
+    system_prompt = (
+        prompt_config.custom_instructions
+        or prompt_config.default_behavior_system_prompt
+    )
+
    datetime_aware_system_prompt = handle_onyx_date_awareness(
-        prompt_str=prompt_config.system_prompt,
+        prompt_str=system_prompt,
        prompt_config=prompt_config,
-        add_additional_info_if_no_tag=persona.datetime_aware,
+        add_additional_info_if_no_tag=bool(persona and persona.datetime_aware),
    )

    return PersonaPromptExpressions(
@@ -268,7 +275,7 @@ def summarize_history(
    try:
        history_response = run_with_timeout(
            AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
-            llm.invoke,
+            llm.invoke_langchain,
            history_context_prompt,
            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
            max_tokens=AGENT_MAX_TOKENS_HISTORY_SUMMARY,
--- a/backend/onyx/auth/api_key.py
+++ b/backend/onyx/auth/api_key.py
@@ -2,29 +2,20 @@ import hashlib
 import secrets
 import uuid
 from urllib.parse import quote
-from urllib.parse import unquote

 from fastapi import Request
 from passlib.hash import sha256_crypt
 from pydantic import BaseModel

+from onyx.auth.constants import API_KEY_LENGTH
+from onyx.auth.constants import API_KEY_PREFIX
+from onyx.auth.constants import DEPRECATED_API_KEY_PREFIX
 from onyx.auth.schemas import UserRole
+from onyx.auth.utils import get_hashed_bearer_token_from_request
 from onyx.configs.app_configs import API_KEY_HASH_ROUNDS
 from shared_configs.configs import MULTI_TENANT


-_API_KEY_HEADER_NAME = "Authorization"
-# NOTE for others who are curious: In the context of a header, "X-" often refers
-# to non-standard, experimental, or custom headers in HTTP or other protocols. It
-# indicates that the header is not part of the official standards defined by
-# organizations like the Internet Engineering Task Force (IETF).
-_API_KEY_HEADER_ALTERNATIVE_NAME = "X-Onyx-Authorization"
-_BEARER_PREFIX = "Bearer "
-_API_KEY_PREFIX = "on_"
-_DEPRECATED_API_KEY_PREFIX = "dn_"
-_API_KEY_LEN = 192
-
-
 class ApiKeyDescriptor(BaseModel):
    api_key_id: int
    api_key_display: str
@@ -37,34 +28,10 @@ class ApiKeyDescriptor(BaseModel):

 def generate_api_key(tenant_id: str | None = None) -> str:
    if not MULTI_TENANT or not tenant_id:
-        return _API_KEY_PREFIX + secrets.token_urlsafe(_API_KEY_LEN)
+        return API_KEY_PREFIX + secrets.token_urlsafe(API_KEY_LENGTH)

    encoded_tenant = quote(tenant_id)  # URL encode the tenant ID
-    return f"{_API_KEY_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(_API_KEY_LEN)}"
-
-
-def extract_tenant_from_api_key_header(request: Request) -> str | None:
-    """Extract tenant ID from request. Returns None if auth is disabled or invalid format."""
-    raw_api_key_header = request.headers.get(
-        _API_KEY_HEADER_ALTERNATIVE_NAME
-    ) or request.headers.get(_API_KEY_HEADER_NAME)
-
-    if not raw_api_key_header or not raw_api_key_header.startswith(_BEARER_PREFIX):
-        return None
-
-    api_key = raw_api_key_header[len(_BEARER_PREFIX) :].strip()
-
-    if not api_key.startswith(_API_KEY_PREFIX) and not api_key.startswith(
-        _DEPRECATED_API_KEY_PREFIX
-    ):
-        return None
-
-    parts = api_key[len(_API_KEY_PREFIX) :].split(".", 1)
-    if len(parts) != 2:
-        return None
-
-    tenant_id = parts[0]
-    return unquote(tenant_id) if tenant_id else None
+    return f"{API_KEY_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(API_KEY_LENGTH)}"


 def _deprecated_hash_api_key(api_key: str) -> str:
@@ -74,30 +41,30 @@ def _deprecated_hash_api_key(api_key: str) -> str:
 def hash_api_key(api_key: str) -> str:
    # NOTE: no salt is needed, as the API key is randomly generated
    # and overlaps are impossible
-    if api_key.startswith(_API_KEY_PREFIX):
+    if api_key.startswith(API_KEY_PREFIX):
        return hashlib.sha256(api_key.encode("utf-8")).hexdigest()

-    if api_key.startswith(_DEPRECATED_API_KEY_PREFIX):
+    if api_key.startswith(DEPRECATED_API_KEY_PREFIX):
        return _deprecated_hash_api_key(api_key)

    raise ValueError(f"Invalid API key prefix: {api_key[:3]}")


 def build_displayable_api_key(api_key: str) -> str:
-    if api_key.startswith(_API_KEY_PREFIX):
-        api_key = api_key[len(_API_KEY_PREFIX) :]
+    if api_key.startswith(API_KEY_PREFIX):
+        api_key = api_key[len(API_KEY_PREFIX) :]

-    return _API_KEY_PREFIX + api_key[:4] + "********" + api_key[-4:]
+    return API_KEY_PREFIX + api_key[:4] + "********" + api_key[-4:]


 def get_hashed_api_key_from_request(request: Request) -> str | None:
-    raw_api_key_header = request.headers.get(
-        _API_KEY_HEADER_ALTERNATIVE_NAME
-    ) or request.headers.get(_API_KEY_HEADER_NAME)
-    if raw_api_key_header is None:
-        return None
+    """Extract and hash API key from Authorization header.

-    if raw_api_key_header.startswith(_BEARER_PREFIX):
-        raw_api_key_header = raw_api_key_header[len(_BEARER_PREFIX) :].strip()
-
-    return hash_api_key(raw_api_key_header)
+    Accepts both "Bearer <key>" and raw key formats.
+    """
+    return get_hashed_bearer_token_from_request(
+        request,
+        valid_prefixes=[API_KEY_PREFIX, DEPRECATED_API_KEY_PREFIX],
+        hash_fn=hash_api_key,
+        allow_non_bearer=True,  # API keys historically support both formats
+    )
--- a/backend/onyx/auth/constants.py
+++ b/backend/onyx/auth/constants.py
@@ -0,0 +1,15 @@
+"""Authentication constants shared across auth modules."""
+
+# API Key constants
+API_KEY_PREFIX = "on_"
+DEPRECATED_API_KEY_PREFIX = "dn_"
+API_KEY_LENGTH = 192
+
+# PAT constants
+PAT_PREFIX = "onyx_pat_"
+PAT_LENGTH = 192
+
+# Shared header constants
+API_KEY_HEADER_NAME = "Authorization"
+API_KEY_HEADER_ALTERNATIVE_NAME = "X-Onyx-Authorization"
+BEARER_PREFIX = "Bearer "
--- a/backend/onyx/auth/oauth_token_manager.py
+++ b/backend/onyx/auth/oauth_token_manager.py
@@ -0,0 +1,155 @@
+import time
+from typing import Any
+from urllib.parse import urlencode
+from uuid import UUID
+
+import requests
+from sqlalchemy.orm import Session
+
+from onyx.db.models import OAuthConfig
+from onyx.db.models import OAuthUserToken
+from onyx.db.oauth_config import get_user_oauth_token
+from onyx.db.oauth_config import upsert_user_oauth_token
+from onyx.utils.logger import setup_logger
+
+
+logger = setup_logger()
+
+
+class OAuthTokenManager:
+    """Manages OAuth token retrieval, refresh, and validation.
+
+    An instance is bound to one OAuth config, one user, and one DB session.
+    Token data is handled as a plain dict; this class reads and writes the
+    keys ``access_token``, ``refresh_token``, ``expires_in`` and
+    ``expires_at`` (epoch seconds, computed locally from ``expires_in``).
+    """
+
+    # Upper bound in seconds for each call to the provider's token endpoint.
+    # `requests` applies no timeout by default, so without this an
+    # unresponsive provider would block the calling thread indefinitely.
+    _TOKEN_REQUEST_TIMEOUT_SECONDS = 30
+
+    def __init__(self, oauth_config: OAuthConfig, user_id: UUID, db_session: Session):
+        self.oauth_config = oauth_config
+        self.user_id = user_id
+        self.db_session = db_session
+
+    def get_valid_access_token(self) -> str | None:
+        """Get a valid access token, refreshing it if necessary.
+
+        Returns:
+            The stored (or freshly refreshed) access token, or None when no
+            token is stored, or when the stored token is expired and either
+            has no refresh token or the refresh attempt raised.
+        """
+        user_token = get_user_oauth_token(
+            self.oauth_config.id, self.user_id, self.db_session
+        )
+
+        if not user_token:
+            return None
+
+        token_data = user_token.token_data
+
+        # Check if token is expired
+        if OAuthTokenManager.is_token_expired(token_data):
+            # Try to refresh if we have a refresh token
+            if "refresh_token" in token_data:
+                try:
+                    return self.refresh_token(user_token)
+                except Exception as e:
+                    # Best effort: any failure (network, HTTP status, malformed
+                    # response) is reported as "no valid token" to the caller.
+                    logger.warning(f"Failed to refresh token: {e}")
+                    return None
+            else:
+                return None
+
+        return token_data.get("access_token")
+
+    def _post_token_request(self, data: dict[str, Any]) -> dict[str, Any]:
+        """POST form data to the token endpoint and return the parsed JSON body.
+
+        Adds ``expires_at`` (epoch seconds) to the result when the provider
+        returned ``expires_in``.
+
+        Raises:
+            requests.RequestException: on connection errors, timeouts, or a
+                non-success HTTP status (via ``raise_for_status``).
+        """
+        response = requests.post(
+            self.oauth_config.token_url,
+            data=data,
+            headers={"Accept": "application/json"},
+            timeout=self._TOKEN_REQUEST_TIMEOUT_SECONDS,
+        )
+        response.raise_for_status()
+
+        token_data = response.json()
+
+        # Calculate expires_at if expires_in is present
+        if "expires_in" in token_data:
+            token_data["expires_at"] = int(time.time()) + token_data["expires_in"]
+
+        return token_data
+
+    def refresh_token(self, user_token: OAuthUserToken) -> str:
+        """Refresh the access token using the stored refresh token.
+
+        The refreshed token data is persisted via ``upsert_user_oauth_token``
+        before the new access token is returned.
+
+        Raises:
+            KeyError: if the stored token has no ``refresh_token`` or the
+                provider response has no ``access_token``.
+            requests.RequestException: if the token endpoint call fails.
+        """
+        token_data = user_token.token_data
+
+        new_token_data = self._post_token_request(
+            {
+                "grant_type": "refresh_token",
+                "refresh_token": token_data["refresh_token"],
+                "client_id": self.oauth_config.client_id,
+                "client_secret": self.oauth_config.client_secret,
+            }
+        )
+
+        # Preserve refresh_token if not returned (some providers don't return it)
+        if "refresh_token" not in new_token_data and "refresh_token" in token_data:
+            new_token_data["refresh_token"] = token_data["refresh_token"]
+
+        # Update token in DB
+        upsert_user_oauth_token(
+            self.oauth_config.id,
+            self.user_id,
+            new_token_data,
+            self.db_session,
+        )
+
+        return new_token_data["access_token"]
+
+    @classmethod
+    def token_expiration_time(cls, token_data: dict[str, Any]) -> int | None:
+        """Return the token's ``expires_at`` value, or None if missing or falsy."""
+        expires_at = token_data.get("expires_at")
+        if not expires_at:
+            return None
+
+        return expires_at
+
+    @classmethod
+    def is_token_expired(cls, token_data: dict[str, Any]) -> bool:
+        """Check if the token is expired (with a 60 second buffer).
+
+        Token data without a usable ``expires_at`` is treated as not expired.
+        """
+        expires_at = cls.token_expiration_time(token_data)
+        if not expires_at:
+            return False  # No expiration data, assume valid
+
+        # Add 60 second buffer to avoid race conditions
+        return int(time.time()) + 60 >= expires_at
+
+    def exchange_code_for_token(self, code: str, redirect_uri: str) -> dict[str, Any]:
+        """Exchange an authorization code for token data.
+
+        Returns the provider's JSON response, with ``expires_at`` added when
+        ``expires_in`` is present. Nothing is persisted here.
+
+        Raises:
+            requests.RequestException: if the token endpoint call fails.
+        """
+        return self._post_token_request(
+            {
+                "grant_type": "authorization_code",
+                "code": code,
+                "client_id": self.oauth_config.client_id,
+                "client_secret": self.oauth_config.client_secret,
+                "redirect_uri": redirect_uri,
+            }
+        )
+
+    @staticmethod
+    def build_authorization_url(
+        oauth_config: OAuthConfig, redirect_uri: str, state: str
+    ) -> str:
+        """Build the OAuth authorization URL for the given config.
+
+        Query parameters are URL-encoded and appended to
+        ``oauth_config.authorization_url``.
+        """
+        params: dict[str, Any] = {
+            "client_id": oauth_config.client_id,
+            "redirect_uri": redirect_uri,
+            "response_type": "code",
+            "state": state,
+        }
+
+        # Add scopes if configured
+        if oauth_config.scopes:
+            params["scope"] = " ".join(oauth_config.scopes)
+
+        # Add any additional provider-specific parameters
+        # (these override the defaults above on key collision)
+        if oauth_config.additional_params:
+            params.update(oauth_config.additional_params)
+
+        # Check if URL already has query parameters
+        separator = "&" if "?" in oauth_config.authorization_url else "?"
+
+        return f"{oauth_config.authorization_url}{separator}{urlencode(params)}"
--- a/backend/onyx/auth/pat.py
+++ b/backend/onyx/auth/pat.py
@@ -0,0 +1,60 @@
+"""Personal Access Token generation and validation."""
+
+import hashlib
+import secrets
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone
+from urllib.parse import quote
+
+from fastapi import Request
+
+from onyx.auth.constants import PAT_LENGTH
+from onyx.auth.constants import PAT_PREFIX
+from onyx.auth.utils import get_hashed_bearer_token_from_request
+from shared_configs.configs import MULTI_TENANT
+
+
+def generate_pat(tenant_id: str | None = None) -> str:
+    """Generate a new Personal Access Token from a cryptographically secure source.
+
+    The token is ``PAT_PREFIX`` followed by a URL-safe random string built
+    from ``PAT_LENGTH`` random bytes. In multi-tenant mode with a tenant ID,
+    the URL-encoded tenant ID and a ``.`` are inserted between the prefix and
+    the random part.
+    """
+    random_part = secrets.token_urlsafe(PAT_LENGTH)
+    if not (MULTI_TENANT and tenant_id):
+        return PAT_PREFIX + random_part
+    return f"{PAT_PREFIX}{quote(tenant_id)}.{random_part}"
+
+
+def hash_pat(token: str) -> str:
+    """Return the hex-encoded SHA-256 digest of the UTF-8 encoded token.
+
+    No salt is applied; the stated rationale is that tokens are already
+    cryptographically random.
+    """
+    digest = hashlib.sha256()
+    digest.update(token.encode("utf-8"))
+    return digest.hexdigest()
+
+
+def build_displayable_pat(token: str) -> str:
+    """Create masked display version: show prefix + next 4 chars, mask middle, show last 4.
+
+    Example: onyx_pat_abc1****xyz9
+
+    For tokens generated with a tenant ID, the 4 characters after the prefix
+    come from the encoded tenant ID rather than from the random part.
+    """
+    # Derive the cut from the prefix length instead of hard-coding it:
+    # "onyx_pat_" is 9 chars, so a fixed 12 revealed only 3 chars after it.
+    visible_head = len(PAT_PREFIX) + 4
+    return f"{token[:visible_head]}****{token[-4:]}"
+
+
+def get_hashed_pat_from_request(request: Request) -> str | None:
+    """Extract the PAT carried by the request and return its hash.
+
+    Delegates to ``get_hashed_bearer_token_from_request`` with the PAT prefix
+    and ``hash_pat``. Non-bearer values are disallowed, so only the
+    "Bearer <token>" form is accepted (API keys, by contrast, also allow the
+    raw form).
+    """
+    hashed_pat = get_hashed_bearer_token_from_request(
+        request,
+        hash_fn=hash_pat,
+        valid_prefixes=[PAT_PREFIX],
+        allow_non_bearer=False,  # PATs require Bearer prefix
+    )
+    return hashed_pat
+
+
+def calculate_expiration(days: int | None) -> datetime | None:
+    """Return the expiry timestamp for a token valid for ``days`` days.
+
+    The result is 23:59:59.999999 UTC on the date ``days`` days after the
+    current UTC date. ``None`` means the token never expires and yields None.
+    """
+    if days is not None:
+        today_utc = datetime.now(timezone.utc).date()
+        end_of_day = datetime.max.time()
+        return datetime.combine(
+            today_utc + timedelta(days=days), end_of_day, tzinfo=timezone.utc
+        )
+    return None
--- a/backend/onyx/auth/users.py
+++ b/backend/onyx/auth/users.py
@@ -64,6 +64,7 @@ from onyx.auth.email_utils import send_user_verification_email
 from onyx.auth.invited_users import get_invited_users
 from onyx.auth.invited_users import remove_user_from_invited_users
 from onyx.auth.jwt import verify_jwt_token
+from onyx.auth.pat import get_hashed_pat_from_request
 from onyx.auth.schemas import AuthBackend
 from onyx.auth.schemas import UserCreate
 from onyx.auth.schemas import UserRole
@@ -109,13 +110,12 @@ from onyx.db.models import AccessToken
 from onyx.db.models import OAuthAccount
 from onyx.db.models import Persona
 from onyx.db.models import User
-from onyx.db.saml import get_saml_account
+from onyx.db.pat import fetch_user_for_pat
 from onyx.db.users import get_user_by_email
 from onyx.redis.redis_pool import get_async_redis_connection
 from onyx.redis.redis_pool import get_redis_client
 from onyx.server.utils import BasicAuthenticationError
 from onyx.utils.logger import setup_logger
-from onyx.utils.secrets import extract_hashed_cookie
 from onyx.utils.telemetry import create_milestone_and_report
 from onyx.utils.telemetry import optional_telemetry
 from onyx.utils.telemetry import RecordType
@@ -206,6 +206,10 @@ def anonymous_user_enabled(*, tenant_id: str | None = None) -> bool:


 def verify_email_is_invited(email: str) -> None:
+    if AUTH_TYPE in {AuthType.SAML, AuthType.OIDC}:
+        # SSO providers manage membership; allow JIT provisioning regardless of invites
+        return
+
    whitelist = get_invited_users()
    if not whitelist:
        return
@@ -1064,17 +1068,7 @@ async def _check_for_saml_and_jwt(
    user: User | None,
    async_db_session: AsyncSession,
 ) -> User | None:
-    # Check if the user has a session cookie from SAML
-    if AUTH_TYPE == AuthType.SAML:
-        saved_cookie = extract_hashed_cookie(request)
-
-        if saved_cookie:
-            saml_account = await get_saml_account(
-                cookie=saved_cookie, async_db_session=async_db_session
-            )
-            user = saml_account.user if saml_account else None
-
-    # If user is still None, check for JWT in Authorization header
+    # If user is None, check for JWT in Authorization header
    if user is None and JWT_PUBLIC_KEY_URL is not None:
        auth_header = request.headers.get("Authorization")
        if auth_header and auth_header.startswith("Bearer "):
@@ -1091,6 +1085,12 @@ async def optional_user(
 ) -> User | None:
    user = await _check_for_saml_and_jwt(request, user, async_db_session)

+    # check if a PAT is present (before API key)
+    if user is None:
+        hashed_pat = get_hashed_pat_from_request(request)
+        if hashed_pat:
+            user = await fetch_user_for_pat(hashed_pat, async_db_session)
+
    # check if an API key is present
    if user is None:
        try:
--- a/Show More
+++ b/Show More