Compare commits

...

75 Commits

Author SHA1 Message Date
Yuhong Sun
b1e92d8e8f k 2026-01-23 17:17:28 -08:00
Danelegend
0594fd17de chore(tests): add more packet tests (#7677) 2026-01-23 19:49:41 +00:00
Jamison Lahman
fded81dc28 chore(extensions): pull in chrome extension (#7703) 2026-01-23 10:17:05 -08:00
Danelegend
31db112de9 feat(url): Open url around snippet (#7488) 2026-01-23 17:02:38 +00:00
Jamison Lahman
a3e2da2c51 chore(vscode): add useful database operations (#7702) 2026-01-23 08:49:59 -08:00
Evan Lohn
f4d33bcc0d feat: basic user MCP action attaching (#7681) 2026-01-23 05:50:49 +00:00
Jamison Lahman
464d957494 chore(devtools): upgrade ods v0.4.0; vscode to restore seeded db (#7696) 2026-01-23 05:21:46 +00:00
Jamison Lahman
be12de9a44 chore(devtools): ods db restore --fetch-seeded (#7689) 2026-01-22 20:41:28 -08:00
Yuhong Sun
3e4a1f8a09 feat: Maintain correct docs on replay (#7683) 2026-01-22 19:24:10 -08:00
Raunak Bhagat
af9b7826ab fix: Remove cursor pointer from view-only field (#7688)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-01-23 02:47:08 +00:00
Danelegend
cb16eb13fc chore(tests): Mock LLM (#7590) 2026-01-23 01:48:54 +00:00
Jamison Lahman
20a73bdd2e chore(desktop): make artifact filename version-agnostic (#7679) 2026-01-22 15:15:52 -08:00
Justin Tahara
85cc2b99b7 fix(fastapi): Resolve CVE-2025-68481 (#7661) 2026-01-22 20:07:25 +00:00
Jamison Lahman
1208a3ee2b chore(fe): disable blur when there is not a custom background (#7673)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-01-22 11:26:16 -08:00
Justin Tahara
900fcef9dd feat(desktop): Domain Configuration (#7655) 2026-01-22 18:15:44 +00:00
Justin Tahara
d4ed25753b fix(ui): Coda Logo (#7656) 2026-01-22 10:10:02 -08:00
Justin Tahara
0ee58333b4 fix(ui): User Groups Connectors Fix (#7658) 2026-01-22 17:59:12 +00:00
Justin Tahara
11b7e0d571 fix(ui): First Connector Result (#7657) 2026-01-22 17:52:02 +00:00
acaprau
a35831f328 fix(opensearch): Release Onyx Helm Charts was failing (#7672) 2026-01-22 17:41:47 +00:00
Justin Tahara
048a6d5259 fix(ui): Fix Token Rate Limits Page (#7659) 2026-01-22 17:20:21 +00:00
Ciaran Sweet
e4bdb15910 docs: enhance send-chat-message docs to also show ChatFullResponse (#7430) 2026-01-22 16:48:26 +00:00
Jamison Lahman
3517d59286 chore(fe): add custom backgrounds to the settings page (#7668) 2026-01-21 21:32:56 -08:00
Jamison Lahman
4bc08e5d88 chore(fe): remove Text pseudo-element padding (#7665) 2026-01-21 19:50:42 -08:00
Yuhong Sun
4bd080cf62 chore: Redirect user to create account (#7654) 2026-01-22 02:44:58 +00:00
Raunak Bhagat
b0a8625ffc feat: Add confirmation modal for connector disconnect (#7637)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-22 02:08:19 +00:00
Yuhong Sun
f94baf6143 fix: DR Language Tuning (#7660) 2026-01-21 17:36:50 -08:00
Wenxi
9e1867638a feat: onyx discord bot - frontend (#7497) 2026-01-22 00:00:12 +00:00
Yuhong Sun
5b6d7c9f0d chore: Onboarding Image Generation (#7653) 2026-01-21 15:49:15 -08:00
Danelegend
e5dcf31f10 fix(image): Emit error to user (#7644) 2026-01-21 22:50:12 +00:00
Nikolas Garza
8ca06ef3e7 fix: deflake chat user journey test (#7646) 2026-01-21 22:33:30 +00:00
Justin Tahara
6897dbd610 feat(desktop): Properly Sign Mac App (#7608) 2026-01-21 22:17:45 +00:00
Evan Lohn
7f3cb77466 chore: remove prompt caching from chat history (#7636) 2026-01-21 21:35:11 +00:00
acaprau
267042a5aa fix(opensearch): Use the same method for getting title that the title embedding logic uses; small cleanup for content embedding (#7638) 2026-01-21 21:34:38 +00:00
Yuhong Sun
d02b3ae6ac chore: Remove default prompt shortcuts (#7639) 2026-01-21 21:28:53 +00:00
Yuhong Sun
683c3f7a7e fix: color mode and memories (#7642) 2026-01-21 13:29:33 -08:00
Nikolas Garza
008b4d2288 fix(slack): Extract person names and filter garbage in query expansion (#7632) 2026-01-21 21:09:50 +00:00
Jamison Lahman
8be261405a chore(deployments): fix region (#7640) 2026-01-21 13:14:42 -08:00
acaprau
61f2c48ebc feat(opensearch): Add helm charts (#7606) 2026-01-21 19:34:18 +00:00
acaprau
dbde2e6d6d chore(opensearch): Create OpenSearch docker compose, enabling test_opensearch_client.py to run in CI (#7611) 2026-01-21 18:41:23 +00:00
Raunak Bhagat
2860136214 feat: Refreshed user settings page (#7455)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 16:41:56 +00:00
Raunak Bhagat
49ec5994d3 refactor: Improve refresh-components with cleanup and truncation (#7622)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 00:29:25 -08:00
Raunak Bhagat
8d5fb67f0f feat: improve prompt shortcuts with uniqueness constraints and enhancements (#7619)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 07:31:35 +00:00
Raunak Bhagat
15d02f6e3c fix: Prevent description duplication in Modal header (#7609)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 04:32:22 +00:00
Jamison Lahman
e58974c419 chore(fe): move chatpage footer inside background element (#7618) 2026-01-21 04:21:49 +00:00
Yuhong Sun
6b66c07952 chore: Delete multilingual docker compose file (#7616) 2026-01-20 19:50:01 -08:00
Jamison Lahman
cae058a3ac chore(extensions): simplify and de-dupe NRFPage (#7607) 2026-01-21 03:42:19 +00:00
Nikolas Garza
aa3b21a191 fix: scroll to bottom when loading existing conversations (#7614) 2026-01-20 19:19:18 -08:00
Raunak Bhagat
7a07a78696 fix: Set width to fit for rightChildren section in LineItem (#7604)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 01:55:03 +00:00
Nikolas Garza
a8db236e37 feat(billing): fetch Stripe publishable key from S3 (#7595) 2026-01-21 01:32:57 +00:00
Raunak Bhagat
8a2e4ed36f fix: Fix flashing in progress-circle icon (#7605)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 01:03:52 +00:00
Evan Lohn
216f2c95a7 chore: add dialog description to modal (#7603) 2026-01-21 00:41:35 +00:00
Evan Lohn
67081efe08 fix: modal header in index attempt errors (#7601) 2026-01-21 00:37:23 +00:00
Yuhong Sun
9d40b8336f feat: Allow no system prompt (#7600) 2026-01-20 16:16:39 -08:00
Evan Lohn
23f0033302 chore: bg services launch.json (#7597) 2026-01-21 00:05:20 +00:00
Raunak Bhagat
9011b76eb0 refactor: Add new layout component (#7588)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 23:36:18 +00:00
Yuhong Sun
45e436bafc fix: prompt tunings (#7594) 2026-01-20 15:13:05 -08:00
Justin Tahara
010bc36d61 Revert "chore(deps): Bump fastapi-users from 14.0.1 to 15.0.2 in /backend/requirements" (#7593) 2026-01-20 14:44:21 -08:00
dependabot[bot]
468e488bdb chore(deps): bump docker/setup-buildx-action from 3.11.1 to 3.12.0 (#7527)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-20 22:36:39 +00:00
dependabot[bot]
9104c0ffce chore(deps): Bump fastapi-users from 14.0.1 to 15.0.2 in /backend/requirements (#6897)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: justin-tahara <justintahara@gmail.com>
2026-01-20 22:31:02 +00:00
Jamison Lahman
d36a6bd0b4 feat(fe): custom chat backgrounds (#7486)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-01-20 14:29:06 -08:00
Jamison Lahman
a3603c498c chore(deployments): fetch secrets from AWS (#7584) 2026-01-20 22:10:19 +00:00
Jamison Lahman
8f274e34c9 chore(blame): unignore checked in .vscode/ files (#7592) 2026-01-20 14:07:27 -08:00
Justin Tahara
5c256760ff fix(vertex ai): Extra Args for Opus 4.5 (#7586) 2026-01-20 14:07:14 -08:00
Nikolas Garza
258e1372b3 fix(billing): remove grandfathered pricing option when subscription lapses (#7583) 2026-01-20 21:55:37 +00:00
Yuhong Sun
83a543a265 chore: NLTK and stopwords (#7587) 2026-01-20 13:36:04 -08:00
Evan Lohn
f9719d199d fix: drive connector creation ui (#7578) 2026-01-20 21:10:06 +00:00
Raunak Bhagat
1c7bb6e56a fix: Input variant refactor (#7579)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 13:04:16 -08:00
acaprau
982ad7d329 feat(opensearch): Add dual document indices (#7539) 2026-01-20 20:53:24 +00:00
Jamison Lahman
f94292808b chore(vscode): launch.template.jsonc -> launch.json (#7440) 2026-01-20 20:32:46 +00:00
Justin Tahara
293553a2e2 fix(tests): Anthropic Prompt Caching Test (#7585) 2026-01-20 20:32:24 +00:00
Justin Tahara
ba906ae6fa chore(llm): Removing Claude Haiku 3.5 (#7577) 2026-01-20 19:06:14 +00:00
Raunak Bhagat
c84c7a354e refactor: refactor to use string-enum props instead of boolean props (#7575)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 18:59:54 +00:00
Jamison Lahman
2187b0dd82 chore(pre-commit): disallow large files (#7576) 2026-01-20 11:02:00 -08:00
acaprau
d88a417bf9 feat(opensearch): Formally disable secondary indices in the backend (#7541) 2026-01-20 18:21:47 +00:00
Jamison Lahman
f2d32b0b3b fix(fe): inline code text wraps (#7574) 2026-01-20 17:11:42 +00:00
291 changed files with 19678 additions and 5370 deletions

View File

@@ -8,7 +8,9 @@ on:
# Set restrictive default permissions for all jobs. Jobs that need more permissions
# should explicitly declare them.
permissions: {}
permissions:
# Required for OIDC authentication with AWS
id-token: write # zizmor: ignore[excessive-permissions]
env:
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
@@ -150,16 +152,30 @@ jobs:
if: always() && needs.check-version-tag.result == 'failure' && github.event_name != 'workflow_dispatch'
runs-on: ubuntu-slim
timeout-minutes: 10
environment: release
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: "• check-version-tag"
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
@@ -168,6 +184,7 @@ jobs:
needs: determine-builds
if: needs.determine-builds.outputs.build-desktop == 'true'
permissions:
id-token: write
contents: write
actions: read
strategy:
@@ -185,12 +202,33 @@ jobs:
runs-on: ${{ matrix.platform }}
timeout-minutes: 90
environment: release
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
with:
# NOTE: persist-credentials is needed for tauri-action to create GitHub releases.
persist-credentials: true # zizmor: ignore[artipacked]
- name: Configure AWS credentials
if: startsWith(matrix.platform, 'macos-')
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
if: startsWith(matrix.platform, 'macos-')
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
APPLE_ID, deploy/apple-id
APPLE_PASSWORD, deploy/apple-password
APPLE_CERTIFICATE, deploy/apple-certificate
APPLE_CERTIFICATE_PASSWORD, deploy/apple-certificate-password
KEYCHAIN_PASSWORD, deploy/keychain-password
APPLE_TEAM_ID, deploy/apple-team-id
parse-json-secrets: true
- name: install dependencies (ubuntu only)
if: startsWith(matrix.platform, 'ubuntu-')
run: |
@@ -285,15 +323,40 @@ jobs:
Write-Host "Versions set to: $VERSION"
- name: Import Apple Developer Certificate
if: startsWith(matrix.platform, 'macos-')
run: |
echo $APPLE_CERTIFICATE | base64 --decode > certificate.p12
security create-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
security default-keychain -s build.keychain
security unlock-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
security set-keychain-settings -t 3600 -u build.keychain
security import certificate.p12 -k build.keychain -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign
security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$KEYCHAIN_PASSWORD" build.keychain
security find-identity -v -p codesigning build.keychain
- name: Verify Certificate
if: startsWith(matrix.platform, 'macos-')
run: |
CERT_INFO=$(security find-identity -v -p codesigning build.keychain | grep -E "(Developer ID Application|Apple Distribution|Apple Development)" | head -n 1)
CERT_ID=$(echo "$CERT_INFO" | awk -F'"' '{print $2}')
echo "CERT_ID=$CERT_ID" >> $GITHUB_ENV
echo "Certificate imported."
- uses: tauri-apps/tauri-action@73fb865345c54760d875b94642314f8c0c894afa # ratchet:tauri-apps/tauri-action@action-v0.6.1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
APPLE_ID: ${{ env.APPLE_ID }}
APPLE_PASSWORD: ${{ env.APPLE_PASSWORD }}
APPLE_SIGNING_IDENTITY: ${{ env.CERT_ID }}
APPLE_TEAM_ID: ${{ env.APPLE_TEAM_ID }}
with:
tagName: ${{ needs.determine-builds.outputs.is-test-run != 'true' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseName: ${{ needs.determine-builds.outputs.is-test-run != 'true' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseBody: "See the assets to download this version and install."
releaseDraft: true
prerelease: false
assetNamePattern: "[name]_[arch][ext]"
args: ${{ matrix.args }}
build-web-amd64:
@@ -305,6 +368,7 @@ jobs:
- run-id=${{ github.run_id }}-web-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -317,6 +381,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -326,13 +404,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -363,6 +441,7 @@ jobs:
- run-id=${{ github.run_id }}-web-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -375,6 +454,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -384,13 +477,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -423,19 +516,34 @@ jobs:
- run-id=${{ github.run_id }}-merge-web
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -471,6 +579,7 @@ jobs:
- run-id=${{ github.run_id }}-web-cloud-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -483,6 +592,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -492,13 +615,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -537,6 +660,7 @@ jobs:
- run-id=${{ github.run_id }}-web-cloud-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -549,6 +673,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -558,13 +696,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -605,19 +743,34 @@ jobs:
- run-id=${{ github.run_id }}-merge-web-cloud
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -650,6 +803,7 @@ jobs:
- run-id=${{ github.run_id }}-backend-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -662,6 +816,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -671,13 +839,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -707,6 +875,7 @@ jobs:
- run-id=${{ github.run_id }}-backend-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -719,6 +888,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -728,13 +911,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -766,19 +949,34 @@ jobs:
- run-id=${{ github.run_id }}-merge-backend
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -815,6 +1013,7 @@ jobs:
- volume=40gb
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -827,6 +1026,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -836,15 +1049,15 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -879,6 +1092,7 @@ jobs:
- volume=40gb
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -891,6 +1105,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -900,15 +1128,15 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -944,19 +1172,34 @@ jobs:
- run-id=${{ github.run_id }}-merge-model-server
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -994,11 +1237,26 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-web
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1014,8 +1272,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1034,11 +1292,26 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-web-cloud
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1054,8 +1327,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1074,6 +1347,7 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-backend
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
steps:
@@ -1084,6 +1358,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1100,8 +1388,8 @@ jobs:
-v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1121,11 +1409,26 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-model-server
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1141,8 +1444,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1170,12 +1473,26 @@ jobs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 90
environment: release
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Determine failed jobs
id: failed-jobs
shell: bash
@@ -1241,7 +1558,7 @@ jobs:
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -29,6 +29,7 @@ jobs:
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/

View File

@@ -94,7 +94,7 @@ jobs:
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -45,6 +45,9 @@ env:
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
# OpenSearch
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
@@ -125,11 +128,13 @@ jobs:
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker-compose.opensearch.yml \
up -d \
minio \
relational_db \
cache \
index \
opensearch \
code-interpreter
- name: Run migrations
@@ -158,7 +163,7 @@ jobs:
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
# Collect logs from each container
for container in $containers; do

View File

@@ -88,6 +88,7 @@ jobs:
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
@@ -180,6 +181,11 @@ jobs:
trap cleanup EXIT
# Run the actual installation with detailed logging
# Note that opensearch.enabled is true whereas others in this install
# are false. There is some work that needs to be done to get this
# entire step working in CI, enabling opensearch here is a small step
# in that direction. If this is causing issues, disabling it in this
# step should be ok in the short term.
echo "=== Starting ct install ==="
set +e
ct install --all \
@@ -187,6 +193,8 @@ jobs:
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=opensearch.enabled=true \
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \

View File

@@ -103,7 +103,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -163,7 +163,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -208,7 +208,7 @@ jobs:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit

View File

@@ -95,7 +95,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -155,7 +155,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -214,7 +214,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit

View File

@@ -85,7 +85,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -146,7 +146,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -207,7 +207,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/

View File

@@ -70,7 +70,7 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
- name: Build and load
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6

3
.gitignore vendored
View File

@@ -1,5 +1,8 @@
# editors
.vscode
!/.vscode/env_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.cursor

View File

@@ -74,6 +74,13 @@ repos:
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-added-large-files
name: Check for added large files
args: ["--maxkb=1500"]
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:

View File

@@ -1,5 +1,3 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
@@ -24,7 +22,7 @@
"Slack Bot",
"Celery primary",
"Celery light",
"Celery background",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat"
@@ -579,6 +577,99 @@
"group": "3"
}
},
{
// Dummy entry used to label the group
"name": "--- Database ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "4",
"order": 0
}
},
{
"name": "Clean restore seeded database dump (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--clean",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Create database snapshot",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"dump",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore database snapshot (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--clean",
"--yes",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Upgrade database to head revision",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"upgrade"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -37,10 +37,6 @@ CVE-2023-50868
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193

View File

@@ -89,12 +89,6 @@ RUN uv pip install --system --no-cache-dir --upgrade \
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"

View File

@@ -0,0 +1,42 @@
"""add_unique_constraint_to_inputprompt_prompt_user_id
Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Enforce uniqueness of input-prompt names per owner.

    Adds a composite unique constraint on (prompt, user_id) for user-owned
    shortcuts, plus a partial unique index covering public prompts (rows
    where user_id IS NULL), because PostgreSQL unique constraints do not
    treat NULLs as equal and thus would not deduplicate public names.
    """
    op.create_unique_constraint(
        "uq_inputprompt_prompt_user_id",
        "inputprompt",
        ["prompt", "user_id"],
    )
    # NULL user_id rows (public prompts) escape the constraint above, so a
    # partial unique index is needed to keep public prompt names unique too.
    public_prompt_index_sql = """
        CREATE UNIQUE INDEX uq_inputprompt_prompt_public
        ON inputprompt (prompt)
        WHERE user_id IS NULL
        """
    op.execute(public_prompt_index_sql)
def downgrade() -> None:
    """Revert the per-owner uniqueness rules on input-prompt names."""
    # Undo in reverse order of creation: partial index first, then constraint.
    op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
    op.drop_constraint(
        "uq_inputprompt_prompt_user_id",
        "inputprompt",
        type_="unique",
    )

View File

@@ -0,0 +1,29 @@
"""remove default prompt shortcuts
Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Drop the pre-seeded default prompt shortcuts (rows with negative IDs)."""
    # Child rows in inputprompt__user reference inputprompt, so they must be
    # removed first to satisfy the foreign key constraint.
    delete_associations_sql = (
        "DELETE FROM inputprompt__user WHERE input_prompt_id IN "
        "(SELECT id FROM inputprompt WHERE id < 0)"
    )
    op.execute(delete_associations_sql)
    # Seeded default shortcuts are identifiable by their negative primary keys.
    op.execute("DELETE FROM inputprompt WHERE id < 0")
def downgrade() -> None:
    """No-op: the seeded default prompts are intentionally not restored."""
    return None

View File

@@ -0,0 +1,31 @@
"""add chat_background to user
Revision ID: fb80bdd256de
Revises: 8b5ce697290e
Create Date: 2026-01-16 16:15:59.222617
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fb80bdd256de"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the nullable chat_background column to the user table."""
    # Nullable so existing user rows need no backfill on upgrade.
    chat_background_column = sa.Column("chat_background", sa.String(), nullable=True)
    op.add_column("user", chat_background_column)
def downgrade() -> None:
    """Remove the chat_background column from the user table."""
    table_name, column_name = "user", "chat_background"
    op.drop_column(table_name, column_name)

View File

@@ -17,7 +17,8 @@ from onyx.context.search.models import InferenceChunk
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.db.models import User
from onyx.document_index.factory import get_current_primary_default_document_index
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.factory import get_default_llm
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
@@ -42,11 +43,13 @@ def _run_single_search(
document_index: DocumentIndex,
user: User | None,
db_session: Session,
num_hits: int | None = None,
) -> list[InferenceChunk]:
"""Execute a single search query and return chunks."""
chunk_search_request = ChunkSearchRequest(
query=query,
user_selected_filters=filters,
limit=num_hits,
)
return search_pipeline(
@@ -72,7 +75,9 @@ def stream_search_query(
Used by both streaming and non-streaming endpoints.
"""
# Get document index
document_index = get_current_primary_default_document_index(db_session)
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
# Determine queries to execute
original_query = request.search_query
@@ -114,6 +119,7 @@ def stream_search_query(
document_index=document_index,
user=user,
db_session=db_session,
num_hits=request.num_hits,
)
else:
# Multiple queries - run in parallel and merge with RRF
@@ -121,7 +127,14 @@ def stream_search_query(
search_functions = [
(
_run_single_search,
(query, request.filters, document_index, user, db_session),
(
query,
request.filters,
document_index,
user,
db_session,
request.num_hits,
),
)
for query in all_executed_queries
]
@@ -168,6 +181,9 @@ def stream_search_query(
# Merge chunks into sections
sections = merge_individual_chunks(chunks)
# Truncate to the requested number of hits
sections = sections[: request.num_hits]
# Apply LLM document selection if requested
# num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
# The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it

View File

@@ -10,6 +10,8 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
("/enterprise-settings/logo", {"GET"}),
("/enterprise-settings/logotype", {"GET"}),
("/enterprise-settings/custom-analytics-script", {"GET"}),
# Stripe publishable key is safe to expose publicly
("/tenants/stripe-publishable-key", {"GET"}),
]

View File

@@ -32,6 +32,7 @@ class SendSearchQueryRequest(BaseModel):
filters: BaseFilters | None = None
num_docs_fed_to_llm_selection: int | None = None
run_query_expansion: bool = False
num_hits: int = 50
include_content: bool = False
stream: bool = False

View File

@@ -1,3 +1,6 @@
import asyncio
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
@@ -12,11 +15,14 @@ from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import StripePublishableKeyResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import overwrite_full_gated_set
from ee.onyx.server.tenants.product_gating import store_product_gating
from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
@@ -26,6 +32,10 @@ logger = setup_logger()
router = APIRouter(prefix="/tenants")
# Cache for Stripe publishable key to avoid hitting S3 on every request
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()
@router.post("/product-gating")
def gate_product(
@@ -113,3 +123,67 @@ async def create_subscription_session(
except Exception as e:
logger.exception("Failed to create subscription session")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/stripe-publishable-key")
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
"""
Fetch the Stripe publishable key.
Priority: env var override (for testing) > S3 bucket (production).
This endpoint is public (no auth required) since publishable keys are safe to expose.
The key is cached in memory to avoid hitting S3 on every request.
"""
global _stripe_publishable_key_cache
# Fast path: return cached value without lock
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Use lock to prevent concurrent S3 requests
async with _stripe_key_lock:
# Double-check after acquiring lock (another request may have populated cache)
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Check for env var override first (for local testing with pk_test_* keys)
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
)
try:
async with httpx.AsyncClient() as client:
response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
response.raise_for_status()
key = response.text.strip()
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
)

View File

@@ -105,3 +105,7 @@ class PendingUserSnapshot(BaseModel):
class ApproveUserRequest(BaseModel):
email: str
class StripePublishableKeyResponse(BaseModel):
    """Response payload for the public Stripe publishable-key endpoint."""

    # Stripe publishable key ("pk_" prefix); safe to expose to clients.
    publishable_key: str

View File

@@ -11,6 +11,7 @@ from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Protocol
from typing import Tuple
@@ -1456,6 +1457,9 @@ def get_default_admin_user_emails_() -> list[str]:
STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
STATE_TOKEN_LIFETIME_SECONDS = 3600
CSRF_TOKEN_KEY = "csrftoken"
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
class OAuth2AuthorizeResponse(BaseModel):
@@ -1463,13 +1467,19 @@ class OAuth2AuthorizeResponse(BaseModel):
def generate_state_token(
data: Dict[str, str], secret: SecretType, lifetime_seconds: int = 3600
data: Dict[str, str],
secret: SecretType,
lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,
) -> str:
data["aud"] = STATE_TOKEN_AUDIENCE
return generate_jwt(data, secret, lifetime_seconds)
def generate_csrf_token() -> str:
    """Return a fresh URL-safe CSRF token backed by 32 bytes of entropy."""
    token = secrets.token_urlsafe(32)
    return token
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
def create_onyx_oauth_router(
oauth_client: BaseOAuth2,
@@ -1498,6 +1508,13 @@ def get_oauth_router(
redirect_url: Optional[str] = None,
associate_by_email: bool = False,
is_verified_by_default: bool = False,
*,
csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,
csrf_token_cookie_path: str = "/",
csrf_token_cookie_domain: Optional[str] = None,
csrf_token_cookie_secure: Optional[bool] = None,
csrf_token_cookie_httponly: bool = True,
csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
) -> APIRouter:
"""Generate a router with the OAuth routes."""
router = APIRouter()
@@ -1514,6 +1531,9 @@ def get_oauth_router(
route_name=callback_route_name,
)
if csrf_token_cookie_secure is None:
csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")
@router.get(
"/authorize",
name=f"oauth:{oauth_client.name}.{backend.name}.authorize",
@@ -1521,8 +1541,10 @@ def get_oauth_router(
)
async def authorize(
request: Request,
response: Response,
redirect: bool = Query(False),
scopes: List[str] = Query(None),
) -> OAuth2AuthorizeResponse:
) -> Response | OAuth2AuthorizeResponse:
referral_source = request.cookies.get("referral_source", None)
if redirect_url is not None:
@@ -1532,9 +1554,11 @@ def get_oauth_router(
next_url = request.query_params.get("next", "/")
csrf_token = generate_csrf_token()
state_data: Dict[str, str] = {
"next_url": next_url,
"referral_source": referral_source or "default_referral",
CSRF_TOKEN_KEY: csrf_token,
}
state = generate_state_token(state_data, state_secret)
@@ -1551,6 +1575,31 @@ def get_oauth_router(
authorization_url, {"access_type": "offline", "prompt": "consent"}
)
if redirect:
redirect_response = RedirectResponse(authorization_url, status_code=302)
redirect_response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return redirect_response
response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
@log_function_time(print_only=True)
@@ -1600,7 +1649,33 @@ def get_oauth_router(
try:
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
except jwt.DecodeError:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
),
)
except jwt.ExpiredSignatureError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode,
"ACCESS_TOKEN_ALREADY_EXPIRED",
"ACCESS_TOKEN_ALREADY_EXPIRED",
),
)
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
if (
not cookie_csrf_token
or not state_csrf_token
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
)
next_url = state_data.get("next_url", "/")
referral_source = state_data.get("referral_source", None)

View File

@@ -26,10 +26,13 @@ from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.background.celery.celery_utils import make_probe_path
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.document_index.opensearch.client import (
wait_for_opensearch_with_timeout,
)
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_connector import RedisConnector
@@ -516,15 +519,17 @@ def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
"""Waits for Vespa to become ready subject to a timeout.
Raises WorkerShutdown if the timeout is reached."""
if ENABLE_OPENSEARCH_FOR_ONYX:
# TODO(andrei): Do some similar liveness checking for OpenSearch.
return
if not wait_for_vespa_with_timeout():
msg = "Vespa: Readiness probe did not succeed within the timeout. Exiting..."
msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
if not wait_for_opensearch_with_timeout():
msg = "[OpenSearch] Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
# File for validating worker liveness
class LivenessProbe(bootsteps.StartStopStep):

View File

@@ -87,7 +87,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.httpx.httpx_pool import HttpxPool
@@ -1436,7 +1436,7 @@ def _docprocessing_task(
callback=callback,
)
document_index = get_default_document_index(
document_indices = get_all_document_indices(
index_attempt.search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -1473,7 +1473,7 @@ def _docprocessing_task(
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_index=document_index,
document_indices=document_indices,
ignore_time_skip=True, # Documents are already filtered during extraction
db_session=db_session,
tenant_id=tenant_id,

View File

@@ -25,7 +25,7 @@ from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.relationships import delete_document_references_from_kg
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
@@ -97,13 +97,17 @@ def document_by_cc_pair_cleanup_task(
action = "skip"
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
# This flow is for updates and deletion so we get all indices.
document_indices = get_all_document_indices(
active_search_settings.primary,
active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_index = RetryDocumentIndex(doc_index)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
count = get_document_connector_count(db_session, document_id)
if count == 1:
@@ -113,11 +117,12 @@ def document_by_cc_pair_cleanup_task(
chunk_count = fetch_chunk_count_for_document(document_id, db_session)
_ = retry_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
for retry_document_index in retry_document_indices:
_ = retry_document_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
delete_document_references_from_kg(
db_session=db_session,
@@ -155,14 +160,18 @@ def document_by_cc_pair_cleanup_task(
hidden=doc.hidden,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# there are still other cc_pair references to the doc, so just resync to Vespa
delete_document_by_connector_credential_pair__no_commit(

View File

@@ -32,7 +32,7 @@ from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_active_search_settings_list
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.file_store.file_store import get_default_file_store
@@ -244,7 +244,8 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
search_settings=current_search_settings,
)
document_index = get_default_document_index(
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
current_search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -258,7 +259,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_index=document_index,
document_indices=document_indices,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -412,12 +413,16 @@ def process_single_user_file_delete(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
document_index = get_default_document_index(
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_index = RetryDocumentIndex(document_index)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
index_name = active_search_settings.primary.index_name
selection = f"{index_name}.document_id=='{user_file_id}'"
@@ -438,11 +443,12 @@ def process_single_user_file_delete(
else:
chunk_count = user_file.chunk_count
retry_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
for retry_document_index in retry_document_indices:
retry_document_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
# 2) Delete the user-uploaded file content from filestore (blob + metadata)
file_store = get_default_file_store()
@@ -564,12 +570,16 @@ def process_single_user_file_project_sync(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_index = RetryDocumentIndex(doc_index)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if not user_file:
@@ -579,13 +589,14 @@ def process_single_user_file_project_sync(
return None
project_ids = [project.id for project in user_file.projects]
retry_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
for retry_document_index in retry_document_indices:
retry_document_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
task_logger.info(
f"process_single_user_file_project_sync - User file id={user_file_id}"

View File

@@ -49,7 +49,7 @@ from onyx.db.search_settings import get_active_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_document_set import RedisDocumentSet
@@ -70,6 +70,8 @@ logger = setup_logger()
# celery auto associates tasks created inside another task,
# which bloats the result metadata considerably. trail=False prevents this.
# TODO(andrei): Rename all these kinds of functions from *vespa* to a more
# generic *document_index*.
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
ignore_result=True,
@@ -465,13 +467,17 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
try:
with get_session_with_current_tenant() as db_session:
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_index = RetryDocumentIndex(doc_index)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
doc = get_document(document_id, db_session)
if not doc:
@@ -500,14 +506,18 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
# aggregated_boost_factor=doc.aggregated_boost_factor,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update db last. Worst case = we crash right before this and
# the sync might repeat again later

View File

@@ -7,6 +7,7 @@ from typing import Any
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.emitter import Emitter
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
@@ -15,6 +16,11 @@ from onyx.tools.models import ToolCallInfo
from onyx.utils.threadpool_concurrency import run_in_background
from onyx.utils.threadpool_concurrency import wait_on_background
# Type alias for search doc deduplication key
# Simple key: just document_id (str)
# Full key: (document_id, chunk_ind, match_highlights)
SearchDocKey = str | tuple[str, int, tuple[str, ...]]
class ChatStateContainer:
"""Container for accumulating state during LLM loop execution.
@@ -40,6 +46,10 @@ class ChatStateContainer:
# True if this turn is a clarification question (deep research flow)
self.is_clarification: bool = False
# Note: LLM cost tracking is now handled in multi_llm.py
# Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
# Track which citation numbers were actually emitted during streaming
self._emitted_citations: set[int] = set()
def add_tool_call(self, tool_call: ToolCallInfo) -> None:
"""Add a tool call to the accumulated state."""
@@ -91,6 +101,54 @@ class ChatStateContainer:
with self._lock:
return self.is_clarification
@staticmethod
def create_search_doc_key(
search_doc: SearchDoc, use_simple_key: bool = True
) -> SearchDocKey:
"""Create a unique key for a SearchDoc for deduplication.
Args:
search_doc: The SearchDoc to create a key for
use_simple_key: If True (default), use only document_id for deduplication.
If False, include chunk_ind and match_highlights so that the same
document/chunk with different highlights are stored separately.
"""
if use_simple_key:
return search_doc.document_id
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
def add_search_docs(
self, search_docs: list[SearchDoc], use_simple_key: bool = True
) -> None:
"""Add search docs to the accumulated collection with deduplication.
Args:
search_docs: List of SearchDoc objects to add
use_simple_key: If True (default), deduplicate by document_id only.
If False, deduplicate by document_id + chunk_ind + match_highlights.
"""
with self._lock:
for doc in search_docs:
key = self.create_search_doc_key(doc, use_simple_key)
if key not in self._all_search_docs:
self._all_search_docs[key] = doc
def get_all_search_docs(self) -> dict[SearchDocKey, SearchDoc]:
"""Thread-safe getter for all accumulated search docs (returns a copy)."""
with self._lock:
return self._all_search_docs.copy()
def add_emitted_citation(self, citation_num: int) -> None:
"""Add a citation number that was actually emitted during streaming."""
with self._lock:
self._emitted_citations.add(citation_num)
def get_emitted_citations(self) -> set[int]:
"""Thread-safe getter for emitted citations (returns a copy)."""
with self._lock:
return self._emitted_citations.copy()
def run_chat_loop_with_state_containers(
func: Callable[..., None],

View File

@@ -53,6 +53,50 @@ def update_citation_processor_from_tool_response(
citation_processor.update_citation_mapping(citation_to_doc)
def extract_citation_order_from_text(text: str) -> list[int]:
"""Extract citation numbers from text in order of first appearance.
Parses citation patterns like [1], [1, 2], [[1]], 【1】 etc. and returns
the citation numbers in the order they first appear in the text.
Args:
text: The text containing citations
Returns:
List of citation numbers in order of first appearance (no duplicates)
"""
# Same pattern used in collapse_citations and DynamicCitationProcessor
# Group 2 captures the number in double bracket format: [[1]], 【【1】】
# Group 4 captures the numbers in single bracket format: [1], [1, 2]
citation_pattern = re.compile(
r"([\[【[]{2}(\d+)[\]】]]{2})|([\[【[]([\d]+(?: *, *\d+)*)[\]】]])"
)
seen: set[int] = set()
order: list[int] = []
for match in citation_pattern.finditer(text):
# Group 2 is for double bracket single number, group 4 is for single bracket
if match.group(2):
nums_str = match.group(2)
elif match.group(4):
nums_str = match.group(4)
else:
continue
for num_str in nums_str.split(","):
num_str = num_str.strip()
if num_str:
try:
num = int(num_str)
if num not in seen:
seen.add(num)
order.append(num)
except ValueError:
continue
return order
def collapse_citations(
answer_text: str,
existing_citation_mapping: CitationMapping,

View File

@@ -45,6 +45,7 @@ from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
)
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tracing.framework.create import trace
@@ -453,12 +454,16 @@ def run_llm_loop(
# The section below calculates the available tokens for history a bit more accurately
# now that project files are loaded in.
if persona and persona.replace_base_system_prompt and persona.system_prompt:
if persona and persona.replace_base_system_prompt:
# Handles the case where user has checked off the "Replace base system prompt" checkbox
system_prompt = ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
system_prompt = (
ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
)
if persona.system_prompt
else None
)
custom_agent_prompt_msg = None
else:
@@ -612,6 +617,7 @@ def run_llm_loop(
next_citation_num=citation_processor.get_next_citation_number(),
max_concurrent_tools=None,
skip_search_query_expansion=has_called_search_tool,
url_snippet_map=extract_url_snippet_map(gathered_documents or []),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = parallel_tool_call_results.updated_citation_mapping
@@ -650,8 +656,15 @@ def run_llm_loop(
# Extract search_docs if this is a search tool response
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
if gathered_documents:
gathered_documents.extend(search_docs)
else:
@@ -685,7 +698,7 @@ def run_llm_loop(
reasoning_tokens=llm_step_result.reasoning, # All tool calls from this loop share the same reasoning
tool_call_arguments=tool_call.tool_args,
tool_call_response=saved_response,
search_docs=search_docs,
search_docs=displayed_docs or search_docs,
generated_images=generated_images,
)
# Add to state container for partial save support

View File

@@ -14,6 +14,7 @@ from onyx.chat.emitter import Emitter
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import ChatFileType
@@ -432,7 +433,7 @@ def translate_history_to_llm_format(
for idx, msg in enumerate(history):
# if the message is being added to the history
if msg.message_type in [
if PROMPT_CACHE_CHAT_HISTORY and msg.message_type in [
MessageType.SYSTEM,
MessageType.USER,
MessageType.ASSISTANT,
@@ -859,6 +860,11 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(
result.citation_number
)
else:
# When citation_processor is None, use delta.content directly without modification
accumulated_answer += delta.content
@@ -985,6 +991,9 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(result.citation_number)
# Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
# Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)

View File

@@ -42,7 +42,6 @@ from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_session_by_id
@@ -744,27 +743,16 @@ def llm_loop_completion_handle(
else:
final_answer = "The generation was stopped by the user."
# Build citation_docs_info from accumulated citations in state container
citation_docs_info: list[CitationDocInfo] = []
seen_citation_nums: set[int] = set()
for citation_num, search_doc in state_container.citation_to_doc.items():
if citation_num not in seen_citation_nums:
seen_citation_nums.add(citation_num)
citation_docs_info.append(
CitationDocInfo(
search_doc=search_doc,
citation_number=citation_num,
)
)
save_chat_turn(
message_text=final_answer,
reasoning_tokens=state_container.reasoning_tokens,
citation_docs_info=citation_docs_info,
citation_to_doc=state_container.citation_to_doc,
tool_calls=state_container.tool_calls,
all_search_docs=state_container.get_all_search_docs(),
db_session=db_session,
assistant_message=assistant_message,
is_clarification=state_container.is_clarification,
emitted_citations=state_container.get_emitted_citations(),
)

View File

@@ -2,8 +2,9 @@ import json
from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import SearchDocKey
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import add_search_docs_to_chat_message
from onyx.db.chat import add_search_docs_to_tool_call
@@ -19,22 +20,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def _create_search_doc_key(search_doc: SearchDoc) -> tuple[str, int, tuple[str, ...]]:
"""
Create a unique key for a SearchDoc that accounts for different versions of the same
document/chunk with different match_highlights.
Args:
search_doc: The SearchDoc pydantic model to create a key for
Returns:
A tuple of (document_id, chunk_ind, sorted match_highlights) that uniquely identifies
this specific version of the document
"""
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
def _create_and_link_tool_calls(
tool_calls: list[ToolCallInfo],
assistant_message: ChatMessage,
@@ -154,38 +139,36 @@ def save_chat_turn(
message_text: str,
reasoning_tokens: str | None,
tool_calls: list[ToolCallInfo],
citation_docs_info: list[CitationDocInfo],
citation_to_doc: dict[int, SearchDoc],
all_search_docs: dict[SearchDocKey, SearchDoc],
db_session: Session,
assistant_message: ChatMessage,
is_clarification: bool = False,
emitted_citations: set[int] | None = None,
) -> None:
"""
Save a chat turn by populating the assistant_message and creating related entities.
This function:
1. Updates the ChatMessage with text, reasoning tokens, and token count
2. Creates SearchDoc entries from ToolCall search_docs (for tool calls that returned documents)
3. Collects all unique SearchDocs from all tool calls and links them to ChatMessage
4. Builds citation mapping from citation_docs_info
5. Links all unique SearchDocs from tool calls to the ChatMessage
2. Creates DB SearchDoc entries from pre-deduplicated all_search_docs
3. Builds tool_call -> search_doc mapping for displayed docs
4. Builds citation mapping from citation_to_doc
5. Links all unique SearchDocs to the ChatMessage
6. Creates ToolCall entries and links SearchDocs to them
7. Builds the citations mapping for the ChatMessage
Deduplication Logic:
- SearchDocs are deduplicated using (document_id, chunk_ind, match_highlights) as the key
- This ensures that the same document/chunk with different match_highlights (from different
queries) are stored as separate SearchDoc entries
- Each ToolCall and ChatMessage will map to the correct version of the SearchDoc that
matches its specific query highlights
Args:
message_text: The message content to save
reasoning_tokens: Optional reasoning tokens for the message
tool_calls: List of tool call information to create ToolCall entries (may include search_docs)
citation_docs_info: List of citation document information for building citations mapping
citation_to_doc: Mapping from citation number to SearchDoc for building citations
all_search_docs: Pre-deduplicated search docs from ChatStateContainer
db_session: Database session for persistence
assistant_message: The ChatMessage object to populate (should already exist in DB)
is_clarification: Whether this assistant message is a clarification question (deep research flow)
emitted_citations: Set of citation numbers that were actually emitted during streaming.
If provided, only citations in this set will be saved; others are filtered out.
"""
# 1. Update ChatMessage with message content, reasoning tokens, and token count
assistant_message.message = message_text
@@ -200,53 +183,53 @@ def save_chat_turn(
else:
assistant_message.token_count = 0
# 2. Create SearchDoc entries from tool_calls
# Build mapping from SearchDoc to DB SearchDoc ID
# Use (document_id, chunk_ind, match_highlights) as key to avoid duplicates
# while ensuring different versions with different highlights are stored separately
search_doc_key_to_id: dict[tuple[str, int, tuple[str, ...]], int] = {}
tool_call_to_search_doc_ids: dict[str, list[int]] = {}
# 2. Create DB SearchDoc entries from pre-deduplicated all_search_docs
search_doc_key_to_id: dict[SearchDocKey, int] = {}
for key, search_doc_py in all_search_docs.items():
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[key] = db_search_doc.id
# Process tool calls and their search docs
# 3. Build tool_call -> search_doc mapping (for displayed docs in each tool call)
tool_call_to_search_doc_ids: dict[str, list[int]] = {}
for tool_call_info in tool_calls:
if tool_call_info.search_docs:
search_doc_ids_for_tool: list[int] = []
for search_doc_py in tool_call_info.search_docs:
# Create a unique key for this SearchDoc version
search_doc_key = _create_search_doc_key(search_doc_py)
# Check if we've already created this exact SearchDoc version
if search_doc_key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[search_doc_key])
key = ChatStateContainer.create_search_doc_key(search_doc_py)
if key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[key])
else:
# Create new DB SearchDoc entry
# Displayed doc not in all_search_docs - create it
# This can happen if displayed_docs contains docs not in search_docs
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[search_doc_key] = db_search_doc.id
search_doc_key_to_id[key] = db_search_doc.id
search_doc_ids_for_tool.append(db_search_doc.id)
tool_call_to_search_doc_ids[tool_call_info.tool_call_id] = list(
set(search_doc_ids_for_tool)
)
# 3. Collect all unique SearchDoc IDs from all tool calls to link to ChatMessage
# Use a set to deduplicate by ID (since we've already deduplicated by key above)
all_search_doc_ids_set: set[int] = set()
for search_doc_ids in tool_call_to_search_doc_ids.values():
all_search_doc_ids_set.update(search_doc_ids)
# Collect all search doc IDs for ChatMessage linking
all_search_doc_ids_set: set[int] = set(search_doc_key_to_id.values())
# 4. Build citation mapping from citation_docs_info
# 4. Build a citation mapping from the citation number to the saved DB SearchDoc ID
# Only include citations that were actually emitted during streaming
citation_number_to_search_doc_id: dict[int, int] = {}
for citation_doc_info in citation_docs_info:
# Extract SearchDoc pydantic model
search_doc_py = citation_doc_info.search_doc
for citation_num, search_doc_py in citation_to_doc.items():
# Skip citations that weren't actually emitted (if emitted_citations is provided)
if emitted_citations is not None and citation_num not in emitted_citations:
continue
# Create the unique key for this SearchDoc version
search_doc_key = _create_search_doc_key(search_doc_py)
search_doc_key = ChatStateContainer.create_search_doc_key(search_doc_py)
# Get the search doc ID (should already exist from processing tool_calls)
if search_doc_key in search_doc_key_to_id:
@@ -283,10 +266,7 @@ def save_chat_turn(
all_search_doc_ids_set.add(db_search_doc_id)
# Build mapping from citation number to search doc ID
if citation_doc_info.citation_number is not None:
citation_number_to_search_doc_id[citation_doc_info.citation_number] = (
db_search_doc_id
)
citation_number_to_search_doc_id[citation_num] = db_search_doc_id
# 5. Link all unique SearchDocs (from both tool calls and citations) to ChatMessage
final_search_doc_ids: list[int] = list(all_search_doc_ids_set)
@@ -306,23 +286,10 @@ def save_chat_turn(
tool_call_to_search_doc_ids=tool_call_to_search_doc_ids,
)
# 7. Build citations mapping from citation_docs_info
# Any citation_doc_info with a citation_number appeared in the text and should be mapped
citations: dict[int, int] = {}
for citation_doc_info in citation_docs_info:
if citation_doc_info.citation_number is not None:
search_doc_id = citation_number_to_search_doc_id.get(
citation_doc_info.citation_number
)
if search_doc_id is not None:
citations[citation_doc_info.citation_number] = search_doc_id
else:
logger.warning(
f"Citation number {citation_doc_info.citation_number} found in citation_docs_info "
f"but no matching search doc ID in mapping"
)
assistant_message.citations = citations if citations else None
# 7. Build citations mapping - use the mapping we already built in step 4
assistant_message.citations = (
citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
)
# Finally save the messages, tool calls, and docs
db_session.commit()

View File

@@ -208,8 +208,19 @@ OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 920
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
OPENSEARCH_ADMIN_PASSWORD = os.environ.get("OPENSEARCH_ADMIN_PASSWORD", "")
ENABLE_OPENSEARCH_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_FOR_ONYX", "").lower() == "true"
# This is the "base" config for now, the idea is that at least for our dev
# environments we always want to be dual indexing into both OpenSearch and Vespa
# to stress test the new codepaths. Only enable this if there is some instance
# of OpenSearch running for the relevant Onyx instance.
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "").lower() == "true"
)
# Given that the "base" config above is true, this enables whether we want to
# retrieve from OpenSearch or Vespa. We want to be able to quickly toggle this
# in the event we see issues with OpenSearch retrieval in our dev environments.
ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
)
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
@@ -738,6 +749,10 @@ JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default
LOG_ONYX_MODEL_INTERACTIONS = (
os.environ.get("LOG_ONYX_MODEL_INTERACTIONS", "").lower() == "true"
)
PROMPT_CACHE_CHAT_HISTORY = (
os.environ.get("PROMPT_CACHE_CHAT_HISTORY", "").lower() == "true"
)
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
LOG_VESPA_TIMING_INFORMATION = (
@@ -1016,3 +1031,14 @@ INSTANCE_TYPE = (
## Discord Bot Configuration
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")
DISCORD_BOT_INVOKE_CHAR = os.environ.get("DISCORD_BOT_INVOKE_CHAR", "!")
## Stripe Configuration
# URL to fetch the Stripe publishable key from a public S3 bucket.
# Publishable keys are safe to expose publicly - they can only initialize
# Stripe.js and tokenize payment info, not make charges or access data.
STRIPE_PUBLISHABLE_KEY_URL = (
"https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt"
)
# Override for local testing with Stripe test keys (pk_test_*)
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")

View File

@@ -1,6 +1,5 @@
import os
INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml"
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
NUM_RETURNED_HITS = 50

View File

@@ -15,6 +15,7 @@ from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.natural_language_processing.english_stopwords import ENGLISH_STOPWORDS_SET
from onyx.onyxbot.slack.models import ChannelType
from onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
@@ -113,7 +114,7 @@ def is_recency_query(query: str) -> bool:
if not has_recency_keyword:
return False
# Get combined stop words (NLTK + Slack-specific)
# Get combined stop words (English + Slack-specific)
all_stop_words = _get_combined_stop_words()
# Extract content words (excluding stop words)
@@ -488,7 +489,7 @@ def build_channel_override_query(channel_references: set[str], time_filter: str)
return f"__CHANNEL_OVERRIDE__ {channel_filter}{time_filter}"
# Slack-specific stop words (in addition to standard NLTK stop words)
# Slack-specific stop words (in addition to standard English stop words)
# These include Slack-specific terms and temporal/recency keywords
SLACK_SPECIFIC_STOP_WORDS = frozenset(
RECENCY_KEYWORDS
@@ -508,27 +509,16 @@ SLACK_SPECIFIC_STOP_WORDS = frozenset(
)
def _get_combined_stop_words() -> set[str]:
"""Get combined NLTK + Slack-specific stop words.
def _get_combined_stop_words() -> frozenset[str]:
"""Get combined English + Slack-specific stop words.
Returns a set of stop words for filtering content words.
Falls back to just Slack-specific stop words if NLTK is unavailable.
Returns a frozenset of stop words for filtering content words.
Note: Currently only supports English stop words. Non-English queries
may have suboptimal content word extraction. Future enhancement could
detect query language and load appropriate stop words.
"""
try:
from nltk.corpus import stopwords # type: ignore
# TODO: Support multiple languages - currently hardcoded to English
# Could detect language or allow configuration
nltk_stop_words = set(stopwords.words("english"))
except Exception:
# Fallback if NLTK not available
nltk_stop_words = set()
return nltk_stop_words | SLACK_SPECIFIC_STOP_WORDS
return ENGLISH_STOPWORDS_SET | SLACK_SPECIFIC_STOP_WORDS
def extract_content_words_from_recency_query(
@@ -536,7 +526,7 @@ def extract_content_words_from_recency_query(
) -> list[str]:
"""Extract meaningful content words from a recency query.
Filters out NLTK stop words, Slack-specific terms, channel references, and proper nouns.
Filters out English stop words, Slack-specific terms, channel references, and proper nouns.
Args:
query_text: The user's query text
@@ -545,7 +535,7 @@ def extract_content_words_from_recency_query(
Returns:
List of content words (up to MAX_CONTENT_WORDS)
"""
# Get combined stop words (NLTK + Slack-specific)
# Get combined stop words (English + Slack-specific)
all_stop_words = _get_combined_stop_words()
words = query_text.split()
@@ -567,6 +557,23 @@ def extract_content_words_from_recency_query(
return content_words_filtered[:MAX_CONTENT_WORDS]
def _is_valid_keyword_query(line: str) -> bool:
"""Check if a line looks like a valid keyword query vs explanatory text.
Returns False for lines that appear to be LLM explanations rather than keywords.
"""
# Reject lines that start with parentheses (explanatory notes)
if line.startswith("("):
return False
# Reject lines that are too long (likely sentences, not keywords)
# Keywords should be short - reject if > 50 chars or > 6 words
if len(line) > 50 or len(line.split()) > 6:
return False
return True
def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
"""Use LLM to expand query into multiple search variations.
@@ -589,10 +596,18 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
response_clean = _parse_llm_code_block_response(response)
# Split into lines and filter out empty lines
rephrased_queries = [
raw_queries = [
line.strip() for line in response_clean.split("\n") if line.strip()
]
# Filter out lines that look like explanatory text rather than keywords
rephrased_queries = [q for q in raw_queries if _is_valid_keyword_query(q)]
# Log if we filtered out garbage
if len(raw_queries) != len(rephrased_queries):
filtered_out = set(raw_queries) - set(rephrased_queries)
logger.warning(f"Filtered out non-keyword LLM responses: {filtered_out}")
# If no queries generated, use empty query
if not rephrased_queries:
logger.debug("No content keywords extracted from query expansion")

View File

@@ -144,10 +144,6 @@ class BasicChunkRequest(BaseModel):
# In case some queries favor recency more than other queries.
recency_bias_multiplier: float = 1.0
# Sometimes we may want to extract specific keywords from a more semantic query for
# a better keyword search.
query_keywords: list[str] | None = None # Not used currently
limit: int | None = None
offset: int | None = None # This one is not set currently
@@ -166,6 +162,8 @@ class ChunkIndexRequest(BasicChunkRequest):
# Calculated final filters
filters: IndexFilters
query_keywords: list[str] | None = None
class ContextExpansionType(str, Enum):
NOT_RELEVANT = "not_relevant"
@@ -372,6 +370,10 @@ class SearchDocsResponse(BaseModel):
# document id is the most staightforward way.
citation_mapping: dict[int, str]
# For cases where the frontend only needs to display a subset of the search docs
# The whole list is typically still needed for later steps but this set should be saved separately
displayed_docs: list[SearchDoc] | None = None
class SavedSearchDoc(SearchDoc):
db_doc_id: int
@@ -430,11 +432,6 @@ class SavedSearchDoc(SearchDoc):
return self_score < other_score
class CitationDocInfo(BaseModel):
search_doc: SearchDoc
citation_number: int | None
class SavedSearchDocWithContent(SavedSearchDoc):
"""Used for endpoints that need to return the actual contents of the retrieved
section in addition to the match_highlights."""

View File

@@ -19,6 +19,7 @@ from onyx.db.models import Persona
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.interfaces import LLM
from onyx.natural_language_processing.english_stopwords import strip_stopwords
from onyx.secondary_llm_flows.source_filter import extract_source_filter
from onyx.secondary_llm_flows.time_filter import extract_time_filter
from onyx.utils.logger import setup_logger
@@ -278,12 +279,16 @@ def search_pipeline(
bypass_acl=chunk_search_request.bypass_acl,
)
query_keywords = strip_stopwords(chunk_search_request.query)
query_request = ChunkIndexRequest(
query=chunk_search_request.query,
hybrid_alpha=chunk_search_request.hybrid_alpha,
recency_bias_multiplier=chunk_search_request.recency_bias_multiplier,
query_keywords=chunk_search_request.query_keywords,
query_keywords=query_keywords,
filters=filters,
limit=chunk_search_request.limit,
offset=chunk_search_request.offset,
)
retrieved_chunks = search_chunks(

View File

@@ -23,45 +23,6 @@ from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
def _dedupe_chunks(
    chunks: list[InferenceChunk],
) -> list[InferenceChunk]:
    """Collapse duplicate chunks keyed by (document_id, chunk_id).

    The first occurrence of a key wins unless a later duplicate has a strictly
    higher score; a missing score is treated as 0. Returns the surviving
    chunks (order follows dict insertion order of first occurrences).
    """
    best_by_key: dict[tuple[str, int], InferenceChunk] = {}
    for candidate in chunks:
        dedupe_key = (candidate.document_id, candidate.chunk_id)
        existing = best_by_key.get(dedupe_key)
        # Keep the existing chunk on ties — only a strictly better score replaces it.
        if existing is None or (existing.score or 0) < (candidate.score or 0):
            best_by_key[dedupe_key] = candidate
    return list(best_by_key.values())
def download_nltk_data() -> None:
    """Ensure the NLTK resources used by Onyx are present locally.

    Each required resource is checked via nltk.data.find and downloaded only
    when missing. Download failures are logged, never raised, so startup is
    best-effort here.
    """
    import nltk  # type: ignore[import-untyped]

    resources = {
        "stopwords": "corpora/stopwords",
        # "wordnet": "corpora/wordnet",  # Not in use
        "punkt_tab": "tokenizers/punkt_tab",
    }

    for name, lookup_path in resources.items():
        try:
            nltk.data.find(lookup_path)
        except LookupError:
            # Resource is missing — attempt a quiet download.
            try:
                logger.info(f"Downloading {name}...")
                nltk.download(name, quiet=True)
                logger.info(f"{name} downloaded successfully.")
            except Exception as e:
                logger.error(f"Failed to download {name}. Error: {e}")
        else:
            logger.info(f"{name} is already downloaded.")
def combine_retrieval_results(
chunk_sets: list[list[InferenceChunk]],
) -> list[InferenceChunk]:

View File

@@ -3,6 +3,8 @@ from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
@@ -18,45 +20,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def insert_input_prompt_if_not_exists(
    user: User | None,
    input_prompt_id: int | None,
    prompt: str,
    content: str,
    active: bool,
    is_public: bool,
    db_session: Session,
    commit: bool = True,
) -> InputPrompt:
    """Return an existing InputPrompt or create one when no match is found.

    Lookup is by primary key when ``input_prompt_id`` is provided; otherwise
    by prompt text scoped to the given user (or to ownerless prompts when
    ``user`` is None). A newly created prompt is forced public when there is
    no owning user. Commits only when ``commit`` is True.
    """
    if input_prompt_id is None:
        lookup = db_session.query(InputPrompt).filter(InputPrompt.prompt == prompt)
        # Scope the text lookup to this user's prompts, or to ownerless ones.
        lookup = (
            lookup.filter(InputPrompt.user_id == user.id)
            if user
            else lookup.filter(InputPrompt.user_id.is_(None))
        )
        existing = lookup.first()
    else:
        existing = db_session.query(InputPrompt).filter_by(id=input_prompt_id).first()

    if existing is not None:
        return existing

    created = InputPrompt(
        id=input_prompt_id,
        prompt=prompt,
        content=content,
        active=active,
        is_public=is_public or user is None,
        user_id=user.id if user else None,
    )
    db_session.add(created)
    if commit:
        db_session.commit()
    return created
def insert_input_prompt(
prompt: str,
content: str,
@@ -64,16 +27,41 @@ def insert_input_prompt(
user: User | None,
db_session: Session,
) -> InputPrompt:
input_prompt = InputPrompt(
user_id = user.id if user else None
# Use atomic INSERT ... ON CONFLICT DO NOTHING with RETURNING
# to avoid race conditions with the uniqueness check
stmt = pg_insert(InputPrompt).values(
prompt=prompt,
content=content,
active=True,
is_public=is_public,
user_id=user.id if user is not None else None,
user_id=user_id,
)
db_session.add(input_prompt)
db_session.commit()
# Use the appropriate constraint based on whether this is a user-owned or public prompt
if user_id is not None:
stmt = stmt.on_conflict_do_nothing(constraint="uq_inputprompt_prompt_user_id")
else:
# Partial unique indexes cannot be targeted by constraint name;
# must use index_elements + index_where
stmt = stmt.on_conflict_do_nothing(
index_elements=[InputPrompt.prompt],
index_where=InputPrompt.user_id.is_(None),
)
stmt = stmt.returning(InputPrompt)
result = db_session.execute(stmt)
input_prompt = result.scalar_one_or_none()
if input_prompt is None:
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
db_session.commit()
return input_prompt
@@ -98,23 +86,40 @@ def update_input_prompt(
input_prompt.content = content
input_prompt.active = active
db_session.commit()
try:
db_session.commit()
except IntegrityError:
db_session.rollback()
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
return input_prompt
def validate_user_prompt_authorization(
user: User | None, input_prompt: InputPrompt
) -> bool:
"""
Check if the user is authorized to modify the given input prompt.
Returns True only if the user owns the prompt.
Returns False for public prompts (only admins can modify those),
unless auth is disabled (then anyone can manage public prompts).
"""
prompt = InputPromptSnapshot.from_model(input_prompt=input_prompt)
if prompt.user_id is not None:
if user is None:
return False
# Public prompts cannot be modified via the user API (unless auth is disabled)
if prompt.is_public or prompt.user_id is None:
return AUTH_TYPE == AuthType.DISABLED
user_details = UserInfo.from_model(user)
if str(user_details.id) != str(prompt.user_id):
return False
return True
# User must be logged in
if user is None:
return False
# User must own the prompt
user_details = UserInfo.from_model(user)
return str(user_details.id) == str(prompt.user_id)
def remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> None:

View File

@@ -9,6 +9,9 @@ def get_memories(user: User | None, db_session: Session) -> list[str]:
if user is None:
return []
if not user.use_memories:
return []
user_info = [
f"User's name: {user.personal_name}" if user.personal_name else "",
f"User's role: {user.personal_role}" if user.personal_role else "",

View File

@@ -188,6 +188,7 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
nullable=True,
default=None,
)
chat_background: Mapped[str | None] = mapped_column(String, nullable=True)
# personalization fields are exposed via the chat user settings "Personalization" tab
personal_name: Mapped[str | None] = mapped_column(String, nullable=True)
personal_role: Mapped[str | None] = mapped_column(String, nullable=True)
@@ -3626,6 +3627,18 @@ class InputPrompt(Base):
ForeignKey("user.id", ondelete="CASCADE"), nullable=True
)
__table_args__ = (
# Unique constraint on (prompt, user_id) for user-owned prompts
UniqueConstraint("prompt", "user_id", name="uq_inputprompt_prompt_user_id"),
# Partial unique index for public prompts (user_id IS NULL)
Index(
"uq_inputprompt_prompt_public",
"prompt",
unique=True,
postgresql_where=text("user_id IS NULL"),
),
)
class InputPrompt__User(Base):
__tablename__ = "inputprompt__user"
@@ -3634,7 +3647,7 @@ class InputPrompt__User(Base):
ForeignKey("inputprompt.id"), primary_key=True
)
user_id: Mapped[UUID | None] = mapped_column(
ForeignKey("inputprompt.id"), primary_key=True
ForeignKey("user.id"), primary_key=True
)
disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

View File

@@ -20,7 +20,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.key_value_store.factory import get_kv_store
from onyx.utils.logger import setup_logger
@@ -80,39 +80,43 @@ def _perform_index_swap(
db_session=db_session,
)
# remove the old index from the vector db
document_index = get_default_document_index(new_search_settings, None)
# This flow is for checking and possibly creating an index so we get all
# indices.
document_indices = get_all_document_indices(new_search_settings, None, None)
WAIT_SECONDS = 5
success = False
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
try:
logger.notice(
f"Vespa index swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
)
for document_index in document_indices:
success = False
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
try:
logger.notice(
f"Document index {document_index.__class__.__name__} swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
)
logger.notice("Vespa index swap complete.")
success = True
break
except Exception:
logger.exception(
f"Vespa index swap did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
logger.notice("Document index swap complete.")
success = True
break
except Exception:
logger.exception(
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
f"The document index services may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not success:
logger.error(
f"Vespa index swap did not succeed. Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
)
return None
if not success:
logger.error(
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
f"Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
)
return None
return current_search_settings

View File

@@ -139,6 +139,20 @@ def update_user_theme_preference(
db_session.commit()
def update_user_chat_background(
user_id: UUID,
chat_background: str | None,
db_session: Session,
) -> None:
"""Update user's chat background setting."""
db_session.execute(
update(User)
.where(User.id == user_id) # type: ignore
.values(chat_background=chat_background)
)
db_session.commit()
def update_user_personalization(
user_id: UUID,
*,

View File

@@ -287,6 +287,7 @@ def run_deep_research_llm_loop(
token_count=100,
message_type=MessageType.USER,
)
truncated_message_history = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,

View File

@@ -2,13 +2,18 @@ from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
def generate_enriched_content_for_chunk(chunk: DocMetadataAwareIndexChunk) -> str:
def generate_enriched_content_for_chunk_text(chunk: DocMetadataAwareIndexChunk) -> str:
    """Concatenate title prefix, doc summary, content, chunk context and the
    keyword-form metadata suffix — the text stored for keyword (BM25) search."""
    return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_keyword}"
def generate_enriched_content_for_chunk_embedding(chunk: DocAwareChunk) -> str:
    """Concatenate title prefix, doc summary, content, chunk context and the
    semantic-form metadata suffix — the text fed to the embedder."""
    return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
def cleanup_content_for_chunks(
chunks: list[InferenceChunkUncleaned],
) -> list[InferenceChunk]:

View File

@@ -1,9 +1,8 @@
import httpx
from sqlalchemy.orm import Session
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchOldDocumentIndex,
@@ -17,17 +16,24 @@ def get_default_document_index(
secondary_search_settings: SearchSettings | None,
httpx_client: httpx.Client | None = None,
) -> DocumentIndex:
"""Primary index is the index that is used for querying/updating etc.
Secondary index is for when both the currently used index and the upcoming
index both need to be updated, updates are applied to both indices"""
"""Gets the default document index from env vars.
To be used for retrieval only. Indexing should be done through both indices
until Vespa is deprecated.
Pre-existing docstring for this function, although secondary indices are not
currently supported:
Primary index is the index that is used for querying/updating etc. Secondary
index is for when both the currently used index and the upcoming index both
need to be updated, updates are applied to both indices.
"""
secondary_index_name: str | None = None
secondary_large_chunks_enabled: bool | None = None
if secondary_search_settings:
secondary_index_name = secondary_search_settings.index_name
secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled
if ENABLE_OPENSEARCH_FOR_ONYX:
if ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX:
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
secondary_index_name=secondary_index_name,
@@ -47,12 +53,48 @@ def get_default_document_index(
)
def get_current_primary_default_document_index(db_session: Session) -> DocumentIndex:
def get_all_document_indices(
search_settings: SearchSettings,
secondary_search_settings: SearchSettings | None,
httpx_client: httpx.Client | None = None,
) -> list[DocumentIndex]:
"""Gets all document indices.
NOTE: Will only return an OpenSearch index interface if
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX is True. This is so we don't break flows
where we know it won't be enabled.
Used for indexing only. Until Vespa is deprecated we will index into both
document indices. Retrieval is done through only one index however.
Large chunks and secondary indices are not currently supported so we
hardcode appropriate values.
"""
TODO: Use redis to cache this or something
"""
search_settings = get_current_search_settings(db_session)
return get_default_document_index(
search_settings,
None,
vespa_document_index = VespaIndex(
index_name=search_settings.index_name,
secondary_index_name=(
secondary_search_settings.index_name if secondary_search_settings else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=(
secondary_search_settings.large_chunks_enabled
if secondary_search_settings
else None
),
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)
result: list[DocumentIndex] = [vespa_document_index]
if opensearch_document_index:
result.append(opensearch_document_index)
return result

View File

@@ -1,4 +1,5 @@
import logging
import time
from typing import Any
from typing import Generic
from typing import TypeVar
@@ -569,6 +570,9 @@ class OpenSearchClient:
def close(self) -> None:
"""Closes the client.
TODO(andrei): Can we have some way to auto close when the client no
longer has any references?
Raises:
Exception: There was an error closing the client.
"""
@@ -596,3 +600,55 @@ class OpenSearchClient:
)
hits_second_layer: list[Any] = hits_first_layer.get("hits", [])
return hits_second_layer
def wait_for_opensearch_with_timeout(
    wait_interval_s: int = 5,
    wait_limit_s: int = 60,
    client: OpenSearchClient | None = None,
) -> bool:
    """Waits for OpenSearch to become ready subject to a timeout.

    Will create a new dummy client if no client is provided. Will close this
    client at the end of the function. Will not close the client if it was
    supplied.

    Args:
        wait_interval_s: The interval in seconds to wait between checks.
            Defaults to 5.
        wait_limit_s: The total timeout in seconds to wait for OpenSearch to
            become ready. Defaults to 60.
        client: The OpenSearch client to use for pinging. If None, a new dummy
            client will be created. Defaults to None.

    Returns:
        True if OpenSearch is ready, False otherwise.
    """
    owns_client = client is None
    if owns_client:
        # NOTE: index_name does not matter because we are only using this object
        # to ping.
        # TODO(andrei): Make this better.
        client = OpenSearchClient(index_name="")
    try:
        start = time.monotonic()
        while not client.ping():
            elapsed = time.monotonic() - start
            if elapsed > wait_limit_s:
                logger.info(
                    f"[OpenSearch] Readiness probe did not succeed within the timeout "
                    f"({wait_limit_s} seconds)."
                )
                return False
            logger.info(
                f"[OpenSearch] Readiness probe ongoing. elapsed={elapsed:.1f} timeout={wait_limit_s:.1f}"
            )
            time.sleep(wait_interval_s)
        logger.info("[OpenSearch] Readiness probe succeeded. Continuing...")
        return True
    finally:
        # Only close the client we created; a caller-supplied client is theirs.
        if owns_client:
            client.close()

View File

@@ -17,7 +17,7 @@ from onyx.db.enums import EmbeddingPrecision
from onyx.db.models import DocumentSource
from onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk,
generate_enriched_content_for_chunk_text,
)
from onyx.document_index.interfaces import DocumentIndex as OldDocumentIndex
from onyx.document_index.interfaces import (
@@ -140,9 +140,12 @@ def _convert_onyx_chunk_to_opensearch_document(
return DocumentChunk(
document_id=chunk.source_document.id,
chunk_index=chunk.chunk_id,
title=chunk.source_document.title,
# Use get_title_for_document_index to match the logic used when creating
# the title_embedding in the embedder. This method falls back to
# semantic_identifier when title is None (but not empty string).
title=chunk.source_document.get_title_for_document_index(),
title_vector=chunk.title_embedding,
content=generate_enriched_content_for_chunk(chunk),
content=generate_enriched_content_for_chunk_text(chunk),
content_vector=chunk.embeddings.full_embedding,
source_type=chunk.source_document.source.value,
metadata_list=chunk.source_document.get_metadata_str_attributes(),
@@ -421,6 +424,24 @@ class OpenSearchDocumentIndex(DocumentIndex):
def verify_and_create_index_if_necessary(
self, embedding_dim: int, embedding_precision: EmbeddingPrecision
) -> None:
"""Verifies and creates the index if necessary.
Also puts the desired search pipeline state, creating the pipelines if
they do not exist and updating them otherwise.
Args:
embedding_dim: Vector dimensionality for the vector similarity part
of the search.
embedding_precision: Precision of the values of the vectors for the
similarity part of the search.
Raises:
RuntimeError: There was an error verifying or creating the index or
search pipelines.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if necessary."
)
expected_mappings = DocumentSchema.get_document_schema(
embedding_dim, self._tenant_state.multitenant
)
@@ -450,6 +471,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
chunks: list[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
) -> list[DocumentInsertionRecord]:
logger.debug(
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks for index {self._index_name}."
)
# Set of doc IDs.
unique_docs_to_be_indexed: set[str] = set()
document_indexing_results: list[DocumentInsertionRecord] = []
@@ -494,6 +518,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
def delete(self, document_id: str, chunk_count: int | None = None) -> int:
"""Deletes all chunks for a given document.
Does nothing if the specified document ID does not exist.
TODO(andrei): Make this method require supplying source type.
TODO(andrei): Consider implementing this method to delete on document
chunk IDs vs querying for matching document chunks.
@@ -510,6 +536,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
Returns:
The number of chunks successfully deleted.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Deleting document {document_id} from index {self._index_name}."
)
query_body = DocumentQuery.delete_from_document_id_query(
document_id=document_id,
tenant_state=self._tenant_state,
@@ -523,6 +552,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
) -> None:
"""Updates some set of chunks.
NOTE: Will raise if the specified document chunks do not exist.
NOTE: Requires document chunk count be known; will raise if it is not.
NOTE: Each update request must have some field to update; if not it is
assumed there is a bug in the caller and this will raise.
@@ -539,6 +569,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
RuntimeError: Failed to update some or all of the chunks for the
specified documents.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index {self._index_name}."
)
for update_request in update_requests:
properties_to_update: dict[str, Any] = dict()
# TODO(andrei): Nit but consider if we can use DocumentChunk
@@ -604,6 +637,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
TODO(andrei): Consider implementing this method to retrieve on document
chunk IDs vs querying for matching document chunks.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index {self._index_name}."
)
results: list[InferenceChunk] = []
for chunk_request in chunk_requests:
search_hits: list[SearchHit[DocumentChunk]] = []
@@ -643,6 +679,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
num_to_retrieve: int,
offset: int = 0,
) -> list[InferenceChunk]:
logger.debug(
f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}."
)
query_body = DocumentQuery.get_hybrid_search_query(
query_text=query,
query_vector=query_embedding,

View File

@@ -17,7 +17,7 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
get_experts_stores_representations,
)
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk,
generate_enriched_content_for_chunk_text,
)
from onyx.document_index.document_index_utils import get_uuid_from_chunk
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old
@@ -186,7 +186,7 @@ def _index_vespa_chunk(
# For the BM25 index, the keyword suffix is used, the vector is already generated with the more
# natural language representation of the metadata section
CONTENT: remove_invalid_unicode_chars(
generate_enriched_content_for_chunk(chunk)
generate_enriched_content_for_chunk_text(chunk)
),
# This duplication of `content` is needed for keyword highlighting
# Note that it's not exactly the same as the actual content

View File

@@ -7,6 +7,9 @@ from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import DocumentFailure
from onyx.db.models import SearchSettings
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk_embedding,
)
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocAwareChunk
@@ -126,7 +129,7 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
if chunk.large_chunk_reference_ids:
large_chunks_present = True
chunk_text = (
f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
generate_enriched_content_for_chunk_embedding(chunk)
) or chunk.source_document.get_title_for_document_index()
if not chunk_text:

View File

@@ -37,6 +37,7 @@ from onyx.document_index.document_index_utils import (
get_multipass_config,
)
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import DocumentMetadata
from onyx.document_index.interfaces import IndexBatchParams
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
@@ -163,7 +164,7 @@ def index_doc_batch_with_handler(
*,
chunker: Chunker,
embedder: IndexingEmbedder,
document_index: DocumentIndex,
document_indices: list[DocumentIndex],
document_batch: list[Document],
request_id: str | None,
tenant_id: str,
@@ -176,7 +177,7 @@ def index_doc_batch_with_handler(
index_pipeline_result = index_doc_batch(
chunker=chunker,
embedder=embedder,
document_index=document_index,
document_indices=document_indices,
document_batch=document_batch,
request_id=request_id,
tenant_id=tenant_id,
@@ -627,7 +628,7 @@ def index_doc_batch(
document_batch: list[Document],
chunker: Chunker,
embedder: IndexingEmbedder,
document_index: DocumentIndex,
document_indices: list[DocumentIndex],
request_id: str | None,
tenant_id: str,
adapter: IndexingBatchAdapter,
@@ -743,47 +744,57 @@ def index_doc_batch(
short_descriptor_log = str(short_descriptor_list)[:1024]
logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
# A document will not be spread across different batches, so all the
# documents with chunks in this set, are fully represented by the chunks
# in this set
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
for document_index in document_indices:
# A document will not be spread across different batches, so all the
# documents with chunks in this set, are fully represented by the chunks
# in this set
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
all_returned_doc_ids = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
all_returned_doc_ids: set[str] = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
                f"This occurred for document index {document_index.__class__.__name__}"
)
# We treat the first document index we got as the primary one used
# for reporting the state of indexing.
if primary_doc_idx_insertion_records is None:
primary_doc_idx_insertion_records = insertion_records
if primary_doc_idx_vector_db_write_failures is None:
primary_doc_idx_vector_db_write_failures = vector_db_write_failures
adapter.post_index(
context=context,
@@ -792,11 +803,15 @@ def index_doc_batch(
result=result,
)
assert primary_doc_idx_insertion_records is not None
assert primary_doc_idx_vector_db_write_failures is not None
return IndexingPipelineResult(
new_docs=len([r for r in insertion_records if not r.already_existed]),
new_docs=len(
[r for r in primary_doc_idx_insertion_records if not r.already_existed]
),
total_docs=len(filtered_documents),
total_chunks=len(chunks_with_embeddings),
failures=vector_db_write_failures + embedding_failures,
failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
)
@@ -805,7 +820,7 @@ def run_indexing_pipeline(
document_batch: list[Document],
request_id: str | None,
embedder: IndexingEmbedder,
document_index: DocumentIndex,
document_indices: list[DocumentIndex],
db_session: Session,
tenant_id: str,
adapter: IndexingBatchAdapter,
@@ -846,7 +861,7 @@ def run_indexing_pipeline(
return index_doc_batch_with_handler(
chunker=chunker,
embedder=embedder,
document_index=document_index,
document_indices=document_indices,
document_batch=document_batch,
request_id=request_id,
tenant_id=tenant_id,

View File

@@ -41,6 +41,11 @@ alphanum_regex = re.compile(r"[^a-z0-9]+")
rem_email_regex = re.compile(r"(?<=\S)@([a-z0-9-]+)\.([a-z]{2,6})$")
def _ngrams(sequence: str, n: int) -> list[tuple[str, ...]]:
"""Generate n-grams from a sequence."""
return [tuple(sequence[i : i + n]) for i in range(len(sequence) - n + 1)]
def _clean_name(entity_name: str) -> str:
"""
Clean an entity string by removing non-alphanumeric characters and email addresses.
@@ -58,8 +63,6 @@ def _normalize_one_entity(
attributes: dict[str, str],
allowed_docs_temp_view_name: str | None = None,
) -> str | None:
from nltk import ngrams # type: ignore
"""
Matches a single entity to the best matching entity of the same type.
"""
@@ -150,16 +153,16 @@ def _normalize_one_entity(
# step 2: do a weighted ngram analysis and damerau levenshtein distance to rerank
n1, n2, n3 = (
set(ngrams(cleaned_entity, 1)),
set(ngrams(cleaned_entity, 2)),
set(ngrams(cleaned_entity, 3)),
set(_ngrams(cleaned_entity, 1)),
set(_ngrams(cleaned_entity, 2)),
set(_ngrams(cleaned_entity, 3)),
)
for i, (candidate_id_name, candidate_name, _) in enumerate(candidates):
cleaned_candidate = _clean_name(candidate_name)
h_n1, h_n2, h_n3 = (
set(ngrams(cleaned_candidate, 1)),
set(ngrams(cleaned_candidate, 2)),
set(ngrams(cleaned_candidate, 3)),
set(_ngrams(cleaned_candidate, 1)),
set(_ngrams(cleaned_candidate, 2)),
set(_ngrams(cleaned_candidate, 3)),
)
# compute ngram overlap, renormalize scores if the names are too short for larger ngrams

View File

@@ -54,11 +54,6 @@
"model_vendor": "amazon",
"model_version": "v1:0"
},
"anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -1465,11 +1460,6 @@
"model_vendor": "mistral",
"model_version": "v0:1"
},
"bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"chat-bison": {
"display_name": "Chat Bison",
"model_vendor": "google",
@@ -1500,16 +1490,6 @@
"model_vendor": "openai",
"model_version": "latest"
},
"claude-3-5-haiku-20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"claude-3-5-haiku-latest": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "latest"
},
"claude-3-5-sonnet-20240620": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -1715,11 +1695,6 @@
"model_vendor": "amazon",
"model_version": "v1:0"
},
"eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -3251,15 +3226,6 @@
"model_vendor": "anthropic",
"model_version": "latest"
},
"openrouter/anthropic/claude-3-5-haiku": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic"
},
"openrouter/anthropic/claude-3-5-haiku-20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"openrouter/anthropic/claude-3-haiku": {
"display_name": "Claude Haiku 3",
"model_vendor": "anthropic"
@@ -3774,11 +3740,6 @@
"model_vendor": "amazon",
"model_version": "1:0"
},
"us.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -3899,15 +3860,6 @@
"model_vendor": "twelvelabs",
"model_version": "v1:0"
},
"vertex_ai/claude-3-5-haiku": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic"
},
"vertex_ai/claude-3-5-haiku@20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"vertex_ai/claude-3-5-sonnet": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic"

View File

@@ -301,6 +301,12 @@ class LitellmLLM(LLM):
)
is_ollama = self._model_provider == LlmProviderNames.OLLAMA_CHAT
is_mistral = self._model_provider == LlmProviderNames.MISTRAL
is_vertex_ai = self._model_provider == LlmProviderNames.VERTEX_AI
# Vertex Anthropic Opus 4.5 rejects output_config (LiteLLM maps reasoning_effort).
# Keep this guard until LiteLLM/Vertex accept the field for this model.
is_vertex_opus_4_5 = (
is_vertex_ai and "claude-opus-4-5" in self.config.model_name.lower()
)
#########################
# Build arguments
@@ -331,12 +337,16 @@ class LitellmLLM(LLM):
# Temperature
temperature = 1 if is_reasoning else self._temperature
if stream:
if stream and not is_vertex_opus_4_5:
optional_kwargs["stream_options"] = {"include_usage": True}
# Use configured default if not provided (if not set in env, low)
reasoning_effort = reasoning_effort or ReasoningEffort(DEFAULT_REASONING_EFFORT)
if is_reasoning and reasoning_effort != ReasoningEffort.OFF:
if (
is_reasoning
and reasoning_effort != ReasoningEffort.OFF
and not is_vertex_opus_4_5
):
if is_openai_model:
# OpenAI API does not accept reasoning params for GPT 5 chat models
# (neither reasoning nor reasoning_effort are accepted)

View File

@@ -0,0 +1,225 @@
import re
# Standard English stopword inventory (NLTK-style list, including common
# contractions and their bare stems like "aren"/"aren't").
ENGLISH_STOPWORDS: list[str] = (
    """
    a about above after again against ain all am an and any are aren aren't
    as at be because been before being below between both but by can couldn
    couldn't d did didn didn't do does doesn doesn't doing don don't down
    during each few for from further had hadn hadn't has hasn hasn't have
    haven haven't having he he'd he'll he's her here hers herself him himself
    his how i i'd i'll i'm i've if in into is isn isn't it it'd it'll it's
    its itself just ll m ma me mightn mightn't more most mustn mustn't my
    myself needn needn't no nor not now o of off on once only or other our
    ours ourselves out over own re s same shan shan't she she'd she'll she's
    should should've shouldn shouldn't so some such t than that that'll the
    their theirs them themselves then there these they they'd they'll they're
    they've this those through to too under until up ve very was wasn wasn't
    we we'd we'll we're we've were weren weren't what when where which while
    who whom why will with won won't wouldn wouldn't y you you'd you'll
    you're you've your yours yourself yourselves
    """
).split()

# Frozen set for O(1) membership checks in strip_stopwords.
ENGLISH_STOPWORDS_SET = frozenset(ENGLISH_STOPWORDS)

# Trims punctuation from either end of a token while leaving word characters
# and apostrophes (contractions) untouched.
_EDGE_PUNCTUATION = re.compile(r"^[^\w']+|[^\w']+$")


def strip_stopwords(text: str) -> list[str]:
    """Drop English stopwords from *text*, returning the surviving words.

    Comparison is case-insensitive and ignores punctuation hugging either
    end of a word, while punctuation inside a word (e.g. the apostrophe in
    a contraction) still participates: "you're" matches the stopword
    "you're", whereas "youre" would not.
    """
    kept: list[str] = []
    for token in text.split():
        # Compare against the stopword set using the punctuation-trimmed,
        # lowercased core, but keep the original token in the output.
        bare = _EDGE_PUNCTUATION.sub("", token)
        if bare.lower() not in ENGLISH_STOPWORDS_SET:
            kept.append(token)
    return kept

View File

@@ -32,9 +32,6 @@ from onyx.configs.constants import MessageType
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.onyxbot_configs import NOTIFY_SLACKBOT_NO_ANSWER
from onyx.connectors.slack.utils import expert_info_from_slack_id
from onyx.context.search.retrieval.search_runner import (
download_nltk_data,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
@@ -1129,9 +1126,6 @@ if __name__ == "__main__":
set_is_ee_based_on_env_variable()
logger.info("Verifying query preprocessing (NLTK) data is downloaded")
download_nltk_data()
try:
# Keep the main thread alive
while tenant_handler.running:

View File

@@ -96,7 +96,7 @@ ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."
CHAT_NAMING_SYSTEM_PROMPT = """
Given the conversation history, provide a SHORT name for the conversation. Focus the name on the important keywords to convey the topic of the conversation. \
Make sure the name is in the same language as the user's language.
Make sure the name is in the same language as the user's first message.
IMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. NEVER USE MORE THAN 5 WORDS, LESS IS FINE.
""".strip()

View File

@@ -19,7 +19,7 @@ If you need to ask questions, follow these guidelines:
- Be concise and do not ask more than 5 questions.
- If there are ambiguous terms or questions, ask the user to clarify.
- Your questions should be a numbered list for clarity.
- Respond in the user's language.
- Respond in the same language as the user's query.
- Make sure to gather all the information needed to carry out the research task in a concise, well-structured manner.{{internal_search_clarification_guidance}}
- Wrap up with a quick sentence on what the clarification will help with, it's ok to reference the user query closely here.
""".strip()
@@ -44,9 +44,9 @@ For context, the date is {current_datetime}.
The research plan should be formatted as a numbered list of steps and have 6 or less individual steps.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps. The plan should be in the same language as the user's query.
Output only the numbered list of steps with no additional prefix or suffix. Respond in the user's language.
Output only the numbered list of steps with no additional prefix or suffix.
""".strip()
@@ -76,10 +76,11 @@ You have currently used {{current_cycle_count}} of {{max_cycles}} max research c
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation.
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, other research agents, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}. The research task should be in the user's language.{{internal_search_research_task_guidance}}
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.
@@ -129,7 +130,7 @@ For context, the date is {current_datetime}.
Users have explicitly selected the deep research mode and will expect a long and detailed answer. It is ok and encouraged that your response is several pages long.
You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible. Respond in the user's language.
You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible.
Not every fact retrieved will be relevant to the user's query.
@@ -165,10 +166,11 @@ You have currently used {{current_cycle_count}} of {{max_cycles}} max research c
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation.
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}. The research task should be in the user's language.{{internal_search_research_task_guidance}}
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.

View File

@@ -1,30 +1,39 @@
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
SLACK_QUERY_EXPANSION_PROMPT = f"""
Rewrite the user's query and, if helpful, split it into at most {MAX_SLACK_QUERY_EXPANSIONS} \
keyword-only queries, so that Slack's keyword search yields the best matches.
Rewrite the user's query into at most {MAX_SLACK_QUERY_EXPANSIONS} keyword-only queries for Slack's keyword search.
Keep in mind the Slack's search behavior:
- Pure keyword AND search (no semantics).
- Word order matters.
- More words = fewer matches, so keep each query concise.
- IMPORTANT: Prefer simple 1-2 word queries over longer multi-word queries.
Slack search behavior:
- Pure keyword AND search (no semantics)
- More words = fewer matches, so keep queries concise (1-3 words)
Critical: Extract ONLY keywords that would actually appear in Slack message content.
ALWAYS include:
- Person names (e.g., "Sarah Chen", "Mike Johnson") - people search for messages from/about specific people
- Project/product names, technical terms, proper nouns
- Actual content words: "performance", "bug", "deployment", "API", "error"
DO NOT include:
- Meta-words: "topics", "conversations", "discussed", "summary", "messages", "big", "main", "talking"
- Temporal: "today", "yesterday", "week", "month", "recent", "past", "last"
- Channels/Users: "general", "eng-general", "engineering", "@username"
DO include:
- Actual content: "performance", "bug", "deployment", "API", "database", "error", "feature"
- Meta-words: "topics", "conversations", "discussed", "summary", "messages"
- Temporal: "today", "yesterday", "week", "month", "recent", "last"
- Channel names: "general", "eng-general", "random"
Examples:
Query: "what are the big topics in eng-general this week?"
Output:
Query: "messages with Sarah about the deployment"
Output:
Sarah deployment
Sarah
deployment
Query: "what did Mike say about the budget?"
Output:
Mike budget
Mike
budget
Query: "performance issues in eng-general"
Output:
performance issues
@@ -41,7 +50,7 @@ Now process this query:
{{query}}
Output:
Output (keywords only, one per line, NO explanations or commentary):
"""
SLACK_DATE_EXTRACTION_PROMPT = """

View File

@@ -48,7 +48,7 @@ Do not use the "site:" operator in your web search queries.
OPEN_URLS_GUIDANCE = """
## open_url
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches.
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your web searches or user specified URLs.
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources.
You should almost always use open_url after a web_search call. Use this tool when a user asks about a specific provided URL.
"""

View File

@@ -1,24 +0,0 @@
input_prompts:
- id: -5
prompt: "Elaborate"
content: "Elaborate on the above, give me a more in depth explanation."
active: true
is_public: true
- id: -4
prompt: "Reword"
content: "Help me rewrite the following politely and concisely for professional communication:\n"
active: true
is_public: true
- id: -3
prompt: "Email"
content: "Write a professional email for me including a subject line, signature, etc. Template the parts that need editing with [ ]. The email should cover the following points:\n"
active: true
is_public: true
- id: -2
prompt: "Debug"
content: "Provide step-by-step troubleshooting instructions for the following issue:\n"
active: true
is_public: true

View File

@@ -1,40 +0,0 @@
import yaml
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.db.input_prompt import insert_input_prompt_if_not_exists
from onyx.utils.logger import setup_logger
logger = setup_logger()
def load_input_prompts_from_yaml(
    db_session: Session, input_prompts_yaml: str = INPUT_PROMPT_YAML
) -> None:
    """Seed the DB with the default input prompts declared in the YAML file.

    Existing prompts (matched by id) are left untouched; only missing ones
    are inserted.
    """
    with open(input_prompts_yaml, "r") as yaml_file:
        parsed = yaml.safe_load(yaml_file)

    for prompt_entry in parsed.get("input_prompts", []):
        # Deleting one of these prompts is a hard delete in the DB, so it is
        # recreated on server startup; users can simply deactivate it instead,
        # which is only a light inconvenience.
        insert_input_prompt_if_not_exists(
            user=None,
            input_prompt_id=prompt_entry.get("id"),
            prompt=prompt_entry["prompt"],
            content=prompt_entry["content"],
            is_public=prompt_entry["is_public"],
            active=prompt_entry.get("active", True),
            db_session=db_session,
            commit=True,
        )
def load_chat_yamls(
    db_session: Session,
    input_prompts_yaml: str = INPUT_PROMPT_YAML,
) -> None:
    """Load every chat-related YAML configuration.

    Currently this is just the input prompts (called "prompt shortcuts" on
    the frontend).
    """
    load_input_prompts_from_yaml(
        db_session=db_session, input_prompts_yaml=input_prompts_yaml
    )

View File

@@ -32,6 +32,7 @@ def get_document_info(
db_session: Session = Depends(get_session),
) -> DocumentInfo:
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
user_acl_filters = build_access_filters_for_user(user, db_session)
@@ -76,6 +77,7 @@ def get_chunk_info(
db_session: Session = Depends(get_session),
) -> ChunkInfo:
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
user_acl_filters = build_access_filters_for_user(user, db_session)

View File

@@ -821,20 +821,36 @@ def _ensure_mcp_server_owner_or_admin(server: DbMCPServer, user: User | None) ->
def _db_mcp_server_to_api_mcp_server(
db_server: DbMCPServer, email: str, db: Session, include_auth_config: bool = False
db_server: DbMCPServer,
db: Session,
request_user: User | None,
include_auth_config: bool = False,
) -> MCPServer:
"""Convert database MCP server to API model"""
email = request_user.email if request_user else ""
# Check if user has authentication configured and extract credentials
auth_performer = db_server.auth_performer
user_authenticated: bool | None = None
user_credentials = None
admin_credentials = None
can_view_admin_credentials = bool(include_auth_config) and (
request_user is not None
and (
request_user.role == UserRole.ADMIN
or (request_user.email and request_user.email == db_server.owner)
)
)
if db_server.auth_type == MCPAuthenticationType.NONE:
user_authenticated = True # No auth required
elif auth_performer == MCPAuthenticationPerformer.ADMIN:
user_authenticated = db_server.admin_connection_config is not None
if include_auth_config and db_server.admin_connection_config is not None:
if (
can_view_admin_credentials
and db_server.admin_connection_config is not None
and include_auth_config
):
if db_server.auth_type == MCPAuthenticationType.API_TOKEN:
admin_credentials = {
"api_key": db_server.admin_connection_config.config["headers"][
@@ -890,11 +906,12 @@ def _db_mcp_server_to_api_mcp_server(
if client_info:
if not client_info.client_id or not client_info.client_secret:
raise ValueError("Stored client info had empty client ID or secret")
admin_credentials = {
"client_id": client_info.client_id,
"client_secret": client_info.client_secret,
}
else:
if can_view_admin_credentials:
admin_credentials = {
"client_id": client_info.client_id,
"client_secret": client_info.client_secret,
}
elif can_view_admin_credentials:
admin_credentials = {}
logger.warning(f"No client info found for server {db_server.name}")
@@ -961,14 +978,13 @@ def get_mcp_servers_for_assistant(
logger.info(f"Fetching MCP servers for assistant: {assistant_id}")
email = user.email if user else ""
try:
persona_id = int(assistant_id)
db_mcp_servers = get_mcp_servers_for_persona(persona_id, db, user)
# Convert to API model format with opportunistic token refresh for OAuth
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, email, db)
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
for db_server in db_mcp_servers
]
@@ -981,6 +997,25 @@ def get_mcp_servers_for_assistant(
raise HTTPException(status_code=500, detail="Failed to fetch MCP servers")
@router.get("/servers", response_model=MCPServersResponse)
def get_mcp_servers_for_user(
db: Session = Depends(get_session),
user: User | None = Depends(current_user),
) -> MCPServersResponse:
"""List all MCP servers for use in agent configuration and chat UI.
This endpoint is intentionally available to all authenticated users so they
can attach MCP actions to assistants. Sensitive admin credentials are never
returned.
"""
db_mcp_servers = get_all_mcp_servers(db)
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
for db_server in db_mcp_servers
]
return MCPServersResponse(mcp_servers=mcp_servers)
def _get_connection_config(
mcp_server: DbMCPServer, is_admin: bool, user: User | None, db_session: Session
) -> MCPConnectionConfig | None:
@@ -1528,8 +1563,6 @@ def get_mcp_server_detail(
_ensure_mcp_server_owner_or_admin(server, user)
email = user.email if user else ""
# TODO: user permissions per mcp server not yet implemented, for now
# permissions are based on access to assistants
# # Quick permission check admin or user has access
@@ -1537,7 +1570,10 @@ def get_mcp_server_detail(
# raise HTTPException(status_code=403, detail="Forbidden")
return _db_mcp_server_to_api_mcp_server(
server, email, db_session, include_auth_config=True
server,
db_session,
include_auth_config=True,
request_user=user,
)
@@ -1596,13 +1632,12 @@ def get_mcp_servers_for_admin(
logger.info("Fetching all MCP servers for admin display")
email = user.email if user else ""
try:
db_mcp_servers = get_all_mcp_servers(db)
# Convert to API model format
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, email, db)
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
for db_server in db_mcp_servers
]
@@ -1845,7 +1880,9 @@ def update_mcp_server_simple(
db_session.commit()
# Return the updated server in API format
return _db_mcp_server_to_api_mcp_server(updated_server, user.email, db_session)
return _db_mcp_server_to_api_mcp_server(
updated_server, db_session, request_user=user
)
@admin_router.delete("/server/{server_id}")

View File

@@ -13,6 +13,7 @@ from onyx.configs.app_configs import PASSWORD_MIN_LENGTH
from onyx.configs.constants import DEV_VERSION_PATTERN
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import STABLE_VERSION_PATTERN
from onyx.db.auth import get_user_count
from onyx.server.manage.models import AllVersions
from onyx.server.manage.models import AuthTypeResponse
from onyx.server.manage.models import ContainerVersions
@@ -28,12 +29,14 @@ def healthcheck() -> StatusResponse:
@router.get("/auth/type", tags=PUBLIC_API_TAGS)
def get_auth_type() -> AuthTypeResponse:
async def get_auth_type() -> AuthTypeResponse:
user_count = await get_user_count()
return AuthTypeResponse(
auth_type=AUTH_TYPE,
requires_verification=user_needs_to_be_verified(),
anonymous_user_enabled=anonymous_user_enabled(),
password_min_length=PASSWORD_MIN_LENGTH,
has_users=user_count > 0,
)

View File

@@ -44,6 +44,8 @@ class AuthTypeResponse(BaseModel):
requires_verification: bool
anonymous_user_enabled: bool | None = None
password_min_length: int
# whether there are any users in the system
has_users: bool = True
class UserSpecificAssistantPreference(BaseModel):
@@ -65,6 +67,7 @@ class UserPreferences(BaseModel):
auto_scroll: bool | None = None
temperature_override_enabled: bool | None = None
theme_preference: ThemePreference | None = None
chat_background: str | None = None
# controls which tools are enabled for the user for a specific assistant
assistant_specific_configs: UserSpecificAssistantPreferences | None = None
@@ -136,6 +139,7 @@ class UserInfo(BaseModel):
auto_scroll=user.auto_scroll,
temperature_override_enabled=user.temperature_override_enabled,
theme_preference=user.theme_preference,
chat_background=user.chat_background,
assistant_specific_configs=assistant_specific_configs,
)
),
@@ -199,6 +203,10 @@ class ThemePreferenceRequest(BaseModel):
theme_preference: ThemePreference
class ChatBackgroundRequest(BaseModel):
chat_background: str | None
class PersonalizationUpdateRequest(BaseModel):
name: str | None = None
role: str | None = None

View File

@@ -6,33 +6,25 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.engine.sql_engine import get_session
from onyx.db.index_attempt import expire_index_attempts
from onyx.db.models import IndexModelStatus
from onyx.db.models import User
from onyx.db.search_settings import create_search_settings
from onyx.db.search_settings import delete_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_embedding_provider_from_provider_type
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.document_index_utils import get_multipass_config
from onyx.document_index.factory import get_default_document_index
from onyx.file_processing.unstructured import delete_unstructured_api_key
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import update_unstructured_api_key
from onyx.natural_language_processing.search_nlp_models import clean_model_name
from onyx.server.manage.embedding.models import SearchSettingsDeleteRequest
from onyx.server.manage.models import FullModelVersionResponse
from onyx.server.models import IdReturn
from onyx.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MULTI_TENANT
router = APIRouter(prefix="/search-settings")
@@ -48,91 +40,97 @@ def set_new_search_settings(
"""Creates a new EmbeddingModel row and cancels the previous secondary indexing if any
Gives an error if the same model name is used as the current or secondary index
"""
if search_settings_new.index_name:
logger.warning("Index name was specified by request, this is not suggested")
# Disallow contextual RAG for cloud deployments
if MULTI_TENANT and search_settings_new.enable_contextual_rag:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Contextual RAG disabled in Onyx Cloud",
)
# Validate cloud provider exists or create new LiteLLM provider
if search_settings_new.provider_type is not None:
cloud_provider = get_embedding_provider_from_provider_type(
db_session, provider_type=search_settings_new.provider_type
)
if cloud_provider is None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
)
search_settings = get_current_search_settings(db_session)
if search_settings_new.index_name is None:
# We define index name here
index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
if (
search_settings_new.model_name == search_settings.model_name
and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
):
index_name += ALT_INDEX_SUFFIX
search_values = search_settings_new.model_dump()
search_values["index_name"] = index_name
new_search_settings_request = SavedSearchSettings(**search_values)
else:
new_search_settings_request = SavedSearchSettings(
**search_settings_new.model_dump()
)
secondary_search_settings = get_secondary_search_settings(db_session)
if secondary_search_settings:
# Cancel any background indexing jobs
expire_index_attempts(
search_settings_id=secondary_search_settings.id, db_session=db_session
)
# Mark previous model as a past model directly
update_search_settings_status(
search_settings=secondary_search_settings,
new_status=IndexModelStatus.PAST,
db_session=db_session,
)
new_search_settings = create_search_settings(
search_settings=new_search_settings_request, db_session=db_session
# TODO(andrei): Re-enable.
logger.error("Setting new search settings is temporarily disabled.")
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="Setting new search settings is temporarily disabled.",
)
# if search_settings_new.index_name:
# logger.warning("Index name was specified by request, this is not suggested")
# Ensure Vespa has the new index immediately
get_multipass_config(search_settings)
get_multipass_config(new_search_settings)
document_index = get_default_document_index(search_settings, new_search_settings)
# # Disallow contextual RAG for cloud deployments
# if MULTI_TENANT and search_settings_new.enable_contextual_rag:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail="Contextual RAG disabled in Onyx Cloud",
# )
document_index.ensure_indices_exist(
primary_embedding_dim=search_settings.final_embedding_dim,
primary_embedding_precision=search_settings.embedding_precision,
secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
secondary_index_embedding_precision=new_search_settings.embedding_precision,
)
# # Validate cloud provider exists or create new LiteLLM provider
# if search_settings_new.provider_type is not None:
# cloud_provider = get_embedding_provider_from_provider_type(
# db_session, provider_type=search_settings_new.provider_type
# )
# Pause index attempts for the currently in use index to preserve resources
if DISABLE_INDEX_UPDATE_ON_SWAP:
expire_index_attempts(
search_settings_id=search_settings.id, db_session=db_session
)
for cc_pair in get_connector_credential_pairs(db_session):
resync_cc_pair(
cc_pair=cc_pair,
search_settings_id=new_search_settings.id,
db_session=db_session,
)
# if cloud_provider is None:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
# )
db_session.commit()
return IdReturn(id=new_search_settings.id)
# search_settings = get_current_search_settings(db_session)
# if search_settings_new.index_name is None:
# # We define index name here
# index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
# if (
# search_settings_new.model_name == search_settings.model_name
# and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
# ):
# index_name += ALT_INDEX_SUFFIX
# search_values = search_settings_new.model_dump()
# search_values["index_name"] = index_name
# new_search_settings_request = SavedSearchSettings(**search_values)
# else:
# new_search_settings_request = SavedSearchSettings(
# **search_settings_new.model_dump()
# )
# secondary_search_settings = get_secondary_search_settings(db_session)
# if secondary_search_settings:
# # Cancel any background indexing jobs
# expire_index_attempts(
# search_settings_id=secondary_search_settings.id, db_session=db_session
# )
# # Mark previous model as a past model directly
# update_search_settings_status(
# search_settings=secondary_search_settings,
# new_status=IndexModelStatus.PAST,
# db_session=db_session,
# )
# new_search_settings = create_search_settings(
# search_settings=new_search_settings_request, db_session=db_session
# )
# # Ensure Vespa has the new index immediately
# get_multipass_config(search_settings)
# get_multipass_config(new_search_settings)
# document_index = get_default_document_index(search_settings, new_search_settings)
# document_index.ensure_indices_exist(
# primary_embedding_dim=search_settings.final_embedding_dim,
# primary_embedding_precision=search_settings.embedding_precision,
# secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
# secondary_index_embedding_precision=new_search_settings.embedding_precision,
# )
# # Pause index attempts for the currently in use index to preserve resources
# if DISABLE_INDEX_UPDATE_ON_SWAP:
# expire_index_attempts(
# search_settings_id=search_settings.id, db_session=db_session
# )
# for cc_pair in get_connector_credential_pairs(db_session):
# resync_cc_pair(
# cc_pair=cc_pair,
# search_settings_id=new_search_settings.id,
# db_session=db_session,
# )
# db_session.commit()
# return IdReturn(id=new_search_settings.id)
@router.post("/cancel-new-embedding")

View File

@@ -56,6 +56,7 @@ from onyx.db.user_preferences import get_latest_access_token_for_user
from onyx.db.user_preferences import update_assistant_preferences
from onyx.db.user_preferences import update_user_assistant_visibility
from onyx.db.user_preferences import update_user_auto_scroll
from onyx.db.user_preferences import update_user_chat_background
from onyx.db.user_preferences import update_user_default_model
from onyx.db.user_preferences import update_user_personalization
from onyx.db.user_preferences import update_user_pinned_assistants
@@ -75,6 +76,7 @@ from onyx.server.documents.models import PaginatedReturn
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.manage.models import AllUsersResponse
from onyx.server.manage.models import AutoScrollRequest
from onyx.server.manage.models import ChatBackgroundRequest
from onyx.server.manage.models import PersonalizationUpdateRequest
from onyx.server.manage.models import TenantInfo
from onyx.server.manage.models import TenantSnapshot
@@ -784,6 +786,25 @@ def update_user_theme_preference_api(
update_user_theme_preference(user.id, request.theme_preference, db_session)
@router.patch("/user/chat-background")
def update_user_chat_background_api(
    request: ChatBackgroundRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Persist the requesting user's chat-background preference.

    When auth is disabled there is no ``User`` row, so the preference is
    written to the KV-store-backed "no auth user" preferences instead of the
    database.
    """
    if user is None:
        if AUTH_TYPE == AuthType.DISABLED:
            # No real user exists in this mode; store the preference on the
            # shared no-auth user object kept in the KV store.
            store = get_kv_store()
            no_auth_user = fetch_no_auth_user(store)
            no_auth_user.preferences.chat_background = request.chat_background
            set_no_auth_user_preferences(store, no_auth_user.preferences)
            return
        else:
            # current_user should only yield None when auth is disabled.
            raise RuntimeError("This should never happen")

    update_user_chat_background(user.id, request.chat_background, db_session)
@router.patch("/user/default-model")
def update_user_default_model_api(
request: ChosenDefaultModelRequest,

View File

@@ -22,7 +22,7 @@ from onyx.db.models import User
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.indexing.adapters.document_indexing_adapter import (
DocumentIndexingBatchAdapter,
)
@@ -103,9 +103,11 @@ def upsert_ingestion_doc(
# Need to index for both the primary and secondary index if possible
active_search_settings = get_active_search_settings(db_session)
curr_doc_index = get_default_document_index(
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
active_search_settings.primary,
None,
None,
)
search_settings = get_current_search_settings(db_session)
@@ -128,7 +130,7 @@ def upsert_ingestion_doc(
indexing_pipeline_result = run_indexing_pipeline(
embedder=index_embedding_model,
document_index=curr_doc_index,
document_indices=document_indices,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -151,13 +153,14 @@ def upsert_ingestion_doc(
search_settings=sec_search_settings
)
sec_doc_index = get_default_document_index(
active_search_settings.secondary, None
# This flow is for indexing so we get all indices.
sec_document_indices = get_all_document_indices(
active_search_settings.secondary, None, None
)
run_indexing_pipeline(
embedder=new_index_embedding_model,
document_index=sec_doc_index,
document_indices=sec_document_indices,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -192,15 +195,18 @@ def delete_ingestion_doc(
)
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
active_search_settings.primary,
active_search_settings.secondary,
None,
)
doc_index.delete_single(
doc_id=document_id,
tenant_id=tenant_id,
chunk_count=document.chunk_count,
)
for document_index in document_indices:
document_index.delete_single(
doc_id=document_id,
tenant_id=tenant_id,
chunk_count=document.chunk_count,
)
# Delete from database
delete_documents_complete__no_commit(db_session, [document_id])

View File

@@ -530,7 +530,30 @@ def handle_new_chat_message(
return StreamingResponse(stream_generator(), media_type="text/event-stream")
@router.post("/send-chat-message", response_model=None, tags=PUBLIC_API_TAGS)
@router.post(
"/send-chat-message",
response_model=ChatFullResponse,
tags=PUBLIC_API_TAGS,
responses={
200: {
"description": (
"If `stream=true`, returns `text/event-stream`.\n"
"If `stream=false`, returns `application/json` (ChatFullResponse)."
),
"content": {
"text/event-stream": {
"schema": {"type": "string"},
"examples": {
"stream": {
"summary": "Stream of NDJSON AnswerStreamPart's",
"value": "string",
}
},
},
},
}
},
)
def handle_send_chat_message(
chat_message_req: SendMessageRequest,
request: Request,

View File

@@ -51,6 +51,7 @@ def admin_search(
tenant_id=tenant_id,
)
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
if not isinstance(document_index, VespaIndex):

View File

@@ -4,6 +4,7 @@ from typing import cast
from sqlalchemy.orm import Session
from onyx.chat.citation_utils import extract_citation_order_from_text
from onyx.configs.constants import MessageType
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
@@ -521,6 +522,13 @@ def translate_assistant_message_to_packets(
)
)
# Sort citations by order of appearance in message text
citation_order = extract_citation_order_from_text(chat_message.message or "")
order_map = {num: idx for idx, num in enumerate(citation_order)}
citation_info_list.sort(
key=lambda c: order_map.get(c.citation_number, float("inf"))
)
# Message comes after tool calls, with optional reasoning step beforehand
message_turn_index = max_tool_turn + 1
if chat_message.reasoning_tokens:

View File

@@ -6,7 +6,6 @@ from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.constants import KV_SEARCH_SETTINGS
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
@@ -14,9 +13,6 @@ from onyx.configs.embedding_configs import SupportedEmbeddingModel
from onyx.configs.model_configs import GEN_AI_API_KEY
from onyx.configs.model_configs import GEN_AI_MODEL_VERSION
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.retrieval.search_runner import (
download_nltk_data,
)
from onyx.db.connector import check_connectors_exist
from onyx.db.connector import create_initial_default_connector
from onyx.db.connector_credential_pair import associate_default_cc_pair
@@ -36,7 +32,7 @@ from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
@@ -46,7 +42,6 @@ from onyx.llm.constants import LlmProviderNames
from onyx.llm.well_known_providers.llm_provider_options import get_openai_model_names
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.seeding.load_yamls import load_input_prompts_from_yaml
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.settings.store import load_settings
@@ -116,9 +111,6 @@ def setup_onyx(
f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}."
)
logger.notice("Verifying query preprocessing (NLTK) data is downloaded")
download_nltk_data()
# setup Postgres with default credential, llm providers, etc.
setup_postgres(db_session)
@@ -132,13 +124,15 @@ def setup_onyx(
# Ensure Vespa is setup correctly, this step is relatively near the end because Vespa
# takes a bit of time to start up
logger.notice("Verifying Document Index(s) is/are available.")
document_index = get_default_document_index(
# This flow is for setting up the document index so we get all indices here.
document_indices = get_all_document_indices(
search_settings,
secondary_search_settings,
None,
)
success = setup_vespa(
document_index,
success = setup_document_indices(
document_indices,
IndexingSetting.from_db_model(search_settings),
(
IndexingSetting.from_db_model(secondary_search_settings)
@@ -147,7 +141,9 @@ def setup_onyx(
),
)
if not success:
raise RuntimeError("Could not connect to Vespa within the specified timeout.")
raise RuntimeError(
"Could not connect to a document index within the specified timeout."
)
logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}")
if search_settings.provider_type is None:
@@ -229,44 +225,62 @@ def mark_reindex_flag(db_session: Session) -> None:
kv_store.store(KV_REINDEX_KEY, False)
def setup_vespa(
document_index: DocumentIndex,
def setup_document_indices(
document_indices: list[DocumentIndex],
index_setting: IndexingSetting,
secondary_index_setting: IndexingSetting | None,
num_attempts: int = VESPA_NUM_ATTEMPTS_ON_STARTUP,
) -> bool:
# Vespa startup is a bit slow, so give it a few seconds
WAIT_SECONDS = 5
for x in range(num_attempts):
try:
logger.notice(f"Setting up Vespa (attempt {x+1}/{num_attempts})...")
document_index.ensure_indices_exist(
primary_embedding_dim=index_setting.final_embedding_dim,
primary_embedding_precision=index_setting.embedding_precision,
secondary_index_embedding_dim=(
secondary_index_setting.final_embedding_dim
if secondary_index_setting
else None
),
secondary_index_embedding_precision=(
secondary_index_setting.embedding_precision
if secondary_index_setting
else None
),
)
"""Sets up all input document indices.
logger.notice("Vespa setup complete.")
return True
except Exception:
logger.exception(
f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
If any document index setup fails, the function will return False. Otherwise
returns True.
"""
for document_index in document_indices:
# Document index startup is a bit slow, so give it a few seconds.
WAIT_SECONDS = 5
document_index_setup_success = False
for x in range(num_attempts):
try:
logger.notice(
f"Setting up document index {document_index.__class__.__name__} (attempt {x+1}/{num_attempts})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=index_setting.final_embedding_dim,
primary_embedding_precision=index_setting.embedding_precision,
secondary_index_embedding_dim=(
secondary_index_setting.final_embedding_dim
if secondary_index_setting
else None
),
secondary_index_embedding_precision=(
secondary_index_setting.embedding_precision
if secondary_index_setting
else None
),
)
logger.error(
f"Vespa setup did not succeed. Attempt limit reached. ({num_attempts})"
)
return False
logger.notice(
f"Document index {document_index.__class__.__name__} setup complete."
)
document_index_setup_success = True
break
except Exception:
logger.exception(
f"Document index {document_index.__class__.__name__} setup did not succeed. "
"The relevant service may not be ready yet. "
f"Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not document_index_setup_success:
logger.error(
f"Document index {document_index.__class__.__name__} setup did not succeed. "
f"Attempt limit reached. ({num_attempts})"
)
return False
return True
def setup_postgres(db_session: Session) -> None:
@@ -275,10 +289,6 @@ def setup_postgres(db_session: Session) -> None:
create_initial_default_connector(db_session)
associate_default_cc_pair(db_session)
# Load input prompts and user folders from YAML
logger.notice("Loading input prompts and user folders")
load_input_prompts_from_yaml(db_session, INPUT_PROMPT_YAML)
if GEN_AI_API_KEY and fetch_default_provider(db_session) is None:
# Only for dev flows
logger.notice("Setting up default OpenAI LLM for dev.")
@@ -347,6 +357,8 @@ def setup_multitenant_onyx() -> None:
def setup_vespa_multitenant(supported_indices: list[SupportedEmbeddingModel]) -> bool:
# TODO(andrei): We don't yet support OpenSearch for multi-tenant instances
# so this function remains unchanged.
# This is for local testing
WAIT_SECONDS = 5
VESPA_ATTEMPTS = 5

View File

@@ -60,6 +60,7 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tools.utils import generate_tools_description
@@ -431,6 +432,14 @@ def run_research_agent_call(
max_concurrent_tools=1,
# May be better to not do this step, hard to say, needs to be tested
skip_search_query_expansion=False,
url_snippet_map=extract_url_snippet_map(
[
search_doc
for tool_call in state_container.get_tool_calls()
if tool_call.search_docs
for search_doc in tool_call.search_docs
]
),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = (
@@ -465,8 +474,14 @@ def run_research_agent_call(
)
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
# This is used for the Open URL reminder in the next cycle
# only do this if the web search tool yielded results
@@ -499,7 +514,7 @@ def run_research_agent_call(
or most_recent_reasoning,
tool_call_arguments=tool_call.tool_args,
tool_call_response=tool_response.llm_facing_response,
search_docs=search_docs,
search_docs=displayed_docs or search_docs,
generated_images=None,
)
state_container.add_tool_call(tool_call_info)

View File

@@ -36,6 +36,15 @@ class ToolCallException(Exception):
self.llm_facing_message = llm_facing_message
class ToolExecutionException(Exception):
    """Exception raised for errors during tool execution."""

    def __init__(self, message: str, emit_error_packet: bool = False):
        super().__init__(message)
        # When True, the caller presumably surfaces this failure to the client
        # as an explicit error packet rather than only logging it — TODO
        # confirm against the tool-runner call sites.
        self.emit_error_packet = emit_error_packet
class SearchToolUsage(str, Enum):
DISABLED = "disabled"
ENABLED = "enabled"
@@ -142,6 +151,7 @@ class OpenURLToolOverrideKwargs(BaseModel):
# To know what citation number to start at for constructing the string to the LLM
starting_citation_num: int
citation_mapping: dict[str, int]
url_snippet_map: dict[str, str]
# None indicates that the default value should be used

View File

@@ -19,7 +19,6 @@ from onyx.db.oauth_config import get_oauth_config
from onyx.db.search_settings import get_current_search_settings
from onyx.db.tools import get_builtin_tool
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
@@ -120,18 +119,9 @@ def construct_tools(
if user and user.oauth_accounts:
user_oauth_token = user.oauth_accounts[0].access_token
document_index_cache: DocumentIndex | None = None
search_settings_cache = None
def _get_document_index() -> DocumentIndex:
nonlocal document_index_cache, search_settings_cache
if document_index_cache is None:
if search_settings_cache is None:
search_settings_cache = get_current_search_settings(db_session)
document_index_cache = get_default_document_index(
search_settings_cache, None
)
return document_index_cache
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
added_search_tool = False
for db_tool_model in persona.tools:
@@ -174,7 +164,7 @@ def construct_tools(
user=user,
persona=persona,
llm=llm,
document_index=_get_document_index(),
document_index=document_index,
user_selected_filters=search_tool_config.user_selected_filters,
project_id=search_tool_config.project_id,
bypass_acl=search_tool_config.bypass_acl,
@@ -228,7 +218,7 @@ def construct_tools(
OpenURLTool(
tool_id=db_tool_model.id,
emitter=emitter,
document_index=_get_document_index(),
document_index=document_index,
user=user,
)
]
@@ -387,9 +377,6 @@ def construct_tools(
if not search_tool_config:
search_tool_config = SearchToolConfig()
search_settings = get_current_search_settings(db_session)
document_index = get_default_document_index(search_settings, None)
search_tool = SearchTool(
tool_id=search_tool_db_model.id,
db_session=db_session,

View File

@@ -23,6 +23,7 @@ from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeart
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
@@ -188,7 +189,9 @@ class ImageGenerationTool(Tool[None]):
except requests.RequestException as e:
logger.error(f"Error fetching or converting image: {e}")
raise ValueError("Failed to fetch or convert the generated image")
raise ToolExecutionException(
"Failed to fetch or convert the generated image", emit_error_packet=True
)
except Exception as e:
logger.debug(f"Error occurred during image generation: {e}")
@@ -198,18 +201,27 @@ class ImageGenerationTool(Tool[None]):
"Your request was rejected as a result of our safety system"
in error_message
):
raise ValueError(
"The image generation request was rejected due to OpenAI's content policy. Please try a different prompt."
raise ToolExecutionException(
(
"The image generation request was rejected due to OpenAI's content policy. "
"Please try a different prompt."
),
emit_error_packet=True,
)
elif "Invalid image URL" in error_message:
raise ValueError("Invalid image URL provided for image generation.")
raise ToolExecutionException(
"Invalid image URL provided for image generation.",
emit_error_packet=True,
)
elif "invalid_request_error" in error_message:
raise ValueError(
"Invalid request for image generation. Please check your input."
raise ToolExecutionException(
"Invalid request for image generation. Please check your input.",
emit_error_packet=True,
)
raise ValueError(
"An error occurred during image generation. Please try again later."
raise ToolExecutionException(
f"An error occurred during image generation. error={error_message}",
emit_error_packet=True,
)
def run(

View File

@@ -492,7 +492,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
indexed_result, crawled_result = run_functions_tuples_in_parallel(
[
(_retrieve_indexed_with_filters, (all_requests,)),
(self._fetch_web_content, (urls,)),
(self._fetch_web_content, (urls, override_kwargs.url_snippet_map)),
],
allow_failures=True,
timeout=OPEN_URL_TIMEOUT_SECONDS,
@@ -800,7 +800,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
return merged_sections
def _fetch_web_content(
self, urls: list[str]
self, urls: list[str], url_snippet_map: dict[str, str]
) -> tuple[list[InferenceSection], list[str]]:
if not urls:
return [], []
@@ -831,7 +831,11 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
and content.full_content
and not is_insufficient
):
sections.append(inference_section_from_internet_page_scrape(content))
sections.append(
inference_section_from_internet_page_scrape(
content, url_snippet_map.get(content.link, "")
)
)
else:
# TODO: Slight improvement - if failed URL reasons are passed back to the LLM
# for example, if it tries to crawl Reddit and fails, it should know (probably) that this error would

View File

@@ -0,0 +1,239 @@
import unicodedata
from pydantic import BaseModel
from rapidfuzz import fuzz
from rapidfuzz import utils
from onyx.utils.text_processing import is_zero_width_char
from onyx.utils.text_processing import normalize_char
class SnippetMatchResult(BaseModel):
    """Result of locating a snippet inside a larger content string."""

    # Whether the snippet was found in the content.
    snippet_located: bool
    # Character indices of the match within the ORIGINAL content string;
    # -1 when no match was found.
    # NOTE(review): _normalize_and_match produces an inclusive end_idx while
    # _token_based_match forwards rapidfuzz's src_end — confirm the two
    # matchers agree on the end-index convention.
    start_idx: int = -1
    end_idx: int = -1


# Shared sentinel returned by all matchers when the snippet is not found.
NegativeSnippetMatchResult = SnippetMatchResult(snippet_located=False)
def find_snippet_in_content(content: str, snippet: str) -> SnippetMatchResult:
    """Locate *snippet* within *content*.

    Strategy (tried in order, first hit wins):
      1. Normalize both strings and look for an exact substring match.
      2. Fall back to a token-based fuzzy search.

    Notes:
      - If the snippet occurs multiple times, the first normalized occurrence
        is returned.
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult

    for matcher in (_normalize_and_match, _token_based_match):
        match = matcher(content, snippet)
        if match.snippet_located:
            return match

    return NegativeSnippetMatchResult
def _normalize_and_match(content: str, snippet: str) -> SnippetMatchResult:
    """Normalize both strings, then perform a direct substring match.

    The position maps produced by ``_normalize_text_with_mapping`` are used to
    translate the match in normalized space back into indices of the ORIGINAL
    content, re-attaching any leading/trailing snippet characters that
    normalization stripped (e.g. punctuation).

    Returns an inclusive ``[start_idx, end_idx]`` range into ``content``, or
    the negative sentinel when no match is found.
    """
    normalized_content, content_map = _normalize_text_with_mapping(content)
    # Renamed from the misleading `url_snippet_map`: this maps positions in
    # the normalized snippet back to positions in the original snippet.
    normalized_snippet, snippet_map = _normalize_text_with_mapping(snippet)

    if not normalized_content or not normalized_snippet:
        return NegativeSnippetMatchResult

    pos = normalized_content.find(normalized_snippet)
    if pos == -1:
        return NegativeSnippetMatchResult

    original_start = content_map[pos]
    # Account for leading characters stripped from the snippet during
    # normalization (e.g. leading punctuation like "[![]![]]").
    if snippet_map:
        first_snippet_orig_pos = snippet_map[0]
        if first_snippet_orig_pos > 0:
            # Extend the start backwards to cover the stripped prefix.
            original_start = max(original_start - first_snippet_orig_pos, 0)

    # Determine the end position, including any trailing characters of the
    # content that were normalized away (e.g. punctuation).
    match_end_norm = pos + len(normalized_snippet)
    if match_end_norm >= len(content_map):
        # Match extends to the end of the normalized content — include all
        # trailing original characters.
        original_end = len(content) - 1
    else:
        # Match is in the middle — end just before the original character
        # corresponding to the next normalized position.
        original_end = content_map[match_end_norm] - 1

    # Account for trailing characters stripped from the snippet during
    # normalization (e.g. trailing punctuation like "\n[").
    if snippet_map:
        last_snippet_orig_pos = snippet_map[-1]
        trailing_stripped = len(snippet) - last_snippet_orig_pos - 1
        if trailing_stripped > 0:
            # Extend the end to cover the stripped suffix, clamped to content.
            original_end = min(original_end + trailing_stripped, len(content) - 1)

    return SnippetMatchResult(
        snippet_located=True,
        start_idx=original_start,
        end_idx=original_end,
    )
def _normalize_text_with_mapping(text: str) -> tuple[str, list[int]]:
    """
    Text normalization that maintains position mapping.

    Normalization consists of: NFC unicode normalization, HTML-entity
    replacement, zero-width character removal, per-character normalization via
    ``normalize_char``, and whitespace collapsing (no leading/trailing space).

    Returns:
        tuple: (normalized_text, position_map)
        - position_map[i] gives the original position for normalized position i
    """
    if not text:
        return "", []

    original_text = text

    # Step 1: NFC normalization with position mapping
    nfc_text = unicodedata.normalize("NFC", text)

    # Build mapping from NFC positions to original start positions
    nfc_to_orig: list[int] = []
    orig_idx = 0
    for nfc_char in nfc_text:
        nfc_to_orig.append(orig_idx)
        # Find how many original chars contributed to this NFC char by
        # growing a window until its NFC form equals the single NFC char.
        for length in range(1, len(original_text) - orig_idx + 1):
            substr = original_text[orig_idx : orig_idx + length]
            if unicodedata.normalize("NFC", substr) == nfc_char:
                orig_idx += length
                break
        else:
            orig_idx += 1  # Fallback: advance one char if no window matched

    # Work with NFC text from here
    text = nfc_text

    # HTML entities mapped to their plain-text equivalents.
    html_entities = {
        "&nbsp;": " ",
        "&#160;": " ",
        "&amp;": "&",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&apos;": "'",
        "&#39;": "'",
        "&#x27;": "'",
        "&ndash;": "-",
        "&mdash;": "-",
        "&hellip;": "...",
        "&#xB0;": "°",
        "&#xBA;": "°",
        "&zwj;": "",
    }

    # Sort entities by length (longest first) for greedy matching
    sorted_entities = sorted(html_entities.keys(), key=len, reverse=True)

    result_chars = []
    result_map = []
    i = 0
    last_was_space = True  # Track to avoid leading spaces

    while i < len(text):
        # Convert NFC position to original position
        orig_pos = nfc_to_orig[i] if i < len(nfc_to_orig) else len(original_text) - 1
        char = text[i]
        output = None
        step = 1

        # Check for HTML entities first (greedy match)
        for entity in sorted_entities:
            if text[i : i + len(entity)] == entity:
                output = html_entities[entity]
                step = len(entity)
                break

        # If no entity matched, process single character
        if output is None:
            # Skip zero-width characters
            if is_zero_width_char(char):
                i += 1
                continue
            output = normalize_char(char)

        # Add output to result, normalizing each character from entity output
        if output:
            for out_char in output:
                # Normalize entity output the same way as regular chars
                normalized = normalize_char(out_char)
                # Handle whitespace collapsing: consecutive spaces map to one,
                # attributed to the first contributing original position.
                if normalized == " ":
                    if not last_was_space:
                        result_chars.append(" ")
                        result_map.append(orig_pos)
                        last_was_space = True
                else:
                    result_chars.append(normalized)
                    result_map.append(orig_pos)
                    last_was_space = False

        i += step

    # Remove trailing space if present
    if result_chars and result_chars[-1] == " ":
        result_chars.pop()
        result_map.pop()

    return "".join(result_chars), result_map
def _token_based_match(
    content: str,
    snippet: str,
    min_threshold: float = 0.8,
) -> SnippetMatchResult:
    """
    Fuzzy-locate `snippet` inside `content` using token-based partial matching.

    min_threshold is a fraction in [0, 1]; the rapidfuzz score (0-100) must
    reach min_threshold * 100 for the match to count.
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult
    alignment = fuzz.partial_ratio_alignment(
        content, snippet, processor=utils.default_process
    )
    if not alignment:
        return NegativeSnippetMatchResult
    if alignment.score < min_threshold * 100:
        return NegativeSnippetMatchResult
    return SnippetMatchResult(
        snippet_located=True,
        start_idx=alignment.src_start,
        end_idx=alignment.src_end,
    )

View File

@@ -832,7 +832,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
top_sections=merged_sections,
citation_start=override_kwargs.starting_citation_num,
limit=override_kwargs.max_llm_chunks,
include_document_id=True,
include_document_id=False,
)
# End overall timing
@@ -844,12 +844,12 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
f"document expansion: {document_expansion_elapsed:.3f}s)"
)
# TODO: extension - this can include the smaller set of approved docs to be saved/displayed in the UI
# for replaying. Currently the full set is returned and saved.
return ToolResponse(
# Typically the rich response will give more docs in case it needs to be displayed in the UI
rich_response=SearchDocsResponse(
search_docs=search_docs, citation_mapping=citation_mapping
search_docs=search_docs,
citation_mapping=citation_mapping,
displayed_docs=final_ui_docs or None,
),
# The LLM facing response typically includes less docs to cut down on noise and token usage
llm_facing_response=docs_str,

View File

@@ -73,7 +73,7 @@ def convert_inference_sections_to_llm_string(
link = next(iter(chunk.source_links.values()), None)
if link:
result["url"] = link
if include_document_id and "url" not in result:
if include_document_id:
result["document_identifier"] = chunk.document_id
if chunk.metadata:
result["metadata"] = json.dumps(chunk.metadata)

View File

@@ -1,11 +1,19 @@
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDoc
from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.snippet_matcher import (
find_snippet_in_content,
)
from onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
TRUNCATED_CONTENT_SUFFIX = " [...truncated]"
TRUNCATED_CONTENT_PREFIX = "[...truncated] "
def filter_web_search_results_with_no_title_or_snippet(
results: list[WebSearchResult],
) -> list[WebSearchResult]:
@@ -26,14 +34,99 @@ def truncate_search_result_content(content: str, max_chars: int = 15000) -> str:
"""Truncate search result content to a maximum number of characters"""
if len(content) <= max_chars:
return content
return content[:max_chars] + " [...truncated]"
return content[:max_chars] + TRUNCATED_CONTENT_SUFFIX
def _truncate_content_around_snippet(
    content: str, snippet: str, max_chars: int = 15000
) -> str:
    """
    Return a window of `content`, at most ~max_chars wide, centered on the
    located snippet. Returns "" when the snippet cannot be found.

    Truncation markers are prepended/appended for whichever sides were cut.
    """
    match = find_snippet_in_content(content, snippet)
    if not match.snippet_located:
        return ""
    window_start, window_end = _expand_range_centered(
        match.start_idx, match.end_idx + 1, len(content), max_chars
    )
    window = content[window_start:window_end]
    # Mark truncation on each side that dropped content
    if window_start > 0:
        window = TRUNCATED_CONTENT_PREFIX + window
    if window_end < len(content):
        window = window + TRUNCATED_CONTENT_SUFFIX
    return window
def _expand_range_centered(
start_idx: int, end_idx: int, N: int, target_size: int
) -> tuple[int, int]:
"""
Expands a range [start_idx, end_idx) to be centered within a list of size N
Args:
start_idx: Starting index (inclusive)
end_idx: Ending index (exclusive)
N: Size of the list
target_size: Target size of the range
Returns:
Tuple of (new start index, new end index)
"""
current_size = end_idx - start_idx
if current_size >= target_size:
return start_idx, end_idx
padding_needed = target_size - current_size
padding_top = padding_needed // 2
padding_bottom = padding_needed - padding_top
# Try expand symmetrically
new_start = start_idx - padding_top
new_end = end_idx + padding_bottom
# Handle overflow
if new_start < 0:
overflow = -new_start
new_start = 0
new_end = min(N, new_end + overflow)
if new_end > N:
overflow = new_end - N
new_end = N
new_start = max(0, new_start - overflow)
return new_start, new_end
def inference_section_from_internet_page_scrape(
result: WebContent,
snippet: str,
rank: int = 0,
) -> InferenceSection:
truncated_content = truncate_search_result_content(result.full_content)
# truncate the content around snippet if snippet exists
truncated_content = ""
if snippet:
truncated_content = _truncate_content_around_snippet(
result.full_content, snippet
)
# Fallback if no snippet exists or we failed to find it
if not truncated_content:
truncated_content = truncate_search_result_content(result.full_content)
# Calculate score using reciprocal rank to preserve ordering
score = 1.0 / (rank + 1)
@@ -97,3 +190,14 @@ def inference_section_from_internet_search_result(
chunks=[chunk],
combined_content=result.snippet,
)
def extract_url_snippet_map(documents: list[SearchDoc]) -> dict[str, str]:
    """
    Given a list of SearchDocs, extract a url -> blurb (summary) map for web
    documents that carry a link. Later duplicates of a url overwrite earlier
    ones, matching plain dict assignment.
    """
    return {
        document.link: document.blurb
        for document in documents
        if document.source_type == DocumentSource.WEB and document.link
    }

View File

@@ -7,6 +7,7 @@ from onyx.chat.models import ChatMessageSimple
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDocsResponse
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PacketException
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.interface import Tool
from onyx.tools.models import ChatMinimalTextMessage
@@ -15,6 +16,7 @@ from onyx.tools.models import ParallelToolCallResponse
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
@@ -152,6 +154,33 @@ def _safe_run_single_tool(
},
)
)
except ToolExecutionException as e:
# Unexpected error during tool execution
logger.error(f"Unexpected error running tool {tool.name}: {e}")
tool_response = ToolResponse(
rich_response=None,
llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),
)
_error_tracing.attach_error_to_current_span(
SpanError(
message="Tool execution error (unexpected)",
data={
"tool_name": tool.name,
"tool_call_id": tool_call.tool_call_id,
"tool_args": tool_call.tool_args,
"error": str(e),
"stack_trace": traceback.format_exc(),
"error_type": type(e).__name__,
},
)
)
if e.emit_error_packet:
tool.emitter.emit(
Packet(
placement=tool_call.placement,
obj=PacketException(exception=e),
)
)
except Exception as e:
# Unexpected error during tool execution
logger.error(f"Unexpected error running tool {tool.name}: {e}")
@@ -200,6 +229,8 @@ def run_tool_calls(
max_concurrent_tools: int | None = None,
# Skip query expansion for repeat search tool calls
skip_search_query_expansion: bool = False,
# A map of url -> summary for passing web results to open url tool
url_snippet_map: dict[str, str] = {},
) -> ParallelToolCallResponse:
"""Run (optionally merged) tool calls in parallel and update citation mappings.
@@ -330,6 +361,7 @@ def run_tool_calls(
override_kwargs = OpenURLToolOverrideKwargs(
starting_citation_num=starting_citation_num,
citation_mapping=url_to_citation,
url_snippet_map=url_snippet_map,
)
starting_citation_num += 100

View File

@@ -9,6 +9,36 @@ from onyx.utils.logger import setup_logger
logger = setup_logger(__name__)
# Mapping of curly/smart quotes to straight quotes
CURLY_TO_STRAIGHT_QUOTES: dict[str, str] = {
    "\u2019": "'",  # Right single quotation mark
    "\u2018": "'",  # Left single quotation mark
    "\u201c": '"',  # Left double quotation mark
    "\u201d": '"',  # Right double quotation mark
}

# Zero-width characters that should typically be removed during text normalization
ZERO_WIDTH_CHARS: set[str] = {
    "\u200b",  # Zero-width space
    "\u200c",  # Zero-width non-joiner
    "\u200d",  # Zero-width joiner
    "\ufeff",  # Byte order mark / zero-width no-break space
    "\u2060",  # Word joiner
}


def normalize_curly_quotes(text: str) -> str:
    """Convert curly/smart quotes to their straight ASCII equivalents."""
    for smart, plain in CURLY_TO_STRAIGHT_QUOTES.items():
        text = text.replace(smart, plain)
    return text


def is_zero_width_char(c: str) -> bool:
    """Return True when `c` is one of the known zero-width characters."""
    return c in ZERO_WIDTH_CHARS
ESCAPE_SEQUENCE_RE = re.compile(
r"""
( \\U........ # 8-digit hex escapes
@@ -257,3 +287,15 @@ def remove_invalid_unicode_chars(text: str) -> str:
- Unicode non-characters
"""
return _INVALID_UNICODE_CHARS_RE.sub("", text)
def normalize_char(c: str) -> str:
    """Normalize a single character (curly quotes, whitespace, punctuation).

    Curly quotes become straight quotes, any whitespace becomes a single
    space, punctuation (anything outside word chars / whitespace / ')
    becomes a space, and everything else is lowercased.
    """
    c = CURLY_TO_STRAIGHT_QUOTES.get(c, c)
    if c.isspace():
        return " "
    if re.match(r"[^\w\s\']", c):
        return " "
    return c.lower()

View File

@@ -255,11 +255,11 @@ fastapi==0.116.1
# onyx
fastapi-limiter==0.1.6
# via onyx
fastapi-users==14.0.1
fastapi-users==15.0.2
# via
# fastapi-users-db-sqlalchemy
# onyx
fastapi-users-db-sqlalchemy==5.0.0
fastapi-users-db-sqlalchemy==7.0.0
# via onyx
fastavro==1.12.1
# via cohere
@@ -608,9 +608,7 @@ mypy-extensions==1.0.0
nest-asyncio==1.6.0
# via onyx
nltk==3.9.1
# via
# onyx
# unstructured
# via unstructured
numpy==2.4.1
# via
# magika
@@ -784,7 +782,7 @@ psycopg2-binary==2.9.9
# via onyx
puremagic==1.28
# via onyx
pwdlib==0.2.1
pwdlib==0.3.0
# via fastapi-users
py==1.11.0
# via retry
@@ -904,7 +902,7 @@ python-json-logger==4.0.0
# via pydocket
python-magic==0.4.27
# via unstructured
python-multipart==0.0.20
python-multipart==0.0.21
# via
# fastapi-users
# mcp

View File

@@ -298,7 +298,7 @@ numpy==2.4.1
# pandas-stubs
# shapely
# voyageai
onyx-devtools==0.3.2
onyx-devtools==0.4.0
# via onyx
openai==2.14.0
# via

View File

@@ -45,7 +45,9 @@ from onyx.db.connector_credential_pair import (
get_connector_credential_pair,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import (
get_all_document_indices,
)
from onyx.file_store.file_store import get_default_file_store
# pylint: enable=E402
@@ -59,7 +61,7 @@ _DELETION_BATCH_SIZE = 1000
def _unsafe_deletion(
db_session: Session,
document_index: DocumentIndex,
document_indices: list[DocumentIndex],
cc_pair: ConnectorCredentialPair,
pair_id: int,
) -> int:
@@ -80,11 +82,12 @@ def _unsafe_deletion(
break
for document in documents:
document_index.delete_single(
doc_id=document.id,
tenant_id=POSTGRES_DEFAULT_SCHEMA,
chunk_count=document.chunk_count,
)
for document_index in document_indices:
document_index.delete_single(
doc_id=document.id,
tenant_id=POSTGRES_DEFAULT_SCHEMA,
chunk_count=document.chunk_count,
)
delete_documents_complete__no_commit(
db_session=db_session,
@@ -211,14 +214,16 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
try:
logger.notice("Deleting information from Vespa and Postgres")
active_search_settings = get_active_search_settings(db_session)
document_index = get_default_document_index(
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
active_search_settings.primary,
active_search_settings.secondary,
None,
)
files_deleted_count = _unsafe_deletion(
db_session=db_session,
document_index=document_index,
document_indices=document_indices,
cc_pair=cc_pair,
pair_id=cc_pair_id,
)

View File

@@ -3,28 +3,8 @@
# We get OPENSEARCH_ADMIN_PASSWORD from the repo .env file.
source "$(dirname "$0")/../../.vscode/.env"
OPENSEARCH_CONTAINER_NAME="onyx-opensearch"
OPENSEARCH_IMAGE="opensearchproject/opensearch:3.4.0"
# First check the env for OPENSEARCH_REST_API_PORT, else hardcode to 9200.
OPENSEARCH_REST_API_PORT=${OPENSEARCH_REST_API_PORT:-9200}
OPENSEARCH_PERFORMANCE_ANALYZER_PORT=9600
cd "$(dirname "$0")/../../deployment/docker_compose"
function stop_and_remove_opensearch_container() {
echo "Stopping and removing the existing OpenSearch container..."
docker stop "$OPENSEARCH_CONTAINER_NAME" 2>/dev/null || true
docker rm "$OPENSEARCH_CONTAINER_NAME" 2>/dev/null || true
}
# Set OPENSEARCH_ADMIN_PASSWORD=<some password> in your .env file.
if [ -z "$OPENSEARCH_ADMIN_PASSWORD" ]; then
echo "Error: OPENSEARCH_ADMIN_PASSWORD environment variable is not set." >&2
echo "Please set OPENSEARCH_ADMIN_PASSWORD=<some password> in your .env file." >&2
exit 1
fi
# Stop and remove the existing container.
stop_and_remove_opensearch_container
# Start the OpenSearch container.
echo "Starting OpenSearch container..."
docker run --detach --name "$OPENSEARCH_CONTAINER_NAME" --publish "$OPENSEARCH_REST_API_PORT:9200" --publish "$OPENSEARCH_PERFORMANCE_ANALYZER_PORT:9600" -e "discovery.type=single-node" -e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_ADMIN_PASSWORD" "$OPENSEARCH_IMAGE"
# Start OpenSearch.
echo "Forcefully starting fresh OpenSearch container..."
docker compose -f docker-compose.opensearch.yml up --force-recreate -d opensearch

View File

@@ -13,8 +13,6 @@ import sys
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.factory import get_default_document_index
def get_tenant_index_name(tenant_id: str) -> dict[str, str]:
@@ -26,14 +24,7 @@ def get_tenant_index_name(tenant_id: str) -> dict[str, str]:
try:
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
search_settings = get_current_search_settings(db_session)
secondary_search_settings = get_secondary_search_settings(db_session)
document_index = get_default_document_index(
search_settings=search_settings,
secondary_search_settings=secondary_search_settings,
)
index_name = document_index.index_name
index_name = search_settings.index_name
print(f"Found index name: {index_name}", file=sys.stderr)
return {"status": "success", "index_name": index_name}

View File

@@ -14,6 +14,10 @@ from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
# Counter for generating unique file IDs in mock file store
_mock_file_id_counter = 0
def ensure_default_llm_provider(db_session: Session) -> None:
"""Ensure a default LLM provider exists for tests that exercise chat flows."""
@@ -80,11 +84,34 @@ def mock_vespa_query() -> Iterator[None]:
yield
@pytest.fixture
def mock_file_store() -> Iterator[None]:
    """Mock the file store to avoid S3/storage dependencies in tests."""
    global _mock_file_id_counter

    def _mock_save_file(*args: Any, **kwargs: Any) -> str:
        global _mock_file_id_counter
        _mock_file_id_counter += 1
        # Return a predictable file ID for tests
        # NOTE(review): the counter is incremented but the returned ID is the
        # fixed string "123" — confirm whether saves should yield unique IDs.
        return "123"

    mock_store = MagicMock()
    mock_store.save_file.side_effect = _mock_save_file
    mock_store.initialize.return_value = None
    # Patch the accessor used by file_store.utils so saves go to the mock store
    with patch(
        "onyx.file_store.utils.get_default_file_store",
        return_value=mock_store,
    ):
        yield
@pytest.fixture
def mock_external_deps(
    mock_nlp_embeddings_post: None,
    mock_gpu_status: None,
    mock_vespa_query: None,
    mock_file_store: None,
) -> Iterator[None]:
    """Convenience fixture to enable all common external dependency mocks.

    Requesting this fixture activates the embedding, GPU-status, Vespa-query,
    and file-store mocks together for the duration of the test.
    """
    yield

View File

@@ -0,0 +1,156 @@
from __future__ import annotations
from typing import cast
from onyx.chat.models import AnswerStreamPart
from onyx.chat.models import CreateChatSessionID
from onyx.chat.models import MessageResponseIDInfo
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
def assert_answer_stream_part_correct(
    received: AnswerStreamPart, expected: AnswerStreamPart
) -> None:
    """Assert that a received stream part matches the expected one.

    Packets are compared by placement plus a type-specific payload comparison;
    message-ID and session-ID parts are accepted without field checks since
    their concrete values are assigned by the backend.
    """
    assert isinstance(received, type(expected))
    if isinstance(received, MessageResponseIDInfo):
        # We're not going to make assumptions about what the user id /
        # assistant id should be, so accept any values.
        return
    if isinstance(received, CreateChatSessionID):
        # Don't worry about matching session ids
        return
    if not isinstance(received, Packet):
        raise NotImplementedError("Not implemented")
    assert received.placement == expected.placement
    # Payload types with dedicated comparators (checked in order)
    comparators = (
        (SearchToolDocumentsDelta, is_search_tool_document_delta_equal),
        (OpenUrlDocuments, is_open_url_documents_equal),
        (AgentResponseStart, is_agent_response_start_equal),
        (ImageGenerationFinal, is_image_generation_final_equal),
    )
    for payload_type, payloads_equal in comparators:
        if isinstance(received.obj, payload_type):
            assert isinstance(expected.obj, payload_type)
            assert payloads_equal(received.obj, expected.obj)
            return
    # Everything else must compare equal directly
    assert received.obj == expected.obj
def _are_search_docs_equal(
received: list[SearchDoc],
expected: list[SearchDoc],
) -> bool:
"""
What we care about:
- All documents are present (order does not)
- Expected document_id, link, blurb, source_type and hidden
"""
if len(received) != len(expected):
return False
received.sort(key=lambda x: x.document_id)
expected.sort(key=lambda x: x.document_id)
for received_document, expected_document in zip(received, expected):
if received_document.document_id != expected_document.document_id:
return False
if received_document.link != expected_document.link:
return False
if received_document.blurb != expected_document.blurb:
return False
if received_document.source_type != expected_document.source_type:
return False
if received_document.hidden != expected_document.hidden:
return False
return True
def is_search_tool_document_delta_equal(
    received: SearchToolDocumentsDelta,
    expected: SearchToolDocumentsDelta,
) -> bool:
    """
    Compare two SearchToolDocumentsDelta packets by their documents only.

    Delegates to _are_search_docs_equal: all documents must be present
    (order-insensitive) with matching document_id, link, blurb, source_type
    and hidden.
    """
    return _are_search_docs_equal(received.documents, expected.documents)
def is_open_url_documents_equal(
    received: OpenUrlDocuments,
    expected: OpenUrlDocuments,
) -> bool:
    """
    Compare two OpenUrlDocuments packets by their documents only.

    Delegates to _are_search_docs_equal: all documents must be present
    (order-insensitive) with matching document_id, link, blurb, source_type
    and hidden.
    """
    return _are_search_docs_equal(received.documents, expected.documents)
def is_agent_response_start_equal(
    received: AgentResponseStart,
    expected: AgentResponseStart,
) -> bool:
    """
    Compare AgentResponseStart packets by their final_documents.

    Both sides None counts as equal. If either side is None or an empty list
    while we got this far, the packets are considered unequal (truthiness
    check, matching the original behavior). Otherwise documents are compared
    order-insensitively via _are_search_docs_equal.
    """
    got = received.final_documents
    want = expected.final_documents
    if got is None and want is None:
        return True
    if not got or not want:
        return False
    return _are_search_docs_equal(got, want)
def is_image_generation_final_equal(
    received: ImageGenerationFinal,
    expected: ImageGenerationFinal,
) -> bool:
    """
    Compare two ImageGenerationFinal packets.

    What we care about:
    - Number of images is the same
    - On each received image, url and file_id align as url=/api/chat/file/{file_id}
    - revised_prompt and shape match the expected image
    """
    if len(received.images) != len(expected.images):
        return False
    return all(
        got.url == f"/api/chat/file/{got.file_id}"
        and got.revised_prompt == want.revised_prompt
        and got.shape == want.shape
        for got, want in zip(received.images, expected.images)
    )

View File

@@ -0,0 +1,139 @@
from __future__ import annotations
from collections.abc import Iterator
from onyx.chat.models import AnswerStreamPart
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from tests.external_dependency_unit.answer.stream_test_assertions import (
assert_answer_stream_part_correct,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
create_packet_with_agent_response_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
create_packet_with_reasoning_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import create_placement
from tests.external_dependency_unit.mock_llm import LLMResponse
from tests.external_dependency_unit.mock_llm import MockLLMController
class StreamTestBuilder:
    """Fluent builder that scripts mock-LLM output and validates the packets
    a chat stream emits in response.

    Usage: queue responses with add_response / add_responses_together, queue
    expectations with the expect_* methods, then call run_and_validate.
    """

    def __init__(self, llm_controller: MockLLMController) -> None:
        self._llm_controller = llm_controller
        # List of (expected_packet, forward_count) tuples, consumed in order
        self._expected_packets_queue: list[tuple[Packet, int]] = []

    @staticmethod
    def _to_forward_count(forward: int | bool) -> int:
        """Translate forward=True/False/int into a token count (1/0/int)."""
        if forward is True:
            return 1
        if forward is False:
            return 0
        return forward

    def add_response(self, response: LLMResponse) -> StreamTestBuilder:
        """Queue a single mock LLM response."""
        self._llm_controller.add_response(response)
        return self

    def add_responses_together(self, *responses: LLMResponse) -> StreamTestBuilder:
        """Add multiple responses that should be emitted together in the same tick."""
        self._llm_controller.add_responses_together(*responses)
        return self

    def expect(
        self, expected_pkt: Packet, forward: int | bool = True
    ) -> StreamTestBuilder:
        """
        Add an expected packet to the queue.

        Args:
            expected_pkt: The packet to expect
            forward: Number of tokens to forward before expecting this packet.
                True = 1 token, False = 0 tokens, int = that many tokens.
        """
        self._expected_packets_queue.append(
            (expected_pkt, self._to_forward_count(forward))
        )
        return self

    def expect_packets(
        self, packets: list[Packet], forward: int | bool = True
    ) -> StreamTestBuilder:
        """
        Add multiple expected packets to the queue.

        Args:
            packets: List of packets to expect
            forward: Number of tokens to forward before expecting EACH packet.
                True = 1 token per packet, False = 0 tokens, int = that many
                tokens per packet.
        """
        per_packet = self._to_forward_count(forward)
        for pkt in packets:
            self._expected_packets_queue.append((pkt, per_packet))
        return self

    def expect_reasoning(
        self,
        reasoning_tokens: list[str],
        turn_index: int,
    ) -> StreamTestBuilder:
        """Expect ReasoningStart, one delta per token, then ReasoningDone."""
        self.expect(
            Packet(placement=create_placement(turn_index), obj=ReasoningStart())
        )
        self.expect_packets(
            [
                create_packet_with_reasoning_delta(token, turn_index)
                for token in reasoning_tokens
            ]
        )
        self.expect(
            Packet(placement=create_placement(turn_index), obj=ReasoningDone())
        )
        return self

    def expect_agent_response(
        self,
        answer_tokens: list[str],
        turn_index: int,
        final_documents: list[SearchDoc] | None = None,
    ) -> StreamTestBuilder:
        """Expect AgentResponseStart, one delta per token, then OverallStop."""
        self.expect(
            Packet(
                placement=create_placement(turn_index),
                obj=AgentResponseStart(
                    final_documents=final_documents,
                ),
            )
        )
        self.expect_packets(
            [
                create_packet_with_agent_response_delta(token, turn_index)
                for token in answer_tokens
            ]
        )
        self.expect(
            Packet(placement=create_placement(turn_index), obj=OverallStop())
        )
        return self

    def run_and_validate(self, stream: Iterator[AnswerStreamPart]) -> None:
        """Drain the expectation queue against the live stream, forwarding the
        mock LLM the requested number of tokens before each expected packet."""
        while self._expected_packets_queue:
            expected_pkt, forward_count = self._expected_packets_queue.pop(0)
            if forward_count > 0:
                self._llm_controller.forward(forward_count)
            assert_answer_stream_part_correct(next(stream), expected_pkt)

View File

@@ -0,0 +1,121 @@
from __future__ import annotations
from collections.abc import Iterator
from uuid import UUID
from sqlalchemy.orm import Session
from onyx.chat.chat_utils import create_chat_session_from_request
from onyx.chat.models import AnswerStreamPart
from onyx.chat.process_message import handle_stream_message_objects
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import SearchDoc
from onyx.db.models import ChatSession
from onyx.db.models import User
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
from tests.external_dependency_unit.mock_content_provider import MockWebContent
from tests.external_dependency_unit.mock_search_provider import MockWebSearchResult
def create_placement(
    turn_index: int,
    tab_index: int = 0,
    sub_turn_index: int | None = None,
) -> Placement:
    """Build a Placement for a stream packet at the given turn/tab/sub-turn."""
    return Placement(
        turn_index=turn_index,
        tab_index=tab_index,
        sub_turn_index=sub_turn_index,
    )
def submit_query(
    query: str, chat_session_id: UUID | None, db_session: Session, user: User
) -> Iterator[AnswerStreamPart]:
    """Send `query` through the streaming message handler and return the stream.

    When no chat_session_id is given, a session-creation request is attached so
    the backend creates a new session for the message.
    """
    session_info = ChatSessionCreationRequest() if chat_session_id is None else None
    request = SendMessageRequest(
        message=query,
        chat_session_id=chat_session_id,
        stream=True,
        chat_session_info=session_info,
    )
    return handle_stream_message_objects(
        new_msg_req=request,
        user=user,
        db_session=db_session,
    )
def create_chat_session(
    db_session: Session,
    user: User,
) -> ChatSession:
    """Create a fresh chat session for `user` with default creation options."""
    return create_chat_session_from_request(
        chat_session_request=ChatSessionCreationRequest(),
        user_id=user.id,
        db_session=db_session,
    )
def create_packet_with_agent_response_delta(token: str, turn_index: int) -> Packet:
    """Build the Packet emitted for a single agent-response token."""
    return Packet(
        placement=create_placement(turn_index),
        obj=AgentResponseDelta(
            content=token,
        ),
    )
def create_packet_with_reasoning_delta(token: str, turn_index: int) -> Packet:
    """Build the Packet emitted for a single reasoning token."""
    return Packet(
        placement=create_placement(turn_index),
        obj=ReasoningDelta(
            reasoning=token,
        ),
    )
def create_web_search_doc(
    semantic_identifier: str,
    link: str,
    blurb: str,
) -> SearchDoc:
    """Build a minimal WEB-source SearchDoc for stream-packet expectations."""
    return SearchDoc(
        # document_id scheme mirrors how web-search results are identified
        document_id=f"WEB_SEARCH_DOC_{link}",
        chunk_ind=0,
        semantic_identifier=semantic_identifier,
        link=link,
        blurb=blurb,
        source_type=DocumentSource.WEB,
        boost=1,
        hidden=False,
        metadata={},
        match_highlights=[],
    )
def mock_web_search_result_to_search_doc(result: MockWebSearchResult) -> SearchDoc:
    """Map a mock web-search result onto the SearchDoc the stream should emit."""
    return create_web_search_doc(result.title, result.link, result.snippet)
def mock_web_content_to_search_doc(content: MockWebContent) -> SearchDoc:
    """Map mock fetched web content onto the SearchDoc the stream should emit.

    Note: for opened-url docs the blurb is the page title, not a snippet.
    """
    return create_web_search_doc(content.title, content.url, content.title)
def tokenise(text: str) -> list[str]:
    """Split on single spaces and re-append a trailing space to each token,
    mimicking how the mock LLM streams tokens."""
    return [f"{token} " for token in text.split(" ")]

View File

@@ -0,0 +1,982 @@
from __future__ import annotations
import json
from uuid import UUID
import pytest
from sqlalchemy.orm import Session
from onyx.chat.models import CreateChatSessionID
from onyx.chat.models import MessageResponseIDInfo
from onyx.configs.constants import DocumentSource
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeartbeat
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import OpenUrlStart
from onyx.server.query_and_chat.streaming_models import OpenUrlUrls
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
from onyx.server.query_and_chat.streaming_models import SearchToolStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.server.query_and_chat.streaming_models import TopLevelBranching
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
from tests.external_dependency_unit.answer.stream_test_assertions import (
assert_answer_stream_part_correct,
)
from tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder
from tests.external_dependency_unit.answer.stream_test_utils import create_chat_session
from tests.external_dependency_unit.answer.stream_test_utils import (
create_packet_with_agent_response_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
create_packet_with_reasoning_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import create_placement
from tests.external_dependency_unit.answer.stream_test_utils import (
mock_web_content_to_search_doc,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
mock_web_search_result_to_search_doc,
)
from tests.external_dependency_unit.answer.stream_test_utils import submit_query
from tests.external_dependency_unit.answer.stream_test_utils import tokenise
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.mock_content_provider import MockWebContent
from tests.external_dependency_unit.mock_content_provider import (
use_mock_content_provider,
)
from tests.external_dependency_unit.mock_image_provider import (
use_mock_image_generation_provider,
)
from tests.external_dependency_unit.mock_llm import LLMAnswerResponse
from tests.external_dependency_unit.mock_llm import LLMReasoningResponse
from tests.external_dependency_unit.mock_llm import LLMToolCallResponse
from tests.external_dependency_unit.mock_llm import use_mock_llm
from tests.external_dependency_unit.mock_search_pipeline import MockInternalSearchResult
from tests.external_dependency_unit.mock_search_pipeline import use_mock_search_pipeline
from tests.external_dependency_unit.mock_search_provider import MockWebSearchResult
from tests.external_dependency_unit.mock_search_provider import use_mock_web_provider
def test_stream_chat_with_answer(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """Happy path: a plain question streams back a single agent-answer turn."""
    ensure_default_llm_provider(db_session)
    user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_answer"
    )
    question = "What is the capital of France?"
    expected_tokens = tokenise("The capital of France is Paris.")

    with use_mock_llm() as llm_controller:
        builder = StreamTestBuilder(llm_controller=llm_controller)
        builder.add_response(LLMAnswerResponse(answer_tokens=expected_tokens))

        session = create_chat_session(db_session=db_session, user=user)
        stream = submit_query(
            query=question,
            chat_session_id=session.id,
            db_session=db_session,
            user=user,
        )

        # First stream part reserves the user/assistant message IDs.
        assert_answer_stream_part_correct(
            received=next(stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )

        # The rest of the stream is exactly one agent-response turn.
        builder.expect_agent_response(
            answer_tokens=expected_tokens,
            turn_index=0,
        ).run_and_validate(stream=stream)

        # Stream must be fully exhausted afterwards.
        with pytest.raises(StopIteration):
            next(stream)
def test_stream_chat_with_answer_create_chat(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """When no chat_session_id is supplied, the stream first announces the
    newly created session, then the message IDs, then the answer turn."""
    ensure_default_llm_provider(db_session)
    user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_answer_create_chat"
    )
    reply_tokens = ["Hello friend"]

    with use_mock_llm() as llm_controller:
        builder = StreamTestBuilder(llm_controller=llm_controller)
        builder.add_response(LLMAnswerResponse(answer_tokens=reply_tokens))

        # No pre-existing session: the backend must create one itself.
        stream = submit_query(
            query="Hi there friends",
            chat_session_id=None,
            db_session=db_session,
            user=user,
        )

        # The created session ID is emitted before anything else.
        assert_answer_stream_part_correct(
            received=next(stream),
            expected=CreateChatSessionID(
                chat_session_id=UUID("123e4567-e89b-12d3-a456-426614174000")
            ),
        )
        assert_answer_stream_part_correct(
            received=next(stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=2,
            ),
        )

        builder.expect_agent_response(
            answer_tokens=reply_tokens,
            turn_index=0,
        ).run_and_validate(stream=stream)

        with pytest.raises(StopIteration):
            next(stream)
def test_stream_chat_with_search_and_openurl_tools(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """Multi-turn stream: reason + web_search, reason + open_url, reason + answer.

    Validates the exact packet sequence per turn (placements 0-5) and that the
    final agent response carries every searched and opened document.
    """
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_search_tool"
    )
    QUERY = "What is the weather in Sydney?"
    REASONING_RESPONSE_1 = (
        "I need to perform a web search to get current weather details. "
        "I can use the search tool to do this."
    )
    WEB_QUERY_1 = "weather in sydney"
    WEB_QUERY_2 = "current weather in sydney"
    # Canned search hits registered for WEB_QUERY_1 / WEB_QUERY_2 respectively.
    RESULTS1 = [
        MockWebSearchResult(
            title="Official Weather",
            link="www.weather.com.au",
            snippet="The current weather in Sydney is 20 degrees Celsius.",
        ),
        MockWebSearchResult(
            title="Weather CHannel",
            link="www.wc.com.au",
            snippet="Morning is 10 degree Celsius, afternoon is 25 degrees Celsius.",
        ),
    ]
    RESULTS2 = [
        MockWebSearchResult(
            title="Weather Now!",
            link="www.weathernow.com.au",
            snippet="The weather right now is sunny with a temperature of 22 degrees Celsius.",
        )
    ]
    REASONING_RESPONSE_2 = "I like weathernow and the official weather site"
    QUERY_URLS_1 = ["www.weathernow.com.au", "www.weather.com.au"]
    # Page contents served by the mock content provider when open_url fetches
    # the URLs in QUERY_URLS_1.
    CONTENT1 = [
        MockWebContent(
            title="Weather Now!",
            url="www.weathernow.com.au",
            content="The weather right now is sunny with a temperature of 22 degrees Celsius.",
        ),
        MockWebContent(
            title="Weather Official",
            url="www.weather.com.au",
            content="The current weather in Sydney is 20 degrees Celsius.",
        ),
    ]
    REASONING_RESPONSE_3 = (
        "I now know everything that I need to know. " "I can now answer the question."
    )
    ANSWER_RESPONSE_1 = (
        "The weather in Sydney is sunny with a temperature of 22 degrees celsius."
    )
    with (
        use_mock_llm() as mock_llm,
        use_mock_web_provider(db_session) as mock_web,
        use_mock_content_provider() as mock_content,
    ):
        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )
        chat_session = create_chat_session(db_session=db_session, user=test_user)
        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )
        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )
        # LLM Stream Response 1
        # Turn 0 is reasoning; turn 1 is a single web_search tool call that
        # batches both queries and streams their combined document list.
        mock_web.add_results(WEB_QUERY_1, RESULTS1)
        mock_web.add_results(WEB_QUERY_2, RESULTS2)
        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_1))
        ).add_response(
            LLMToolCallResponse(
                tool_name="web_search",
                tool_call_id="123",
                tool_call_argument_tokens=[
                    json.dumps({"queries": [WEB_QUERY_1, WEB_QUERY_2]})
                ],
            )
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 0)
                for token in tokenise(REASONING_RESPONSE_1)
            ]
        ).expect(
            Packet(placement=create_placement(0), obj=ReasoningDone())
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolStart(
                    is_internet_search=True,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolQueriesDelta(
                    queries=[WEB_QUERY_1, WEB_QUERY_2],
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolDocumentsDelta(
                    documents=[
                        mock_web_search_result_to_search_doc(result)
                        for result in RESULTS1
                    ]
                    + [
                        mock_web_search_result_to_search_doc(result)
                        for result in RESULTS2
                    ]
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 2
        # Turn 2 is reasoning; turn 3 opens both URLs and streams their docs.
        for content in CONTENT1:
            mock_content.add_content(content)
        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_2))
        ).add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"urls": QUERY_URLS_1})],
            )
        ).expect(
            Packet(
                placement=create_placement(2),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 2)
                for token in tokenise(REASONING_RESPONSE_2)
            ]
        ).expect(
            Packet(
                placement=create_placement(2),
                obj=ReasoningDone(),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlStart(),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlUrls(urls=[content.url for content in CONTENT1]),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content) for content in CONTENT1
                    ]
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 3
        # Turn 4 is final reasoning; turn 5 is the answer, which must cite all
        # documents gathered so far (search results + opened pages).
        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_3))
        ).add_response(
            LLMAnswerResponse(answer_tokens=tokenise(ANSWER_RESPONSE_1))
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 4)
                for token in tokenise(REASONING_RESPONSE_3)
            ]
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=ReasoningDone(),
            )
        ).expect_agent_response(
            answer_tokens=tokenise(ANSWER_RESPONSE_1),
            turn_index=5,
            final_documents=[
                mock_web_search_result_to_search_doc(result) for result in RESULTS1
            ]
            + [mock_web_search_result_to_search_doc(result) for result in RESULTS2]
            + [mock_web_content_to_search_doc(content) for content in CONTENT1],
        ).run_and_validate(
            stream=answer_stream
        )
        with pytest.raises(StopIteration):
            next(answer_stream)
def test_image_generation_tool_no_reasoning(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """Image-generation tool call with no reasoning turn.

    The mock image provider is given an artificial delay so the tool emits
    heartbeat packets while "generating"; the test pins the exact count.
    """
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(db_session, email_prefix="test_image_generation_tool")
    QUERY = "Create me an image of a dog on a rocketship"
    # 1x1 PNG, base64-encoded — smallest valid payload for the mock provider.
    IMAGE_DATA = (
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfF"
        "cSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    )
    # Heartbeat interval is 5 seconds. A delay of 8 seconds ensures exactly 2 heartbeats:
    IMAGE_DELAY = 8.0
    ANSWER_RESPONSE = "Here is a dog on a rocketship"
    with (
        use_mock_llm() as mock_llm,
        use_mock_image_generation_provider() as mock_image_gen,
    ):
        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )
        chat_session = create_chat_session(db_session=db_session, user=test_user)
        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )
        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )
        # LLM Stream Response 1
        mock_image_gen.add_image(IMAGE_DATA, IMAGE_DELAY)
        mock_llm.set_max_timeout(
            IMAGE_DELAY + 5.0
        )  # Give enough buffer for image generation
        # The LLMToolCallResponse has 2 tokens (1 for tool name/id + 1 for arguments).
        # We need to forward all 2 tokens before the tool starts executing and emitting packets.
        # The tool then emits: start, heartbeats (during image generation), final, and section end.
        handler.add_response(
            LLMToolCallResponse(
                tool_name="generate_image",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"prompt": QUERY})],
            )
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ImageGenerationToolStart(),
            ),
            forward=2,  # Forward both tool call tokens before expecting first packet
        ).expect_packets(
            [
                Packet(
                    placement=create_placement(0),
                    obj=ImageGenerationToolHeartbeat(),
                )
            ]
            * 2,
            # forward=False: these packets come from the executing tool, not
            # from forwarding further LLM tokens.
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ImageGenerationFinal(
                    images=[
                        GeneratedImage(
                            file_id="123",
                            url="/api/chat/file/123",
                            revised_prompt=QUERY,
                            shape="square",
                        )
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=SectionEnd(),
            ),
            forward=False,
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 2 - the answer comes after the tool call, so turn_index=1
        handler.add_response(
            LLMAnswerResponse(
                answer_tokens=tokenise(ANSWER_RESPONSE),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=AgentResponseStart(final_documents=None),
            )
        ).expect_packets(
            [
                create_packet_with_agent_response_delta(token, 1)
                for token in tokenise(ANSWER_RESPONSE)
            ]
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=OverallStop(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        with pytest.raises(StopIteration):
            next(answer_stream)
def test_parallel_internal_and_web_search_tool_calls(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """
    User asks a question
    LLM does some thinking
    LLM runs parallel tool calls for internal & web search
    -> Internal Search Branch performs search + read ~10 documents
    -> Web Search: Searches the web for information
    LLM reads web documents
    LLM does thinking across all results
    LLM reads one more website
    LLM does more thinking
    LLM generates answer
    """
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(
        db_session, email_prefix="test_parallel_internal_and_web_search_tool_calls"
    )
    # NOTE(review): "AVALIABLE" is a typo for "AVAILABLE" — renaming is a
    # separate cleanup since the name is used below.
    AVALIABLE_CONNECTORS = [
        DocumentSource.GOOGLE_DRIVE,
        DocumentSource.CONFLUENCE,
        DocumentSource.LINEAR,
        DocumentSource.FIREFLIES,
    ]
    QUERY = "How will forecasts against 2026 global GDP growth affect our Q2 strategy?"
    THINKING_RESPONSE_1 = (
        "I need to build more context around the user's query to answer it. "
        "I should look at GDP growth projections for 2026. "
        "I should also look at what the Q2 strategy is and what projects are included. "
        "I should perform both web and internal searches in parallel to get information efficiently."
    )
    WEB_QUERIES_1 = [
        "2026 global GDP growth projections",
        "GDP growth 2026",
        "GDP forecast 2026",
    ]
    # Canned web results keyed by query; note some hits repeat across queries
    # (deliberate, to exercise de-duplication below).
    WEB_RESULTS_1 = {
        WEB_QUERIES_1[0]: [
            MockWebSearchResult(
                title="World Economic Outlook Update, January 2026",
                link="https://www.imf.org/weo/issues/2026/01/19/world-economic-outlook-update-january-2026",
                snippet="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
            ),
            MockWebSearchResult(
                title="IMF sees steady global growth in 2026 as AI boom offsets ...",
                link="https://www.reuters.com/article/us-world-economy-imf-idUSKBN2JU23E",
                snippet="IMF forecasts 2026 global GDP growth at 3.3% even with stronger 2025 performance",
            ),
            MockWebSearchResult(
                title="The Global Economy Is Forecast to Post...",
                link="https://www.goldmansachs.com/insights/articles/123",
                snippet="Global GDP is projected by Goldman Sachs Research to increase 2.8% in 2026",
            ),
        ],
        WEB_QUERIES_1[1]: [
            MockWebSearchResult(
                title="US third-quarter economic growth revised slightly higher",
                link="https://www.reuters.com/word/us-third-quarter-eco",
                snippet="Gross domestic product increased at an upwardly revised 4.4% annualized rate, the ...",
            ),
            MockWebSearchResult(
                title="US GDP Growth Is Projected to Outperform Economist ...",
                link="https://www.goldmansachs.com/insights/articles/321",
                snippet="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
            ),
            MockWebSearchResult(
                title="Gross Domestic Product",
                link="https://www.bea.gov/data/gdp/gross-domestic-product",
                snippet="Real gross domestic product (GDP) increased at an annual rate of 4.4 percent in the third quarter",
            ),
        ],
        WEB_QUERIES_1[2]: [
            MockWebSearchResult(
                title="World Economic Outlook Update, January 2026",
                link="https://www.imf.org/web/issues/2026/01/19/world-economic-outlook-update-january-2026",
                snippet="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
            ),
            MockWebSearchResult(
                title="US GDP Growth Is Projected to Outperform Economist ...",
                link="https://www.goldmansachs.com/insights/articles/321",
                snippet="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
            ),
            MockWebSearchResult(
                title="Our economic outlook for the United States - Vanguard",
                link="https://corporate.vanguard.com/content/corp/vemo",
                snippet="We expect strong capital investment to remain a principal strength in the year ahead",
            ),
        ],
    }
    INTERNAL_QUERIES_1 = ["Q2 strategy 2026", "GDP growth 2026 projects", "Q2 projects"]
    INTERNAL_RESULTS_1 = {
        INTERNAL_QUERIES_1[0]: [
            MockInternalSearchResult(
                document_id="123456789",
                source_type=DocumentSource.GOOGLE_DRIVE,
                semantic_identifier="Q2 strategy 2026",
                chunk_ind=11,
            ),
            MockInternalSearchResult(
                document_id="732190732173",
                source_type=DocumentSource.FIREFLIES,
                semantic_identifier="What we think is going to happen in Q2",
                chunk_ind=5,
            ),
            MockInternalSearchResult(
                document_id="12389123219",
                source_type=DocumentSource.CONFLUENCE,
                semantic_identifier="Strategy roadmap for Q2 2026",
                chunk_ind=7,
            ),
        ],
        INTERNAL_QUERIES_1[1]: [
            MockInternalSearchResult(
                document_id="123123",
                source_type=DocumentSource.LINEAR,
                semantic_identifier="GDP growth 2026 projects",
                chunk_ind=13,
            )
        ],
        INTERNAL_QUERIES_1[2]: [
            MockInternalSearchResult(
                document_id="98823643243",
                source_type=DocumentSource.GOOGLE_DRIVE,
                semantic_identifier="Full list of Q2 projects",
                chunk_ind=1,
            )
        ],
    }
    OPEN_URL_URLS_1 = [
        WEB_RESULTS_1[WEB_QUERIES_1[0]][0].link,
        WEB_RESULTS_1[WEB_QUERIES_1[0]][2].link,
        WEB_RESULTS_1[WEB_QUERIES_1[2]][0].link,
    ]
    OPEN_URL_DOCUMENTS_1 = [
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[0]][0].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[0]][0].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[0]][2].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[0]][2].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[2]][0].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[2]][0].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
    ]
    THINKING_RESPONSE_2 = (
        "I now have a clear picture of the 2026 global GDP projections and the Q2 strategy. "
        "I would like to now about the outperform expections though..."
    )
    OPEN_URL_URLS_2 = [WEB_RESULTS_1[WEB_QUERIES_1[1]][1].link]
    OPEN_URL_DOCUMENTS_2 = [
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[1]][1].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[1]][1].link,
            content="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
        )
    ]
    REASONING_RESPONSE_3 = (
        "I now have all the information I need to answer the user's question."
    )
    ANSWER_RESPONSE = (
        "We will have to change around some of our projects to accomodate the outperform expections. "
        "We should focus on aggresive expansion projects and prioritize them over cost-cutting initiatives."
    )
    # De-duplicate the expected web docs across queries (the same hit can
    # appear for multiple queries); first-appearance order is preserved.
    expected_web_docs = []
    seen_web_results = set()
    for web_results in WEB_RESULTS_1.values():
        for web_result in web_results:
            key = (web_result.title, web_result.link)
            if key in seen_web_results:
                continue
            seen_web_results.add(key)
            expected_web_docs.append(mock_web_search_result_to_search_doc(web_result))
    # Same de-duplication for internal search results.
    expected_internal_docs = []
    seen_internal_results = set()
    for internal_results in INTERNAL_RESULTS_1.values():
        for internal_result in internal_results:
            key = (internal_result.semantic_identifier, internal_result.document_id)
            if key in seen_internal_results:
                continue
            seen_internal_results.add(key)
            expected_internal_docs.append(internal_result.to_search_doc())
    with (
        use_mock_llm() as mock_llm,
        use_mock_search_pipeline(
            connectors=AVALIABLE_CONNECTORS
        ) as mock_search_pipeline,
        use_mock_web_provider(db_session) as mock_web,
        use_mock_content_provider() as mock_content,
    ):
        for query, web_results in WEB_RESULTS_1.items():
            mock_web.add_results(query, web_results)
        for query, internal_results in INTERNAL_RESULTS_1.items():
            mock_search_pipeline.add_search_results(query, internal_results)
        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )
        chat_session = create_chat_session(db_session=db_session, user=test_user)
        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )
        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )
        # LLM Stream Response 1
        # Turn 0: reasoning. Turn 1: two parallel tool branches (branch 0 is
        # internal_search, branch 1 is web_search), announced by TopLevelBranching.
        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(THINKING_RESPONSE_1),
            )
        ).add_responses_together(
            LLMToolCallResponse(
                tool_name="internal_search",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"queries": INTERNAL_QUERIES_1})],
            ),
            LLMToolCallResponse(
                tool_name="web_search",
                tool_call_id="321",
                tool_call_argument_tokens=[json.dumps({"queries": WEB_QUERIES_1})],
            ),
        ).expect_reasoning(
            reasoning_tokens=tokenise(THINKING_RESPONSE_1),
            turn_index=0,
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=TopLevelBranching(
                    num_parallel_branches=2,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolStart(
                    is_internet_search=False,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolStart(
                    is_internet_search=True,
                ),
            )
        ).expect(
            # Internal search also runs the original user QUERY alongside the
            # LLM-provided queries.
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolQueriesDelta(
                    queries=INTERNAL_QUERIES_1 + [QUERY],
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolDocumentsDelta(
                    documents=expected_internal_docs,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SectionEnd(),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolQueriesDelta(
                    queries=WEB_QUERIES_1,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolDocumentsDelta(
                    documents=expected_web_docs,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 2
        # Turn 2: open_url over three of the web hits (no reasoning this turn).
        for content in OPEN_URL_DOCUMENTS_1:
            mock_content.add_content(content)
        handler.add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="456",
                tool_call_argument_tokens=[json.dumps({"urls": OPEN_URL_URLS_1})],
            )
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlStart(),
            ),
            forward=2,  # Need both header + argument tokens for the tool call
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlUrls(urls=OPEN_URL_URLS_1),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content)
                        for content in OPEN_URL_DOCUMENTS_1
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=SectionEnd(),
            ),
            forward=False,
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 3
        # Turn 3: reasoning. Turn 4: one more open_url for the remaining page.
        for content in OPEN_URL_DOCUMENTS_2:
            mock_content.add_content(content)
        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(THINKING_RESPONSE_2),
            )
        ).add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="789",
                tool_call_argument_tokens=[json.dumps({"urls": OPEN_URL_URLS_2})],
            )
        ).expect_reasoning(
            reasoning_tokens=tokenise(THINKING_RESPONSE_2),
            turn_index=3,
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=OpenUrlStart(),
            )
        ).expect(
            Packet(placement=create_placement(4), obj=OpenUrlUrls(urls=OPEN_URL_URLS_2))
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content)
                        for content in OPEN_URL_DOCUMENTS_2
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 4
        # Turn 5: final reasoning. Turn 6: answer citing every gathered doc
        # (internal + web search results + both open_url fetches).
        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(REASONING_RESPONSE_3),
            )
        ).add_response(
            LLMAnswerResponse(
                answer_tokens=tokenise(ANSWER_RESPONSE),
            )
        ).expect_reasoning(
            reasoning_tokens=tokenise(REASONING_RESPONSE_3),
            turn_index=5,
        ).expect_agent_response(
            answer_tokens=tokenise(ANSWER_RESPONSE),
            turn_index=6,
            final_documents=expected_internal_docs
            + expected_web_docs
            + [
                mock_web_content_to_search_doc(content)
                for content in OPEN_URL_DOCUMENTS_1
            ]
            + [
                mock_web_content_to_search_doc(content)
                for content in OPEN_URL_DOCUMENTS_2
            ],
        ).run_and_validate(
            stream=answer_stream
        )
        # End stream
        with pytest.raises(StopIteration):
            next(answer_stream)

View File

@@ -4,16 +4,14 @@ import os
from pathlib import Path
from typing import Optional
import nltk # type: ignore
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.file_store import get_default_file_store
from onyx.indexing.models import IndexingSetting
from onyx.setup import setup_document_indices
from onyx.setup import setup_postgres
from onyx.setup import setup_vespa
from shared_configs import configs as shared_configs_module
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from tests.external_dependency_unit.constants import TEST_TENANT_ID
@@ -32,7 +30,6 @@ def ensure_full_deployment_setup(
- Runs setup_onyx (Postgres defaults, Vespa indices)
- Initializes file store (best-effort)
- Ensures Vespa indices exist
- Installs NLTK stopwords and punkt_tab
"""
global _SETUP_COMPLETE
if _SETUP_COMPLETE:
@@ -49,9 +46,6 @@ def ensure_full_deployment_setup(
# Avoid warm-up network calls during setup
shared_configs_module.SKIP_WARM_UP = True
nltk.download("stopwords", quiet=True)
nltk.download("punkt_tab", quiet=True)
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant)
original_cwd = os.getcwd()
backend_dir = Path(__file__).resolve().parents[2] # points to 'backend'
@@ -73,8 +67,8 @@ def ensure_full_deployment_setup(
document_index = get_default_document_index(
active.primary, active.secondary
)
ok = setup_vespa(
document_index=document_index,
ok = setup_document_indices(
document_indices=[document_index],
index_setting=IndexingSetting.from_db_model(active.primary),
secondary_index_setting=(
IndexingSetting.from_db_model(active.secondary)

View File

@@ -282,12 +282,12 @@ def test_anthropic_prompt_caching_reduces_costs(
Anthropic requires explicit cache_control parameters.
"""
# Create Anthropic LLM
# NOTE: prompt caching support is model-specific; `claude-3-5-haiku-20241022` is known
# NOTE: prompt caching support is model-specific; `claude-3-haiku-20240307` is known
# to return cache_creation/cache_read usage metrics, while some newer aliases may not.
llm = LitellmLLM(
api_key=os.environ["ANTHROPIC_API_KEY"],
model_provider="anthropic",
model_name="claude-3-5-haiku-20241022",
model_name="claude-3-haiku-20240307",
max_input_tokens=200000,
)

Some files were not shown because too many files have changed in this diff Show More