Compare commits

..

41 Commits

Author SHA1 Message Date
Bo-Onyx
2a7f6e4ffc fix(api memory): replace glibc with jemalloc for memory allocating (#9196) 2026-03-25 14:42:24 -07:00
Nikolas Garza
b6b14ffaf5 feat(slack): convert markdown tables to Slack-friendly format (#8999) 2026-03-04 11:54:38 -08:00
Justin Tahara
9fb76042a2 fix(celery): Guardrail for User File Processing (#8633) 2026-03-01 10:30:03 -08:00
Nikolas Garza
caad67a34a fix(slack): sanitize HTML tags and broken citation links in bot responses (#8767) 2026-02-26 17:27:24 -08:00
dependabot[bot]
c33437488f chore(deps): Bump mistune from 0.8.4 to 3.1.4 in /backend (#6407)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-02-26 17:27:24 -08:00
Jamison Lahman
9f66ee7240 chore(devtools): upgrade ods: v0.6.1->v0.6.2 (#8773) 2026-02-26 16:26:37 -08:00
justin-tahara
e6ef2b5074 Fixing mypy 2026-02-09 15:47:10 -08:00
justin-tahara
74132175a8 Fixing mypy 2026-02-09 15:47:10 -08:00
Justin Tahara
29f707ee2d fix(posthog): Chat metrics for Cloud (#8278) 2026-02-09 15:47:10 -08:00
Justin Tahara
f0eb86fb9f fix(ui): Updating Dropdown Modal component (#8033) 2026-02-06 11:59:09 -08:00
Justin Tahara
b422496a4c fix(agents): Removing Label Dependency (#8189) 2026-02-06 11:39:09 -08:00
Justin Tahara
31d6a45b23 chore(chat): Cleaning Error Codes + Tests (#8186) 2026-02-06 11:02:41 -08:00
Justin Tahara
36f3ac1ec5 feat: onyx discord bot - supervisord and kube deployment (#7706) 2026-02-02 15:05:21 -08:00
Wenxi Onyx
74f5b3025a fix: discord svg (can't cherry-pick) 2026-02-02 10:03:39 -08:00
Justin Tahara
c18545d74c feat(desktop): Ensure that UI reflects Light/Dark Toggle (#7684) 2026-02-02 10:03:39 -08:00
Justin Tahara
48171e3700 fix(ui): Agent Saving with other people files (#8095) 2026-02-02 10:03:39 -08:00
Wenxi
f5a5709876 feat: onyx discord bot - frontend (#7497) 2026-02-02 10:03:39 -08:00
Justin Tahara
85868b1b83 fix(desktop): Remove Global Shortcuts (#7914) 2026-01-30 13:46:20 -08:00
Justin Tahara
8dc14c23e6 fix(asana): Workspace Team ID mismatch (#7674) 2026-01-30 13:19:02 -08:00
Jamison Lahman
23821cc0e8 chore(mypy): fix mypy cache issues switching between HEAD and release (#7732) 2026-01-27 15:52:57 -08:00
Jamison Lahman
b359e13281 fix(citations): enable citation sidebar w/ web_search-only assistants (#7888) 2026-01-27 13:26:29 -08:00
Justin Tahara
717f410a4a fix(llm): Hide private models from Agent Creation (#7873) 2026-01-27 12:21:06 -08:00
SubashMohan
ada0946a62 fix(layout): adjust footer margin and prevent page refresh on chatsession drop (#7759) 2026-01-27 11:57:18 -08:00
Jamison Lahman
eb2ac8f5a3 fix(fe): inline code text wraps (#7574) 2026-01-27 11:33:03 -08:00
Nikolas Garza
fbeb57c592 fix(slack): Extract person names and filter garbage in query expansion (#7632) 2026-01-27 11:26:52 -08:00
Nikolas Garza
d6da9c9b85 fix: scroll to bottom when loading existing conversations (#7614) 2026-01-27 11:26:52 -08:00
Nikolas Garza
5aea2e223e fix(billing): remove grandfathered pricing option when subscription lapses (#7583) 2026-01-27 11:26:52 -08:00
Nikolas Garza
1ff91de07e fix: deflake chat user journey test (#7646) 2026-01-27 11:18:27 -08:00
Nikolas Garza
b3dbc69faf fix(tests): use crawler-friendly search query in Exa integration test (#7746) 2026-01-27 11:13:01 -08:00
Yuhong Sun
431597b0f9 fix: LiteLLM Azure models don't stream (#7761) 2026-01-27 10:49:17 -08:00
Yuhong Sun
51b4e5f2fb fix: Azure OpenAI Tool Calls (#7727) 2026-01-27 10:49:17 -08:00
Justin Tahara
9afa04a26b fix(ui): Coda Logo (#7656) 2026-01-26 17:43:54 -08:00
Justin Tahara
70a3a9c0cd fix(ui): User Groups Connectors Fix (#7658) 2026-01-26 17:43:45 -08:00
Justin Tahara
080165356c fix(ui): First Connector Result (#7657) 2026-01-26 17:43:35 -08:00
Justin Tahara
3ae974bdf6 fix(ui): Fix Token Rate Limits Page (#7659) 2026-01-26 17:42:57 -08:00
Justin Tahara
1471658151 fix(vertex ai): Extra Args for Opus 4.5 (#7586) 2026-01-26 17:42:43 -08:00
Justin Tahara
3e85e9c1a3 feat(desktop): Domain Configuration (#7655) 2026-01-26 17:12:33 -08:00
Justin Tahara
851033be5f feat(desktop): Properly Sign Mac App (#7608) 2026-01-26 17:12:24 -08:00
Jamison Lahman
91e974a6cc chore(desktop): make artifact filename version-agnostic (#7679) 2026-01-26 16:20:39 -08:00
Jamison Lahman
38ba4f8a1c chore(deployments): fix region (#7640) 2026-01-26 16:20:39 -08:00
Jamison Lahman
6f02473064 chore(deployments): fetch secrets from AWS (#7584) 2026-01-26 16:20:39 -08:00
300 changed files with 7035 additions and 16595 deletions

View File

@@ -404,7 +404,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -477,7 +477,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -537,7 +537,7 @@ jobs:
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -615,7 +615,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -696,7 +696,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -764,7 +764,7 @@ jobs:
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -839,7 +839,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -911,7 +911,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -970,7 +970,7 @@ jobs:
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -1049,7 +1049,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
@@ -1128,7 +1128,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
@@ -1193,7 +1193,7 @@ jobs:
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -29,7 +29,6 @@ jobs:
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/

View File

@@ -94,7 +94,7 @@ jobs:
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -45,9 +45,6 @@ env:
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
# OpenSearch
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
@@ -128,13 +125,11 @@ jobs:
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker-compose.opensearch.yml \
up -d \
minio \
relational_db \
cache \
index \
opensearch \
code-interpreter
- name: Run migrations
@@ -163,7 +158,7 @@ jobs:
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
# Collect logs from each container
for container in $containers; do

View File

@@ -88,7 +88,6 @@ jobs:
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
@@ -181,11 +180,6 @@ jobs:
trap cleanup EXIT
# Run the actual installation with detailed logging
# Note that opensearch.enabled is true whereas others in this install
# are false. There is some work that needs to be done to get this
# entire step working in CI, enabling opensearch here is a small step
# in that direction. If this is causing issues, disabling it in this
# step should be ok in the short term.
echo "=== Starting ct install ==="
set +e
ct install --all \
@@ -193,8 +187,6 @@ jobs:
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=opensearch.enabled=true \
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \

View File

@@ -103,7 +103,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -163,7 +163,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -208,7 +208,7 @@ jobs:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit

View File

@@ -95,7 +95,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -155,7 +155,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -214,7 +214,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit

View File

@@ -85,7 +85,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -146,7 +146,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -207,7 +207,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/

View File

@@ -50,8 +50,9 @@ jobs:
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: backend/.mypy_cache
key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
restore-keys: |
mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-
mypy-${{ runner.os }}-
- name: Run MyPy

View File

@@ -70,7 +70,7 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
- name: Build and load
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6

3
.gitignore vendored
View File

@@ -1,8 +1,5 @@
# editors
.vscode
!/.vscode/env_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.cursor

View File

@@ -74,13 +74,6 @@ repos:
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-added-large-files
name: Check for added large files
args: ["--maxkb=1500"]
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:

View File

@@ -1,3 +1,5 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
@@ -22,7 +24,7 @@
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery background",
"Celery docfetching",
"Celery docprocessing",
"Celery beat"
@@ -149,6 +151,24 @@
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Discord Bot",
"consoleName": "Discord Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/discord/client.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Discord Bot Console"
},
{
"name": "MCP Server",
"consoleName": "MCP Server",
@@ -577,99 +597,6 @@
"group": "3"
}
},
{
// Dummy entry used to label the group
"name": "--- Database ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "4",
"order": 0
}
},
{
"name": "Clean restore seeded database dump (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--clean",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Create database snapshot",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"dump",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore database snapshot (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--clean",
"--yes",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Upgrade database to head revision",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"upgrade"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -37,6 +37,10 @@ CVE-2023-50868
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193

View File

@@ -42,7 +42,9 @@ RUN apt-get update && \
pkg-config \
gcc \
nano \
vim && \
vim \
libjemalloc2 \
&& \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -89,6 +91,12 @@ RUN uv pip install --system --no-cache-dir --upgrade \
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"
@@ -124,6 +132,13 @@ ENV PYTHONPATH=/app
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
# Use jemalloc instead of glibc malloc to reduce memory fragmentation
# in long-running Python processes (API server, Celery workers).
# The soname is architecture-independent; the dynamic linker resolves
# the correct path from standard library directories.
# Placed after all RUN steps so build-time processes are unaffected.
ENV LD_PRELOAD=libjemalloc.so.2
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@@ -1,42 +0,0 @@
"""add_unique_constraint_to_inputprompt_prompt_user_id
Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique constraint on (prompt, user_id) for user-owned prompts
# This ensures each user can only have one shortcut with a given name
op.create_unique_constraint(
"uq_inputprompt_prompt_user_id",
"inputprompt",
["prompt", "user_id"],
)
# Create partial unique index for public prompts (where user_id IS NULL)
# PostgreSQL unique constraints don't enforce uniqueness for NULL values,
# so we need a partial index to ensure public prompt names are also unique
op.execute(
"""
CREATE UNIQUE INDEX uq_inputprompt_prompt_public
ON inputprompt (prompt)
WHERE user_id IS NULL
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
op.drop_constraint("uq_inputprompt_prompt_user_id", "inputprompt", type_="unique")

View File

@@ -1,29 +0,0 @@
"""remove default prompt shortcuts
Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Delete any user associations for the default prompts first (foreign key constraint)
op.execute(
"DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)"
)
# Delete the pre-seeded default prompt shortcuts (they have negative IDs)
op.execute("DELETE FROM inputprompt WHERE id < 0")
def downgrade() -> None:
# We don't restore the default prompts on downgrade
pass

View File

@@ -1,31 +0,0 @@
"""add chat_background to user
Revision ID: fb80bdd256de
Revises: 8b5ce697290e
Create Date: 2026-01-16 16:15:59.222617
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fb80bdd256de"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column(
"chat_background",
sa.String(),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("user", "chat_background")

View File

@@ -17,8 +17,7 @@ from onyx.context.search.models import InferenceChunk
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_current_primary_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.factory import get_default_llm
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
@@ -43,13 +42,11 @@ def _run_single_search(
document_index: DocumentIndex,
user: User | None,
db_session: Session,
num_hits: int | None = None,
) -> list[InferenceChunk]:
"""Execute a single search query and return chunks."""
chunk_search_request = ChunkSearchRequest(
query=query,
user_selected_filters=filters,
limit=num_hits,
)
return search_pipeline(
@@ -75,9 +72,7 @@ def stream_search_query(
Used by both streaming and non-streaming endpoints.
"""
# Get document index
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
document_index = get_current_primary_default_document_index(db_session)
# Determine queries to execute
original_query = request.search_query
@@ -119,7 +114,6 @@ def stream_search_query(
document_index=document_index,
user=user,
db_session=db_session,
num_hits=request.num_hits,
)
else:
# Multiple queries - run in parallel and merge with RRF
@@ -127,14 +121,7 @@ def stream_search_query(
search_functions = [
(
_run_single_search,
(
query,
request.filters,
document_index,
user,
db_session,
request.num_hits,
),
(query, request.filters, document_index, user, db_session),
)
for query in all_executed_queries
]
@@ -181,9 +168,6 @@ def stream_search_query(
# Merge chunks into sections
sections = merge_individual_chunks(chunks)
# Truncate to the requested number of hits
sections = sections[: request.num_hits]
# Apply LLM document selection if requested
# num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
# The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it

View File

@@ -10,8 +10,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
("/enterprise-settings/logo", {"GET"}),
("/enterprise-settings/logotype", {"GET"}),
("/enterprise-settings/custom-analytics-script", {"GET"}),
# Stripe publishable key is safe to expose publicly
("/tenants/stripe-publishable-key", {"GET"}),
]

View File

@@ -32,7 +32,6 @@ class SendSearchQueryRequest(BaseModel):
filters: BaseFilters | None = None
num_docs_fed_to_llm_selection: int | None = None
run_query_expansion: bool = False
num_hits: int = 50
include_content: bool = False
stream: bool = False

View File

@@ -1,6 +1,3 @@
import asyncio
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
@@ -15,14 +12,11 @@ from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import StripePublishableKeyResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import overwrite_full_gated_set
from ee.onyx.server.tenants.product_gating import store_product_gating
from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
@@ -32,10 +26,6 @@ logger = setup_logger()
router = APIRouter(prefix="/tenants")
# Cache for Stripe publishable key to avoid hitting S3 on every request
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()
@router.post("/product-gating")
def gate_product(
@@ -123,67 +113,3 @@ async def create_subscription_session(
except Exception as e:
logger.exception("Failed to create subscription session")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/stripe-publishable-key")
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
"""
Fetch the Stripe publishable key.
Priority: env var override (for testing) > S3 bucket (production).
This endpoint is public (no auth required) since publishable keys are safe to expose.
The key is cached in memory to avoid hitting S3 on every request.
"""
global _stripe_publishable_key_cache
# Fast path: return cached value without lock
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Use lock to prevent concurrent S3 requests
async with _stripe_key_lock:
# Double-check after acquiring lock (another request may have populated cache)
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Check for env var override first (for local testing with pk_test_* keys)
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
)
try:
async with httpx.AsyncClient() as client:
response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
response.raise_for_status()
key = response.text.strip()
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
)

View File

@@ -105,7 +105,3 @@ class PendingUserSnapshot(BaseModel):
class ApproveUserRequest(BaseModel):
email: str
class StripePublishableKeyResponse(BaseModel):
publishable_key: str

View File

@@ -11,7 +11,6 @@ from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Protocol
from typing import Tuple
@@ -1457,9 +1456,6 @@ def get_default_admin_user_emails_() -> list[str]:
STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
STATE_TOKEN_LIFETIME_SECONDS = 3600
CSRF_TOKEN_KEY = "csrftoken"
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
class OAuth2AuthorizeResponse(BaseModel):
@@ -1467,19 +1463,13 @@ class OAuth2AuthorizeResponse(BaseModel):
def generate_state_token(
data: Dict[str, str],
secret: SecretType,
lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,
data: Dict[str, str], secret: SecretType, lifetime_seconds: int = 3600
) -> str:
data["aud"] = STATE_TOKEN_AUDIENCE
return generate_jwt(data, secret, lifetime_seconds)
def generate_csrf_token() -> str:
return secrets.token_urlsafe(32)
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
def create_onyx_oauth_router(
oauth_client: BaseOAuth2,
@@ -1508,13 +1498,6 @@ def get_oauth_router(
redirect_url: Optional[str] = None,
associate_by_email: bool = False,
is_verified_by_default: bool = False,
*,
csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,
csrf_token_cookie_path: str = "/",
csrf_token_cookie_domain: Optional[str] = None,
csrf_token_cookie_secure: Optional[bool] = None,
csrf_token_cookie_httponly: bool = True,
csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
) -> APIRouter:
"""Generate a router with the OAuth routes."""
router = APIRouter()
@@ -1531,9 +1514,6 @@ def get_oauth_router(
route_name=callback_route_name,
)
if csrf_token_cookie_secure is None:
csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")
@router.get(
"/authorize",
name=f"oauth:{oauth_client.name}.{backend.name}.authorize",
@@ -1541,10 +1521,8 @@ def get_oauth_router(
)
async def authorize(
request: Request,
response: Response,
redirect: bool = Query(False),
scopes: List[str] = Query(None),
) -> Response | OAuth2AuthorizeResponse:
) -> OAuth2AuthorizeResponse:
referral_source = request.cookies.get("referral_source", None)
if redirect_url is not None:
@@ -1554,11 +1532,9 @@ def get_oauth_router(
next_url = request.query_params.get("next", "/")
csrf_token = generate_csrf_token()
state_data: Dict[str, str] = {
"next_url": next_url,
"referral_source": referral_source or "default_referral",
CSRF_TOKEN_KEY: csrf_token,
}
state = generate_state_token(state_data, state_secret)
@@ -1575,31 +1551,6 @@ def get_oauth_router(
authorization_url, {"access_type": "offline", "prompt": "consent"}
)
if redirect:
redirect_response = RedirectResponse(authorization_url, status_code=302)
redirect_response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return redirect_response
response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
@log_function_time(print_only=True)
@@ -1649,33 +1600,7 @@ def get_oauth_router(
try:
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
except jwt.DecodeError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
),
)
except jwt.ExpiredSignatureError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode,
"ACCESS_TOKEN_ALREADY_EXPIRED",
"ACCESS_TOKEN_ALREADY_EXPIRED",
),
)
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
if (
not cookie_csrf_token
or not state_csrf_token
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST)
next_url = state_data.get("next_url", "/")
referral_source = state_data.get("referral_source", None)

View File

@@ -26,13 +26,10 @@ from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.background.celery.celery_utils import make_probe_path
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.document_index.opensearch.client import (
wait_for_opensearch_with_timeout,
)
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_connector import RedisConnector
@@ -519,17 +516,15 @@ def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
"""Waits for Vespa to become ready subject to a timeout.
Raises WorkerShutdown if the timeout is reached."""
if ENABLE_OPENSEARCH_FOR_ONYX:
# TODO(andrei): Do some similar liveness checking for OpenSearch.
return
if not wait_for_vespa_with_timeout():
msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
msg = "Vespa: Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
if not wait_for_opensearch_with_timeout():
msg = "[OpenSearch] Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
# File for validating worker liveness
class LivenessProbe(bootsteps.StartStopStep):

View File

@@ -87,7 +87,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.httpx.httpx_pool import HttpxPool
@@ -1436,7 +1436,7 @@ def _docprocessing_task(
callback=callback,
)
document_indices = get_all_document_indices(
document_index = get_default_document_index(
index_attempt.search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -1473,7 +1473,7 @@ def _docprocessing_task(
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_indices=document_indices,
document_index=document_index,
ignore_time_skip=True, # Documents are already filtered during extraction
db_session=db_session,
tenant_id=tenant_id,

View File

@@ -25,7 +25,7 @@ from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.relationships import delete_document_references_from_kg
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
@@ -97,17 +97,13 @@ def document_by_cc_pair_cleanup_task(
action = "skip"
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates and deletion so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
active_search_settings.primary,
active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
count = get_document_connector_count(db_session, document_id)
if count == 1:
@@ -117,12 +113,11 @@ def document_by_cc_pair_cleanup_task(
chunk_count = fetch_chunk_count_for_document(document_id, db_session)
for retry_document_index in retry_document_indices:
_ = retry_document_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
_ = retry_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
delete_document_references_from_kg(
db_session=db_session,
@@ -160,18 +155,14 @@ def document_by_cc_pair_cleanup_task(
hidden=doc.hidden,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# there are still other cc_pair references to the doc, so just resync to Vespa
delete_document_by_connector_credential_pair__no_commit(

View File

@@ -12,6 +12,7 @@ from retry import retry
from sqlalchemy import select
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.configs.app_configs import MANAGED_VESPA
@@ -19,12 +20,14 @@ from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
from onyx.connectors.file.connector import LocalFileConnector
from onyx.connectors.models import Document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -32,7 +35,7 @@ from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_active_search_settings_list
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.file_store.file_store import get_default_file_store
@@ -53,6 +56,17 @@ def _user_file_lock_key(user_file_id: str | UUID) -> str:
return f"{OnyxRedisLocks.USER_FILE_PROCESSING_LOCK_PREFIX}:{user_file_id}"
def _user_file_queued_key(user_file_id: str | UUID) -> str:
"""Key that exists while a process_single_user_file task is sitting in the queue.
The beat generator sets this with a TTL equal to CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
before enqueuing and the worker deletes it as its first action. This prevents
the beat from adding duplicate tasks for files that already have a live task
in flight.
"""
return f"{OnyxRedisLocks.USER_FILE_QUEUED_PREFIX}:{user_file_id}"
def _user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}"
@@ -116,7 +130,24 @@ def _get_document_chunk_count(
def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
"""Scan for user files with PROCESSING status and enqueue per-file tasks.
Uses direct Redis locks to avoid overlapping runs.
Three mechanisms prevent queue runaway:
1. **Queue depth backpressure** if the broker queue already has more than
USER_FILE_PROCESSING_MAX_QUEUE_DEPTH items we skip this beat cycle
entirely. Workers are clearly behind; adding more tasks would only make
the backlog worse.
2. **Per-file queued guard** before enqueuing a task we set a short-lived
Redis key (TTL = CELERY_USER_FILE_PROCESSING_TASK_EXPIRES). If that key
already exists the file already has a live task in the queue, so we skip
it. The worker deletes the key the moment it picks up the task so the
next beat cycle can re-enqueue if the file is still PROCESSING.
3. **Task expiry** every enqueued task carries an `expires` value equal to
CELERY_USER_FILE_PROCESSING_TASK_EXPIRES. If a task is still sitting in
the queue after that deadline, Celery discards it without touching the DB.
This is a belt-and-suspenders defence: even if the guard key is lost (e.g.
Redis restart), stale tasks evict themselves rather than piling up forever.
"""
task_logger.info("check_user_file_processing - Starting")
@@ -131,7 +162,21 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
return None
enqueued = 0
skipped_guard = 0
try:
# --- Protection 1: queue depth backpressure ---
r_celery = self.app.broker_connection().channel().client # type: ignore
queue_len = celery_get_queue_length(
OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
)
if queue_len > USER_FILE_PROCESSING_MAX_QUEUE_DEPTH:
task_logger.warning(
f"check_user_file_processing - Queue depth {queue_len} exceeds "
f"{USER_FILE_PROCESSING_MAX_QUEUE_DEPTH}, skipping enqueue for "
f"tenant={tenant_id}"
)
return None
with get_session_with_current_tenant() as db_session:
user_file_ids = (
db_session.execute(
@@ -144,12 +189,35 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
)
for user_file_id in user_file_ids:
self.app.send_task(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
priority=OnyxCeleryPriority.HIGH,
# --- Protection 2: per-file queued guard ---
queued_key = _user_file_queued_key(user_file_id)
guard_set = redis_client.set(
queued_key,
1,
ex=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
nx=True,
)
if not guard_set:
skipped_guard += 1
continue
# --- Protection 3: task expiry ---
# If task submission fails, clear the guard immediately so the
# next beat cycle can retry enqueuing this file.
try:
self.app.send_task(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
kwargs={
"user_file_id": str(user_file_id),
"tenant_id": tenant_id,
},
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
priority=OnyxCeleryPriority.HIGH,
expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
)
except Exception:
redis_client.delete(queued_key)
raise
enqueued += 1
finally:
@@ -157,7 +225,8 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
lock.release()
task_logger.info(
f"check_user_file_processing - Enqueued {enqueued} tasks for tenant={tenant_id}"
f"check_user_file_processing - Enqueued {enqueued} skipped_guard={skipped_guard} "
f"tasks for tenant={tenant_id}"
)
return None
@@ -172,6 +241,12 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
start = time.monotonic()
redis_client = get_redis_client(tenant_id=tenant_id)
# Clear the "queued" guard set by the beat generator so that the next beat
# cycle can re-enqueue this file if it is still in PROCESSING state after
# this task completes or fails.
redis_client.delete(_user_file_queued_key(user_file_id))
file_lock: RedisLock = redis_client.lock(
_user_file_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,
@@ -244,8 +319,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
search_settings=current_search_settings,
)
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
current_search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -259,7 +333,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_indices=document_indices,
document_index=document_index,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -413,16 +487,12 @@ def process_single_user_file_delete(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(document_index)
index_name = active_search_settings.primary.index_name
selection = f"{index_name}.document_id=='{user_file_id}'"
@@ -443,12 +513,11 @@ def process_single_user_file_delete(
else:
chunk_count = user_file.chunk_count
for retry_document_index in retry_document_indices:
retry_document_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
retry_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
# 2) Delete the user-uploaded file content from filestore (blob + metadata)
file_store = get_default_file_store()
@@ -570,16 +639,12 @@ def process_single_user_file_project_sync(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if not user_file:
@@ -589,14 +654,13 @@ def process_single_user_file_project_sync(
return None
project_ids = [project.id for project in user_file.projects]
for retry_document_index in retry_document_indices:
retry_document_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
retry_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
task_logger.info(
f"process_single_user_file_project_sync - User file id={user_file_id}"

View File

@@ -49,7 +49,7 @@ from onyx.db.search_settings import get_active_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_document_set import RedisDocumentSet
@@ -70,8 +70,6 @@ logger = setup_logger()
# celery auto associates tasks created inside another task,
# which bloats the result metadata considerably. trail=False prevents this.
# TODO(andrei): Rename all these kinds of functions from *vespa* to a more
# generic *document_index*.
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
ignore_result=True,
@@ -467,17 +465,13 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
try:
with get_session_with_current_tenant() as db_session:
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
doc = get_document(document_id, db_session)
if not doc:
@@ -506,18 +500,14 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
# aggregated_boost_factor=doc.aggregated_boost_factor,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update db last. Worst case = we crash right before this and
# the sync might repeat again later

View File

@@ -7,7 +7,6 @@ from typing import Any
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.emitter import Emitter
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
@@ -16,11 +15,6 @@ from onyx.tools.models import ToolCallInfo
from onyx.utils.threadpool_concurrency import run_in_background
from onyx.utils.threadpool_concurrency import wait_on_background
# Type alias for search doc deduplication key
# Simple key: just document_id (str)
# Full key: (document_id, chunk_ind, match_highlights)
SearchDocKey = str | tuple[str, int, tuple[str, ...]]
class ChatStateContainer:
"""Container for accumulating state during LLM loop execution.
@@ -46,10 +40,6 @@ class ChatStateContainer:
# True if this turn is a clarification question (deep research flow)
self.is_clarification: bool = False
# Note: LLM cost tracking is now handled in multi_llm.py
# Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
# Track which citation numbers were actually emitted during streaming
self._emitted_citations: set[int] = set()
def add_tool_call(self, tool_call: ToolCallInfo) -> None:
"""Add a tool call to the accumulated state."""
@@ -101,54 +91,6 @@ class ChatStateContainer:
with self._lock:
return self.is_clarification
@staticmethod
def create_search_doc_key(
search_doc: SearchDoc, use_simple_key: bool = True
) -> SearchDocKey:
"""Create a unique key for a SearchDoc for deduplication.
Args:
search_doc: The SearchDoc to create a key for
use_simple_key: If True (default), use only document_id for deduplication.
If False, include chunk_ind and match_highlights so that the same
document/chunk with different highlights are stored separately.
"""
if use_simple_key:
return search_doc.document_id
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
def add_search_docs(
self, search_docs: list[SearchDoc], use_simple_key: bool = True
) -> None:
"""Add search docs to the accumulated collection with deduplication.
Args:
search_docs: List of SearchDoc objects to add
use_simple_key: If True (default), deduplicate by document_id only.
If False, deduplicate by document_id + chunk_ind + match_highlights.
"""
with self._lock:
for doc in search_docs:
key = self.create_search_doc_key(doc, use_simple_key)
if key not in self._all_search_docs:
self._all_search_docs[key] = doc
def get_all_search_docs(self) -> dict[SearchDocKey, SearchDoc]:
"""Thread-safe getter for all accumulated search docs (returns a copy)."""
with self._lock:
return self._all_search_docs.copy()
def add_emitted_citation(self, citation_num: int) -> None:
"""Add a citation number that was actually emitted during streaming."""
with self._lock:
self._emitted_citations.add(citation_num)
def get_emitted_citations(self) -> set[int]:
"""Thread-safe getter for emitted citations (returns a copy)."""
with self._lock:
return self._emitted_citations.copy()
def run_chat_loop_with_state_containers(
func: Callable[..., None],

View File

@@ -53,50 +53,6 @@ def update_citation_processor_from_tool_response(
citation_processor.update_citation_mapping(citation_to_doc)
def extract_citation_order_from_text(text: str) -> list[int]:
"""Extract citation numbers from text in order of first appearance.
Parses citation patterns like [1], [1, 2], [[1]], 【1】 etc. and returns
the citation numbers in the order they first appear in the text.
Args:
text: The text containing citations
Returns:
List of citation numbers in order of first appearance (no duplicates)
"""
# Same pattern used in collapse_citations and DynamicCitationProcessor
# Group 2 captures the number in double bracket format: [[1]], 【【1】】
# Group 4 captures the numbers in single bracket format: [1], [1, 2]
citation_pattern = re.compile(
r"([\[【[]{2}(\d+)[\]】]]{2})|([\[【[]([\d]+(?: *, *\d+)*)[\]】]])"
)
seen: set[int] = set()
order: list[int] = []
for match in citation_pattern.finditer(text):
# Group 2 is for double bracket single number, group 4 is for single bracket
if match.group(2):
nums_str = match.group(2)
elif match.group(4):
nums_str = match.group(4)
else:
continue
for num_str in nums_str.split(","):
num_str = num_str.strip()
if num_str:
try:
num = int(num_str)
if num not in seen:
seen.add(num)
order.append(num)
except ValueError:
continue
return order
def collapse_citations(
answer_text: str,
existing_citation_mapping: CitationMapping,

View File

@@ -45,7 +45,6 @@ from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
)
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tracing.framework.create import trace
@@ -454,16 +453,12 @@ def run_llm_loop(
# The section below calculates the available tokens for history a bit more accurately
# now that project files are loaded in.
if persona and persona.replace_base_system_prompt:
if persona and persona.replace_base_system_prompt and persona.system_prompt:
# Handles the case where user has checked off the "Replace base system prompt" checkbox
system_prompt = (
ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
)
if persona.system_prompt
else None
system_prompt = ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
)
custom_agent_prompt_msg = None
else:
@@ -617,7 +612,6 @@ def run_llm_loop(
next_citation_num=citation_processor.get_next_citation_number(),
max_concurrent_tools=None,
skip_search_query_expansion=has_called_search_tool,
url_snippet_map=extract_url_snippet_map(gathered_documents or []),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = parallel_tool_call_results.updated_citation_mapping
@@ -656,15 +650,8 @@ def run_llm_loop(
# Extract search_docs if this is a search tool response
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
if gathered_documents:
gathered_documents.extend(search_docs)
else:
@@ -698,7 +685,7 @@ def run_llm_loop(
reasoning_tokens=llm_step_result.reasoning, # All tool calls from this loop share the same reasoning
tool_call_arguments=tool_call.tool_args,
tool_call_response=saved_response,
search_docs=displayed_docs or search_docs,
search_docs=search_docs,
generated_images=generated_images,
)
# Add to state container for partial save support

View File

@@ -14,7 +14,6 @@ from onyx.chat.emitter import Emitter
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import ChatFileType
@@ -433,7 +432,7 @@ def translate_history_to_llm_format(
for idx, msg in enumerate(history):
# if the message is being added to the history
if PROMPT_CACHE_CHAT_HISTORY and msg.message_type in [
if msg.message_type in [
MessageType.SYSTEM,
MessageType.USER,
MessageType.ASSISTANT,
@@ -860,11 +859,6 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(
result.citation_number
)
else:
# When citation_processor is None, use delta.content directly without modification
accumulated_answer += delta.content
@@ -991,9 +985,6 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(result.citation_number)
# Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
# Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)

View File

@@ -42,6 +42,7 @@ from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_session_by_id
@@ -85,10 +86,6 @@ from onyx.utils.logger import setup_logger
from onyx.utils.long_term_log import LongTermLogger
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.timing import log_function_time
from onyx.utils.variable_functionality import (
fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import noop_fallback
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
@@ -361,21 +358,20 @@ def handle_stream_message_objects(
event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
)
# Track user message in PostHog for analytics
fetch_versioned_implementation_with_fallback(
module="onyx.utils.telemetry",
attribute="event_telemetry",
fallback=noop_fallback,
)(
distinct_id=user.email if user else tenant_id,
event="user_message_sent",
mt_cloud_telemetry(
tenant_id=tenant_id,
distinct_id=(
user.email
if user and not getattr(user, "is_anonymous", False)
else tenant_id
),
event=MilestoneRecordType.USER_MESSAGE_SENT,
properties={
"origin": new_msg_req.origin.value,
"has_files": len(new_msg_req.file_descriptors) > 0,
"has_project": chat_session.project_id is not None,
"has_persona": persona is not None and persona.id != DEFAULT_PERSONA_ID,
"deep_research": new_msg_req.deep_research,
"tenant_id": tenant_id,
},
)
@@ -743,16 +739,27 @@ def llm_loop_completion_handle(
else:
final_answer = "The generation was stopped by the user."
# Build citation_docs_info from accumulated citations in state container
citation_docs_info: list[CitationDocInfo] = []
seen_citation_nums: set[int] = set()
for citation_num, search_doc in state_container.citation_to_doc.items():
if citation_num not in seen_citation_nums:
seen_citation_nums.add(citation_num)
citation_docs_info.append(
CitationDocInfo(
search_doc=search_doc,
citation_number=citation_num,
)
)
save_chat_turn(
message_text=final_answer,
reasoning_tokens=state_container.reasoning_tokens,
citation_to_doc=state_container.citation_to_doc,
citation_docs_info=citation_docs_info,
tool_calls=state_container.tool_calls,
all_search_docs=state_container.get_all_search_docs(),
db_session=db_session,
assistant_message=assistant_message,
is_clarification=state_container.is_clarification,
emitted_citations=state_container.get_emitted_citations(),
)

View File

@@ -2,9 +2,8 @@ import json
from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import SearchDocKey
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import add_search_docs_to_chat_message
from onyx.db.chat import add_search_docs_to_tool_call
@@ -20,6 +19,22 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def _create_search_doc_key(search_doc: SearchDoc) -> tuple[str, int, tuple[str, ...]]:
    """
    Build a hashable identity key for a specific version of a SearchDoc.

    The same document/chunk can be retrieved multiple times with different
    match_highlights (one set per originating query), so the highlights are
    folded into the key to keep those versions distinct from each other.

    Args:
        search_doc: The SearchDoc pydantic model to derive the key from

    Returns:
        A (document_id, chunk_ind, sorted match_highlights) tuple that uniquely
        identifies this specific version of the document
    """
    highlights = search_doc.match_highlights or []
    return (
        search_doc.document_id,
        search_doc.chunk_ind,
        tuple(sorted(highlights)),
    )
def _create_and_link_tool_calls(
tool_calls: list[ToolCallInfo],
assistant_message: ChatMessage,
@@ -139,36 +154,38 @@ def save_chat_turn(
message_text: str,
reasoning_tokens: str | None,
tool_calls: list[ToolCallInfo],
citation_to_doc: dict[int, SearchDoc],
all_search_docs: dict[SearchDocKey, SearchDoc],
citation_docs_info: list[CitationDocInfo],
db_session: Session,
assistant_message: ChatMessage,
is_clarification: bool = False,
emitted_citations: set[int] | None = None,
) -> None:
"""
Save a chat turn by populating the assistant_message and creating related entities.
This function:
1. Updates the ChatMessage with text, reasoning tokens, and token count
2. Creates DB SearchDoc entries from pre-deduplicated all_search_docs
3. Builds tool_call -> search_doc mapping for displayed docs
4. Builds citation mapping from citation_to_doc
5. Links all unique SearchDocs to the ChatMessage
2. Creates SearchDoc entries from ToolCall search_docs (for tool calls that returned documents)
3. Collects all unique SearchDocs from all tool calls and links them to ChatMessage
4. Builds citation mapping from citation_docs_info
5. Links all unique SearchDocs from tool calls to the ChatMessage
6. Creates ToolCall entries and links SearchDocs to them
7. Builds the citations mapping for the ChatMessage
Deduplication Logic:
- SearchDocs are deduplicated using (document_id, chunk_ind, match_highlights) as the key
- This ensures that the same document/chunk with different match_highlights (from different
queries) are stored as separate SearchDoc entries
- Each ToolCall and ChatMessage will map to the correct version of the SearchDoc that
matches its specific query highlights
Args:
message_text: The message content to save
reasoning_tokens: Optional reasoning tokens for the message
tool_calls: List of tool call information to create ToolCall entries (may include search_docs)
citation_to_doc: Mapping from citation number to SearchDoc for building citations
all_search_docs: Pre-deduplicated search docs from ChatStateContainer
citation_docs_info: List of citation document information for building citations mapping
db_session: Database session for persistence
assistant_message: The ChatMessage object to populate (should already exist in DB)
is_clarification: Whether this assistant message is a clarification question (deep research flow)
emitted_citations: Set of citation numbers that were actually emitted during streaming.
If provided, only citations in this set will be saved; others are filtered out.
"""
# 1. Update ChatMessage with message content, reasoning tokens, and token count
assistant_message.message = message_text
@@ -183,53 +200,53 @@ def save_chat_turn(
else:
assistant_message.token_count = 0
# 2. Create DB SearchDoc entries from pre-deduplicated all_search_docs
search_doc_key_to_id: dict[SearchDocKey, int] = {}
for key, search_doc_py in all_search_docs.items():
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[key] = db_search_doc.id
# 3. Build tool_call -> search_doc mapping (for displayed docs in each tool call)
# 2. Create SearchDoc entries from tool_calls
# Build mapping from SearchDoc to DB SearchDoc ID
# Use (document_id, chunk_ind, match_highlights) as key to avoid duplicates
# while ensuring different versions with different highlights are stored separately
search_doc_key_to_id: dict[tuple[str, int, tuple[str, ...]], int] = {}
tool_call_to_search_doc_ids: dict[str, list[int]] = {}
# Process tool calls and their search docs
for tool_call_info in tool_calls:
if tool_call_info.search_docs:
search_doc_ids_for_tool: list[int] = []
for search_doc_py in tool_call_info.search_docs:
key = ChatStateContainer.create_search_doc_key(search_doc_py)
if key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[key])
# Create a unique key for this SearchDoc version
search_doc_key = _create_search_doc_key(search_doc_py)
# Check if we've already created this exact SearchDoc version
if search_doc_key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[search_doc_key])
else:
# Displayed doc not in all_search_docs - create it
# This can happen if displayed_docs contains docs not in search_docs
# Create new DB SearchDoc entry
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[key] = db_search_doc.id
search_doc_key_to_id[search_doc_key] = db_search_doc.id
search_doc_ids_for_tool.append(db_search_doc.id)
tool_call_to_search_doc_ids[tool_call_info.tool_call_id] = list(
set(search_doc_ids_for_tool)
)
# Collect all search doc IDs for ChatMessage linking
all_search_doc_ids_set: set[int] = set(search_doc_key_to_id.values())
# 3. Collect all unique SearchDoc IDs from all tool calls to link to ChatMessage
# Use a set to deduplicate by ID (since we've already deduplicated by key above)
all_search_doc_ids_set: set[int] = set()
for search_doc_ids in tool_call_to_search_doc_ids.values():
all_search_doc_ids_set.update(search_doc_ids)
# 4. Build a citation mapping from the citation number to the saved DB SearchDoc ID
# Only include citations that were actually emitted during streaming
# 4. Build citation mapping from citation_docs_info
citation_number_to_search_doc_id: dict[int, int] = {}
for citation_num, search_doc_py in citation_to_doc.items():
# Skip citations that weren't actually emitted (if emitted_citations is provided)
if emitted_citations is not None and citation_num not in emitted_citations:
continue
for citation_doc_info in citation_docs_info:
# Extract SearchDoc pydantic model
search_doc_py = citation_doc_info.search_doc
# Create the unique key for this SearchDoc version
search_doc_key = ChatStateContainer.create_search_doc_key(search_doc_py)
search_doc_key = _create_search_doc_key(search_doc_py)
# Get the search doc ID (should already exist from processing tool_calls)
if search_doc_key in search_doc_key_to_id:
@@ -266,7 +283,10 @@ def save_chat_turn(
all_search_doc_ids_set.add(db_search_doc_id)
# Build mapping from citation number to search doc ID
citation_number_to_search_doc_id[citation_num] = db_search_doc_id
if citation_doc_info.citation_number is not None:
citation_number_to_search_doc_id[citation_doc_info.citation_number] = (
db_search_doc_id
)
# 5. Link all unique SearchDocs (from both tool calls and citations) to ChatMessage
final_search_doc_ids: list[int] = list(all_search_doc_ids_set)
@@ -286,10 +306,23 @@ def save_chat_turn(
tool_call_to_search_doc_ids=tool_call_to_search_doc_ids,
)
# 7. Build citations mapping - use the mapping we already built in step 4
assistant_message.citations = (
citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
)
# 7. Build citations mapping from citation_docs_info
# Any citation_doc_info with a citation_number appeared in the text and should be mapped
citations: dict[int, int] = {}
for citation_doc_info in citation_docs_info:
if citation_doc_info.citation_number is not None:
search_doc_id = citation_number_to_search_doc_id.get(
citation_doc_info.citation_number
)
if search_doc_id is not None:
citations[citation_doc_info.citation_number] = search_doc_id
else:
logger.warning(
f"Citation number {citation_doc_info.citation_number} found in citation_docs_info "
f"but no matching search doc ID in mapping"
)
assistant_message.citations = citations if citations else None
# Finally save the messages, tool calls, and docs
db_session.commit()

View File

@@ -208,19 +208,8 @@ OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 920
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
OPENSEARCH_ADMIN_PASSWORD = os.environ.get("OPENSEARCH_ADMIN_PASSWORD", "")
# This is the "base" config for now, the idea is that at least for our dev
# environments we always want to be dual indexing into both OpenSearch and Vespa
# to stress test the new codepaths. Only enable this if there is some instance
# of OpenSearch running for the relevant Onyx instance.
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "").lower() == "true"
)
# Given that the "base" config above is true, this enables whether we want to
# retrieve from OpenSearch or Vespa. We want to be able to quickly toggle this
# in the event we see issues with OpenSearch retrieval in our dev environments.
ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
ENABLE_OPENSEARCH_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_FOR_ONYX", "").lower() == "true"
)
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
@@ -749,10 +738,6 @@ JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default
LOG_ONYX_MODEL_INTERACTIONS = (
os.environ.get("LOG_ONYX_MODEL_INTERACTIONS", "").lower() == "true"
)
PROMPT_CACHE_CHAT_HISTORY = (
os.environ.get("PROMPT_CACHE_CHAT_HISTORY", "").lower() == "true"
)
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
LOG_VESPA_TIMING_INFORMATION = (
@@ -1031,14 +1016,3 @@ INSTANCE_TYPE = (
## Discord Bot Configuration
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")
DISCORD_BOT_INVOKE_CHAR = os.environ.get("DISCORD_BOT_INVOKE_CHAR", "!")
## Stripe Configuration
# URL to fetch the Stripe publishable key from a public S3 bucket.
# Publishable keys are safe to expose publicly - they can only initialize
# Stripe.js and tokenize payment info, not make charges or access data.
STRIPE_PUBLISHABLE_KEY_URL = (
"https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt"
)
# Override for local testing with Stripe test keys (pk_test_*)
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")

View File

@@ -1,5 +1,6 @@
import os
INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml"
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
NUM_RETURNED_HITS = 50

View File

@@ -153,6 +153,17 @@ CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT = 300 # 5 min
CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT = 30 * 60 # 30 minutes (in seconds)
# How long a queued user-file task is valid before workers discard it.
# Should be longer than the beat interval (20 s) but short enough to prevent
# indefinite queue growth. Workers drop tasks older than this without touching
# the DB, so a shorter value = faster drain of stale duplicates.
CELERY_USER_FILE_PROCESSING_TASK_EXPIRES = 60 # 1 minute (in seconds)
# Maximum number of tasks allowed in the user-file-processing queue before the
# beat generator stops adding more. Prevents unbounded queue growth when workers
# fall behind.
USER_FILE_PROCESSING_MAX_QUEUE_DEPTH = 500
CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"
@@ -341,6 +352,7 @@ class MilestoneRecordType(str, Enum):
CREATED_CONNECTOR = "created_connector"
CONNECTOR_SUCCEEDED = "connector_succeeded"
RAN_QUERY = "ran_query"
USER_MESSAGE_SENT = "user_message_sent"
MULTIPLE_ASSISTANTS = "multiple_assistants"
CREATED_ASSISTANT = "created_assistant"
CREATED_ONYX_BOT = "created_onyx_bot"
@@ -423,6 +435,9 @@ class OnyxRedisLocks:
# User file processing
USER_FILE_PROCESSING_BEAT_LOCK = "da_lock:check_user_file_processing_beat"
USER_FILE_PROCESSING_LOCK_PREFIX = "da_lock:user_file_processing"
# Short-lived key set when a task is enqueued; cleared when the worker picks it up.
# Prevents the beat from re-enqueuing the same file while a task is already queued.
USER_FILE_QUEUED_PREFIX = "da_lock:user_file_queued"
USER_FILE_PROJECT_SYNC_BEAT_LOCK = "da_lock:check_user_file_project_sync_beat"
USER_FILE_PROJECT_SYNC_LOCK_PREFIX = "da_lock:user_file_project_sync"
USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"

View File

@@ -25,11 +25,17 @@ class AsanaConnector(LoadConnector, PollConnector):
batch_size: int = INDEX_BATCH_SIZE,
continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,
) -> None:
self.workspace_id = asana_workspace_id
self.project_ids_to_index: list[str] | None = (
asana_project_ids.split(",") if asana_project_ids is not None else None
)
self.asana_team_id = asana_team_id
self.workspace_id = asana_workspace_id.strip()
if asana_project_ids:
project_ids = [
project_id.strip()
for project_id in asana_project_ids.split(",")
if project_id.strip()
]
self.project_ids_to_index = project_ids or None
else:
self.project_ids_to_index = None
self.asana_team_id = (asana_team_id.strip() or None) if asana_team_id else None
self.batch_size = batch_size
self.continue_on_failure = continue_on_failure
logger.info(

View File

@@ -15,7 +15,6 @@ from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.natural_language_processing.english_stopwords import ENGLISH_STOPWORDS_SET
from onyx.onyxbot.slack.models import ChannelType
from onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
@@ -114,7 +113,7 @@ def is_recency_query(query: str) -> bool:
if not has_recency_keyword:
return False
# Get combined stop words (English + Slack-specific)
# Get combined stop words (NLTK + Slack-specific)
all_stop_words = _get_combined_stop_words()
# Extract content words (excluding stop words)
@@ -489,7 +488,7 @@ def build_channel_override_query(channel_references: set[str], time_filter: str)
return f"__CHANNEL_OVERRIDE__ {channel_filter}{time_filter}"
# Slack-specific stop words (in addition to standard English stop words)
# Slack-specific stop words (in addition to standard NLTK stop words)
# These include Slack-specific terms and temporal/recency keywords
SLACK_SPECIFIC_STOP_WORDS = frozenset(
RECENCY_KEYWORDS
@@ -509,16 +508,27 @@ SLACK_SPECIFIC_STOP_WORDS = frozenset(
)
def _get_combined_stop_words() -> frozenset[str]:
"""Get combined English + Slack-specific stop words.
def _get_combined_stop_words() -> set[str]:
"""Get combined NLTK + Slack-specific stop words.
Returns a frozenset of stop words for filtering content words.
Returns a set of stop words for filtering content words.
Falls back to just Slack-specific stop words if NLTK is unavailable.
Note: Currently only supports English stop words. Non-English queries
may have suboptimal content word extraction. Future enhancement could
detect query language and load appropriate stop words.
"""
return ENGLISH_STOPWORDS_SET | SLACK_SPECIFIC_STOP_WORDS
try:
from nltk.corpus import stopwords # type: ignore
# TODO: Support multiple languages - currently hardcoded to English
# Could detect language or allow configuration
nltk_stop_words = set(stopwords.words("english"))
except Exception:
# Fallback if NLTK not available
nltk_stop_words = set()
return nltk_stop_words | SLACK_SPECIFIC_STOP_WORDS
def extract_content_words_from_recency_query(
@@ -526,7 +536,7 @@ def extract_content_words_from_recency_query(
) -> list[str]:
"""Extract meaningful content words from a recency query.
Filters out English stop words, Slack-specific terms, channel references, and proper nouns.
Filters out NLTK stop words, Slack-specific terms, channel references, and proper nouns.
Args:
query_text: The user's query text
@@ -535,7 +545,7 @@ def extract_content_words_from_recency_query(
Returns:
List of content words (up to MAX_CONTENT_WORDS)
"""
# Get combined stop words (English + Slack-specific)
# Get combined stop words (NLTK + Slack-specific)
all_stop_words = _get_combined_stop_words()
words = query_text.split()

View File

@@ -144,6 +144,10 @@ class BasicChunkRequest(BaseModel):
# In case some queries favor recency more than other queries.
recency_bias_multiplier: float = 1.0
# Sometimes we may want to extract specific keywords from a more semantic query for
# a better keyword search.
query_keywords: list[str] | None = None # Not used currently
limit: int | None = None
offset: int | None = None # This one is not set currently
@@ -162,8 +166,6 @@ class ChunkIndexRequest(BasicChunkRequest):
# Calculated final filters
filters: IndexFilters
query_keywords: list[str] | None = None
class ContextExpansionType(str, Enum):
NOT_RELEVANT = "not_relevant"
@@ -370,10 +372,6 @@ class SearchDocsResponse(BaseModel):
# document id is the most straightforward way.
citation_mapping: dict[int, str]
# For cases where the frontend only needs to display a subset of the search docs
# The whole list is typically still needed for later steps but this set should be saved separately
displayed_docs: list[SearchDoc] | None = None
class SavedSearchDoc(SearchDoc):
db_doc_id: int
@@ -432,6 +430,11 @@ class SavedSearchDoc(SearchDoc):
return self_score < other_score
class CitationDocInfo(BaseModel):
search_doc: SearchDoc
citation_number: int | None
class SavedSearchDocWithContent(SavedSearchDoc):
"""Used for endpoints that need to return the actual contents of the retrieved
section in addition to the match_highlights."""

View File

@@ -19,7 +19,6 @@ from onyx.db.models import Persona
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.interfaces import LLM
from onyx.natural_language_processing.english_stopwords import strip_stopwords
from onyx.secondary_llm_flows.source_filter import extract_source_filter
from onyx.secondary_llm_flows.time_filter import extract_time_filter
from onyx.utils.logger import setup_logger
@@ -279,16 +278,12 @@ def search_pipeline(
bypass_acl=chunk_search_request.bypass_acl,
)
query_keywords = strip_stopwords(chunk_search_request.query)
query_request = ChunkIndexRequest(
query=chunk_search_request.query,
hybrid_alpha=chunk_search_request.hybrid_alpha,
recency_bias_multiplier=chunk_search_request.recency_bias_multiplier,
query_keywords=query_keywords,
query_keywords=chunk_search_request.query_keywords,
filters=filters,
limit=chunk_search_request.limit,
offset=chunk_search_request.offset,
)
retrieved_chunks = search_chunks(

View File

@@ -23,6 +23,45 @@ from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
def _dedupe_chunks(
    chunks: list[InferenceChunk],
) -> list[InferenceChunk]:
    """Collapse duplicate (document_id, chunk_id) chunks, keeping the best score.

    When the same chunk appears more than once (e.g. retrieved by several
    parallel queries), only the occurrence with the highest score is kept;
    a missing score is treated as 0 for the comparison. Insertion order of
    first occurrences is preserved in the returned list.
    """
    best_by_key: dict[tuple[str, int], InferenceChunk] = {}
    for candidate in chunks:
        key = (candidate.document_id, candidate.chunk_id)
        current = best_by_key.get(key)
        # Replace only on a strictly better score so ties keep the first seen.
        if current is None or (current.score or 0) < (candidate.score or 0):
            best_by_key[key] = candidate
    return list(best_by_key.values())
def download_nltk_data() -> None:
    """Ensure the NLTK resources used by this module are available locally.

    Each required resource is probed via nltk.data.find and downloaded only
    when missing. Download failures are logged but intentionally not raised,
    so callers can proceed without the optional resources.
    """
    import nltk  # type: ignore[import-untyped]

    required_resources = {
        "stopwords": "corpora/stopwords",
        # "wordnet": "corpora/wordnet",  # Not in use
        "punkt_tab": "tokenizers/punkt_tab",
    }

    for name, lookup_path in required_resources.items():
        try:
            nltk.data.find(lookup_path)
            logger.info(f"{name} is already downloaded.")
            continue
        except LookupError:
            pass

        try:
            logger.info(f"Downloading {name}...")
            nltk.download(name, quiet=True)
            logger.info(f"{name} downloaded successfully.")
        except Exception as e:
            logger.error(f"Failed to download {name}. Error: {e}")
def combine_retrieval_results(
chunk_sets: list[list[InferenceChunk]],
) -> list[InferenceChunk]:

View File

@@ -3,8 +3,6 @@ from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
@@ -20,6 +18,45 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def insert_input_prompt_if_not_exists(
    user: User | None,
    input_prompt_id: int | None,
    prompt: str,
    content: str,
    active: bool,
    is_public: bool,
    db_session: Session,
    commit: bool = True,
) -> InputPrompt:
    """Return an existing InputPrompt or create one when no match is found.

    Lookup strategy: when input_prompt_id is given, match on the primary key;
    otherwise match on prompt text scoped to the given user (or to ownerless
    prompts when user is None). A newly created prompt is forced public when
    there is no owning user. The session is committed only when a new row is
    created and commit is True.
    """
    if input_prompt_id is not None:
        existing = (
            db_session.query(InputPrompt).filter_by(id=input_prompt_id).first()
        )
    else:
        lookup = db_session.query(InputPrompt).filter(InputPrompt.prompt == prompt)
        if user:
            lookup = lookup.filter(InputPrompt.user_id == user.id)
        else:
            lookup = lookup.filter(InputPrompt.user_id.is_(None))
        existing = lookup.first()

    if existing is not None:
        return existing

    new_prompt = InputPrompt(
        id=input_prompt_id,
        prompt=prompt,
        content=content,
        active=active,
        is_public=is_public or user is None,
        user_id=user.id if user else None,
    )
    db_session.add(new_prompt)
    if commit:
        db_session.commit()

    return new_prompt
def insert_input_prompt(
prompt: str,
content: str,
@@ -27,41 +64,16 @@ def insert_input_prompt(
user: User | None,
db_session: Session,
) -> InputPrompt:
user_id = user.id if user else None
# Use atomic INSERT ... ON CONFLICT DO NOTHING with RETURNING
# to avoid race conditions with the uniqueness check
stmt = pg_insert(InputPrompt).values(
input_prompt = InputPrompt(
prompt=prompt,
content=content,
active=True,
is_public=is_public,
user_id=user_id,
user_id=user.id if user is not None else None,
)
# Use the appropriate constraint based on whether this is a user-owned or public prompt
if user_id is not None:
stmt = stmt.on_conflict_do_nothing(constraint="uq_inputprompt_prompt_user_id")
else:
# Partial unique indexes cannot be targeted by constraint name;
# must use index_elements + index_where
stmt = stmt.on_conflict_do_nothing(
index_elements=[InputPrompt.prompt],
index_where=InputPrompt.user_id.is_(None),
)
stmt = stmt.returning(InputPrompt)
result = db_session.execute(stmt)
input_prompt = result.scalar_one_or_none()
if input_prompt is None:
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
db_session.add(input_prompt)
db_session.commit()
return input_prompt
@@ -86,40 +98,23 @@ def update_input_prompt(
input_prompt.content = content
input_prompt.active = active
try:
db_session.commit()
except IntegrityError:
db_session.rollback()
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
db_session.commit()
return input_prompt
def validate_user_prompt_authorization(
user: User | None, input_prompt: InputPrompt
) -> bool:
"""
Check if the user is authorized to modify the given input prompt.
Returns True only if the user owns the prompt.
Returns False for public prompts (only admins can modify those),
unless auth is disabled (then anyone can manage public prompts).
"""
prompt = InputPromptSnapshot.from_model(input_prompt=input_prompt)
# Public prompts cannot be modified via the user API (unless auth is disabled)
if prompt.is_public or prompt.user_id is None:
return AUTH_TYPE == AuthType.DISABLED
if prompt.user_id is not None:
if user is None:
return False
# User must be logged in
if user is None:
return False
# User must own the prompt
user_details = UserInfo.from_model(user)
return str(user_details.id) == str(prompt.user_id)
user_details = UserInfo.from_model(user)
if str(user_details.id) != str(prompt.user_id):
return False
return True
def remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> None:

View File

@@ -9,9 +9,6 @@ def get_memories(user: User | None, db_session: Session) -> list[str]:
if user is None:
return []
if not user.use_memories:
return []
user_info = [
f"User's name: {user.personal_name}" if user.personal_name else "",
f"User's role: {user.personal_role}" if user.personal_role else "",

View File

@@ -188,7 +188,6 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
nullable=True,
default=None,
)
chat_background: Mapped[str | None] = mapped_column(String, nullable=True)
# personalization fields are exposed via the chat user settings "Personalization" tab
personal_name: Mapped[str | None] = mapped_column(String, nullable=True)
personal_role: Mapped[str | None] = mapped_column(String, nullable=True)
@@ -2933,8 +2932,6 @@ class PersonaLabel(Base):
"Persona",
secondary=Persona__PersonaLabel.__table__,
back_populates="labels",
cascade="all, delete-orphan",
single_parent=True,
)
@@ -3627,18 +3624,6 @@ class InputPrompt(Base):
ForeignKey("user.id", ondelete="CASCADE"), nullable=True
)
__table_args__ = (
# Unique constraint on (prompt, user_id) for user-owned prompts
UniqueConstraint("prompt", "user_id", name="uq_inputprompt_prompt_user_id"),
# Partial unique index for public prompts (user_id IS NULL)
Index(
"uq_inputprompt_prompt_public",
"prompt",
unique=True,
postgresql_where=text("user_id IS NULL"),
),
)
class InputPrompt__User(Base):
__tablename__ = "inputprompt__user"
@@ -3647,7 +3632,7 @@ class InputPrompt__User(Base):
ForeignKey("inputprompt.id"), primary_key=True
)
user_id: Mapped[UUID | None] = mapped_column(
ForeignKey("user.id"), primary_key=True
ForeignKey("inputprompt.id"), primary_key=True
)
disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

View File

@@ -917,7 +917,9 @@ def upsert_persona(
existing_persona.icon_name = icon_name
existing_persona.is_visible = is_visible
existing_persona.search_start_date = search_start_date
existing_persona.labels = labels or []
if label_ids is not None:
existing_persona.labels.clear()
existing_persona.labels = labels or []
existing_persona.is_default_persona = (
is_default_persona
if is_default_persona is not None

View File

@@ -20,7 +20,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.key_value_store.factory import get_kv_store
from onyx.utils.logger import setup_logger
@@ -80,43 +80,39 @@ def _perform_index_swap(
db_session=db_session,
)
# This flow is for checking and possibly creating an index so we get all
# indices.
document_indices = get_all_document_indices(new_search_settings, None, None)
# remove the old index from the vector db
document_index = get_default_document_index(new_search_settings, None)
WAIT_SECONDS = 5
for document_index in document_indices:
success = False
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
try:
logger.notice(
f"Document index {document_index.__class__.__name__} swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
)
logger.notice("Document index swap complete.")
success = True
break
except Exception:
logger.exception(
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
f"The document index services may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not success:
logger.error(
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
f"Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
success = False
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
try:
logger.notice(
f"Vespa index swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
return None
document_index.ensure_indices_exist(
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
)
logger.notice("Vespa index swap complete.")
success = True
break
except Exception:
logger.exception(
f"Vespa index swap did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not success:
logger.error(
f"Vespa index swap did not succeed. Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
)
return None
return current_search_settings

View File

@@ -139,20 +139,6 @@ def update_user_theme_preference(
db_session.commit()
def update_user_chat_background(
user_id: UUID,
chat_background: str | None,
db_session: Session,
) -> None:
"""Update user's chat background setting."""
db_session.execute(
update(User)
.where(User.id == user_id) # type: ignore
.values(chat_background=chat_background)
)
db_session.commit()
def update_user_personalization(
user_id: UUID,
*,

View File

@@ -287,7 +287,6 @@ def run_deep_research_llm_loop(
token_count=100,
message_type=MessageType.USER,
)
truncated_message_history = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,

View File

@@ -2,18 +2,13 @@ from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
def generate_enriched_content_for_chunk_text(chunk: DocMetadataAwareIndexChunk) -> str:
def generate_enriched_content_for_chunk(chunk: DocMetadataAwareIndexChunk) -> str:
return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_keyword}"
def generate_enriched_content_for_chunk_embedding(chunk: DocAwareChunk) -> str:
return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
def cleanup_content_for_chunks(
chunks: list[InferenceChunkUncleaned],
) -> list[InferenceChunk]:

View File

@@ -1,8 +1,9 @@
import httpx
from sqlalchemy.orm import Session
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchOldDocumentIndex,
@@ -16,24 +17,17 @@ def get_default_document_index(
secondary_search_settings: SearchSettings | None,
httpx_client: httpx.Client | None = None,
) -> DocumentIndex:
"""Gets the default document index from env vars.
"""Primary index is the index that is used for querying/updating etc.
Secondary index is for when both the currently used index and the upcoming
index both need to be updated, updates are applied to both indices"""
To be used for retrieval only. Indexing should be done through both indices
until Vespa is deprecated.
Pre-existing docstring for this function, although secondary indices are not
currently supported:
Primary index is the index that is used for querying/updating etc. Secondary
index is for when both the currently used index and the upcoming index both
need to be updated, updates are applied to both indices.
"""
secondary_index_name: str | None = None
secondary_large_chunks_enabled: bool | None = None
if secondary_search_settings:
secondary_index_name = secondary_search_settings.index_name
secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled
if ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX:
if ENABLE_OPENSEARCH_FOR_ONYX:
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
secondary_index_name=secondary_index_name,
@@ -53,48 +47,12 @@ def get_default_document_index(
)
def get_all_document_indices(
search_settings: SearchSettings,
secondary_search_settings: SearchSettings | None,
httpx_client: httpx.Client | None = None,
) -> list[DocumentIndex]:
"""Gets all document indices.
NOTE: Will only return an OpenSearch index interface if
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX is True. This is so we don't break flows
where we know it won't be enabled.
Used for indexing only. Until Vespa is deprecated we will index into both
document indices. Retrieval is done through only one index however.
Large chunks and secondary indices are not currently supported so we
hardcode appropriate values.
def get_current_primary_default_document_index(db_session: Session) -> DocumentIndex:
"""
vespa_document_index = VespaIndex(
index_name=search_settings.index_name,
secondary_index_name=(
secondary_search_settings.index_name if secondary_search_settings else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=(
secondary_search_settings.large_chunks_enabled
if secondary_search_settings
else None
),
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
TODO: Use redis to cache this or something
"""
search_settings = get_current_search_settings(db_session)
return get_default_document_index(
search_settings,
None,
)
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)
result: list[DocumentIndex] = [vespa_document_index]
if opensearch_document_index:
result.append(opensearch_document_index)
return result

View File

@@ -1,5 +1,4 @@
import logging
import time
from typing import Any
from typing import Generic
from typing import TypeVar
@@ -570,9 +569,6 @@ class OpenSearchClient:
def close(self) -> None:
"""Closes the client.
TODO(andrei): Can we have some way to auto close when the client no
longer has any references?
Raises:
Exception: There was an error closing the client.
"""
@@ -600,55 +596,3 @@ class OpenSearchClient:
)
hits_second_layer: list[Any] = hits_first_layer.get("hits", [])
return hits_second_layer
def wait_for_opensearch_with_timeout(
wait_interval_s: int = 5,
wait_limit_s: int = 60,
client: OpenSearchClient | None = None,
) -> bool:
"""Waits for OpenSearch to become ready subject to a timeout.
Will create a new dummy client if no client is provided. Will close this
client at the end of the function. Will not close the client if it was
supplied.
Args:
wait_interval_s: The interval in seconds to wait between checks.
Defaults to 5.
wait_limit_s: The total timeout in seconds to wait for OpenSearch to
become ready. Defaults to 60.
client: The OpenSearch client to use for pinging. If None, a new dummy
client will be created. Defaults to None.
Returns:
True if OpenSearch is ready, False otherwise.
"""
made_client = False
try:
if client is None:
# NOTE: index_name does not matter because we are only using this object
# to ping.
# TODO(andrei): Make this better.
client = OpenSearchClient(index_name="")
made_client = True
time_start = time.monotonic()
while True:
if client.ping():
logger.info("[OpenSearch] Readiness probe succeeded. Continuing...")
return True
time_elapsed = time.monotonic() - time_start
if time_elapsed > wait_limit_s:
logger.info(
f"[OpenSearch] Readiness probe did not succeed within the timeout "
f"({wait_limit_s} seconds)."
)
return False
logger.info(
f"[OpenSearch] Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit_s:.1f}"
)
time.sleep(wait_interval_s)
finally:
if made_client:
assert client is not None
client.close()

View File

@@ -17,7 +17,7 @@ from onyx.db.enums import EmbeddingPrecision
from onyx.db.models import DocumentSource
from onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk_text,
generate_enriched_content_for_chunk,
)
from onyx.document_index.interfaces import DocumentIndex as OldDocumentIndex
from onyx.document_index.interfaces import (
@@ -140,12 +140,9 @@ def _convert_onyx_chunk_to_opensearch_document(
return DocumentChunk(
document_id=chunk.source_document.id,
chunk_index=chunk.chunk_id,
# Use get_title_for_document_index to match the logic used when creating
# the title_embedding in the embedder. This method falls back to
# semantic_identifier when title is None (but not empty string).
title=chunk.source_document.get_title_for_document_index(),
title=chunk.source_document.title,
title_vector=chunk.title_embedding,
content=generate_enriched_content_for_chunk_text(chunk),
content=generate_enriched_content_for_chunk(chunk),
content_vector=chunk.embeddings.full_embedding,
source_type=chunk.source_document.source.value,
metadata_list=chunk.source_document.get_metadata_str_attributes(),
@@ -424,24 +421,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
def verify_and_create_index_if_necessary(
self, embedding_dim: int, embedding_precision: EmbeddingPrecision
) -> None:
"""Verifies and creates the index if necessary.
Also puts the desired search pipeline state, creating the pipelines if
they do not exist and updating them otherwise.
Args:
embedding_dim: Vector dimensionality for the vector similarity part
of the search.
embedding_precision: Precision of the values of the vectors for the
similarity part of the search.
Raises:
RuntimeError: There was an error verifying or creating the index or
search pipelines.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if necessary."
)
expected_mappings = DocumentSchema.get_document_schema(
embedding_dim, self._tenant_state.multitenant
)
@@ -471,9 +450,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
chunks: list[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
) -> list[DocumentInsertionRecord]:
logger.debug(
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks for index {self._index_name}."
)
# Set of doc IDs.
unique_docs_to_be_indexed: set[str] = set()
document_indexing_results: list[DocumentInsertionRecord] = []
@@ -518,8 +494,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
def delete(self, document_id: str, chunk_count: int | None = None) -> int:
"""Deletes all chunks for a given document.
Does nothing if the specified document ID does not exist.
TODO(andrei): Make this method require supplying source type.
TODO(andrei): Consider implementing this method to delete on document
chunk IDs vs querying for matching document chunks.
@@ -536,9 +510,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
Returns:
The number of chunks successfully deleted.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Deleting document {document_id} from index {self._index_name}."
)
query_body = DocumentQuery.delete_from_document_id_query(
document_id=document_id,
tenant_state=self._tenant_state,
@@ -552,7 +523,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
) -> None:
"""Updates some set of chunks.
NOTE: Will raise if the specified document chunks do not exist.
NOTE: Requires document chunk count be known; will raise if it is not.
NOTE: Each update request must have some field to update; if not it is
assumed there is a bug in the caller and this will raise.
@@ -569,9 +539,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
RuntimeError: Failed to update some or all of the chunks for the
specified documents.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index {self._index_name}."
)
for update_request in update_requests:
properties_to_update: dict[str, Any] = dict()
# TODO(andrei): Nit but consider if we can use DocumentChunk
@@ -637,9 +604,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
TODO(andrei): Consider implementing this method to retrieve on document
chunk IDs vs querying for matching document chunks.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index {self._index_name}."
)
results: list[InferenceChunk] = []
for chunk_request in chunk_requests:
search_hits: list[SearchHit[DocumentChunk]] = []
@@ -679,9 +643,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
num_to_retrieve: int,
offset: int = 0,
) -> list[InferenceChunk]:
logger.debug(
f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}."
)
query_body = DocumentQuery.get_hybrid_search_query(
query_text=query,
query_vector=query_embedding,

View File

@@ -17,7 +17,7 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
get_experts_stores_representations,
)
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk_text,
generate_enriched_content_for_chunk,
)
from onyx.document_index.document_index_utils import get_uuid_from_chunk
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old
@@ -186,7 +186,7 @@ def _index_vespa_chunk(
# For the BM25 index, the keyword suffix is used, the vector is already generated with the more
# natural language representation of the metadata section
CONTENT: remove_invalid_unicode_chars(
generate_enriched_content_for_chunk_text(chunk)
generate_enriched_content_for_chunk(chunk)
),
# This duplication of `content` is needed for keyword highlighting
# Note that it's not exactly the same as the actual content

View File

@@ -7,9 +7,6 @@ from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import DocumentFailure
from onyx.db.models import SearchSettings
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk_embedding,
)
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocAwareChunk
@@ -129,7 +126,7 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
if chunk.large_chunk_reference_ids:
large_chunks_present = True
chunk_text = (
generate_enriched_content_for_chunk_embedding(chunk)
f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
) or chunk.source_document.get_title_for_document_index()
if not chunk_text:

View File

@@ -37,7 +37,6 @@ from onyx.document_index.document_index_utils import (
get_multipass_config,
)
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import DocumentMetadata
from onyx.document_index.interfaces import IndexBatchParams
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
@@ -164,7 +163,7 @@ def index_doc_batch_with_handler(
*,
chunker: Chunker,
embedder: IndexingEmbedder,
document_indices: list[DocumentIndex],
document_index: DocumentIndex,
document_batch: list[Document],
request_id: str | None,
tenant_id: str,
@@ -177,7 +176,7 @@ def index_doc_batch_with_handler(
index_pipeline_result = index_doc_batch(
chunker=chunker,
embedder=embedder,
document_indices=document_indices,
document_index=document_index,
document_batch=document_batch,
request_id=request_id,
tenant_id=tenant_id,
@@ -628,7 +627,7 @@ def index_doc_batch(
document_batch: list[Document],
chunker: Chunker,
embedder: IndexingEmbedder,
document_indices: list[DocumentIndex],
document_index: DocumentIndex,
request_id: str | None,
tenant_id: str,
adapter: IndexingBatchAdapter,
@@ -744,57 +743,47 @@ def index_doc_batch(
short_descriptor_log = str(short_descriptor_list)[:1024]
logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
for document_index in document_indices:
# A document will not be spread across different batches, so all the
# documents with chunks in this set, are fully represented by the chunks
# in this set
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
# A document will not be spread across different batches, so all the
# documents with chunks in this set, are fully represented by the chunks
# in this set
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
all_returned_doc_ids: set[str] = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
all_returned_doc_ids = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
f"This occured for document index {document_index.__class__.__name__}"
)
# We treat the first document index we got as the primary one used
# for reporting the state of indexing.
if primary_doc_idx_insertion_records is None:
primary_doc_idx_insertion_records = insertion_records
if primary_doc_idx_vector_db_write_failures is None:
primary_doc_idx_vector_db_write_failures = vector_db_write_failures
adapter.post_index(
context=context,
@@ -803,15 +792,11 @@ def index_doc_batch(
result=result,
)
assert primary_doc_idx_insertion_records is not None
assert primary_doc_idx_vector_db_write_failures is not None
return IndexingPipelineResult(
new_docs=len(
[r for r in primary_doc_idx_insertion_records if not r.already_existed]
),
new_docs=len([r for r in insertion_records if not r.already_existed]),
total_docs=len(filtered_documents),
total_chunks=len(chunks_with_embeddings),
failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
failures=vector_db_write_failures + embedding_failures,
)
@@ -820,7 +805,7 @@ def run_indexing_pipeline(
document_batch: list[Document],
request_id: str | None,
embedder: IndexingEmbedder,
document_indices: list[DocumentIndex],
document_index: DocumentIndex,
db_session: Session,
tenant_id: str,
adapter: IndexingBatchAdapter,
@@ -861,7 +846,7 @@ def run_indexing_pipeline(
return index_doc_batch_with_handler(
chunker=chunker,
embedder=embedder,
document_indices=document_indices,
document_index=document_index,
document_batch=document_batch,
request_id=request_id,
tenant_id=tenant_id,

View File

@@ -41,11 +41,6 @@ alphanum_regex = re.compile(r"[^a-z0-9]+")
rem_email_regex = re.compile(r"(?<=\S)@([a-z0-9-]+)\.([a-z]{2,6})$")
def _ngrams(sequence: str, n: int) -> list[tuple[str, ...]]:
"""Generate n-grams from a sequence."""
return [tuple(sequence[i : i + n]) for i in range(len(sequence) - n + 1)]
def _clean_name(entity_name: str) -> str:
"""
Clean an entity string by removing non-alphanumeric characters and email addresses.
@@ -63,6 +58,8 @@ def _normalize_one_entity(
attributes: dict[str, str],
allowed_docs_temp_view_name: str | None = None,
) -> str | None:
from nltk import ngrams # type: ignore
"""
Matches a single entity to the best matching entity of the same type.
"""
@@ -153,16 +150,16 @@ def _normalize_one_entity(
# step 2: do a weighted ngram analysis and damerau levenshtein distance to rerank
n1, n2, n3 = (
set(_ngrams(cleaned_entity, 1)),
set(_ngrams(cleaned_entity, 2)),
set(_ngrams(cleaned_entity, 3)),
set(ngrams(cleaned_entity, 1)),
set(ngrams(cleaned_entity, 2)),
set(ngrams(cleaned_entity, 3)),
)
for i, (candidate_id_name, candidate_name, _) in enumerate(candidates):
cleaned_candidate = _clean_name(candidate_name)
h_n1, h_n2, h_n3 = (
set(_ngrams(cleaned_candidate, 1)),
set(_ngrams(cleaned_candidate, 2)),
set(_ngrams(cleaned_candidate, 3)),
set(ngrams(cleaned_candidate, 1)),
set(ngrams(cleaned_candidate, 2)),
set(ngrams(cleaned_candidate, 3)),
)
# compute ngram overlap, renormalize scores if the names are too short for larger ngrams

View File

@@ -369,6 +369,8 @@ def _patch_openai_responses_chunk_parser() -> None:
# New output item added
output_item = parsed_chunk.get("item", {})
if output_item.get("type") == "function_call":
# Track that we've received tool calls via streaming
self._has_streamed_tool_calls = True
return GenericStreamingChunk(
text="",
tool_use=ChatCompletionToolCallChunk(
@@ -394,6 +396,8 @@ def _patch_openai_responses_chunk_parser() -> None:
elif event_type == "response.function_call_arguments.delta":
content_part: Optional[str] = parsed_chunk.get("delta", None)
if content_part:
# Track that we've received tool calls via streaming
self._has_streamed_tool_calls = True
return GenericStreamingChunk(
text="",
tool_use=ChatCompletionToolCallChunk(
@@ -491,22 +495,72 @@ def _patch_openai_responses_chunk_parser() -> None:
elif event_type == "response.completed":
# Final event signaling all output items (including parallel tool calls) are done
# Check if we already received tool calls via streaming events
# There is an issue where OpenAI (not via Azure) will give back the tool calls streamed out as tokens
# But on Azure, it's only given out all at once. OpenAI also happens to give back the tool calls in the
# response.completed event so we need to throw it out here or there are duplicate tool calls.
has_streamed_tool_calls = getattr(self, "_has_streamed_tool_calls", False)
response_data = parsed_chunk.get("response", {})
# Determine finish reason based on response content
finish_reason = "stop"
if response_data.get("output"):
for item in response_data["output"]:
if isinstance(item, dict) and item.get("type") == "function_call":
finish_reason = "tool_calls"
break
return GenericStreamingChunk(
text="",
tool_use=None,
is_finished=True,
finish_reason=finish_reason,
usage=None,
output_items = response_data.get("output", [])
# Check if there are function_call items in the output
has_function_calls = any(
isinstance(item, dict) and item.get("type") == "function_call"
for item in output_items
)
if has_function_calls and not has_streamed_tool_calls:
# Azure's Responses API returns all tool calls in response.completed
# without streaming them incrementally. Extract them here.
from litellm.types.utils import (
Delta,
ModelResponseStream,
StreamingChoices,
)
tool_calls = []
for idx, item in enumerate(output_items):
if isinstance(item, dict) and item.get("type") == "function_call":
tool_calls.append(
ChatCompletionToolCallChunk(
id=item.get("call_id"),
index=idx,
type="function",
function=ChatCompletionToolCallFunctionChunk(
name=item.get("name"),
arguments=item.get("arguments", ""),
),
)
)
return ModelResponseStream(
choices=[
StreamingChoices(
index=0,
delta=Delta(tool_calls=tool_calls),
finish_reason="tool_calls",
)
]
)
elif has_function_calls:
# Tool calls were already streamed, just signal completion
return GenericStreamingChunk(
text="",
tool_use=None,
is_finished=True,
finish_reason="tool_calls",
usage=None,
)
else:
return GenericStreamingChunk(
text="",
tool_use=None,
is_finished=True,
finish_reason="stop",
usage=None,
)
else:
pass
@@ -631,6 +685,40 @@ def _patch_openai_responses_transform_response() -> None:
LiteLLMResponsesTransformationHandler.transform_response = _patched_transform_response # type: ignore[method-assign]
def _patch_azure_responses_should_fake_stream() -> None:
"""
Patches AzureOpenAIResponsesAPIConfig.should_fake_stream to always return False.
By default, LiteLLM uses "fake streaming" (MockResponsesAPIStreamingIterator) for models
not in its database. This causes Azure custom model deployments to buffer the entire
response before yielding, resulting in poor time-to-first-token.
Azure's Responses API supports native streaming, so we override this to always use
real streaming (SyncResponsesAPIStreamingIterator).
"""
from litellm.llms.azure.responses.transformation import (
AzureOpenAIResponsesAPIConfig,
)
if (
getattr(AzureOpenAIResponsesAPIConfig.should_fake_stream, "__name__", "")
== "_patched_should_fake_stream"
):
return
def _patched_should_fake_stream(
self: Any,
model: Optional[str],
stream: Optional[bool],
custom_llm_provider: Optional[str] = None,
) -> bool:
# Azure Responses API supports native streaming - never fake it
return False
_patched_should_fake_stream.__name__ = "_patched_should_fake_stream"
AzureOpenAIResponsesAPIConfig.should_fake_stream = _patched_should_fake_stream # type: ignore[method-assign]
def apply_monkey_patches() -> None:
"""
Apply all necessary monkey patches to LiteLLM for compatibility.
@@ -640,12 +728,13 @@ def apply_monkey_patches() -> None:
- Patching OllamaChatCompletionResponseIterator.chunk_parser for streaming content
- Patching OpenAiResponsesToChatCompletionStreamIterator.chunk_parser for OpenAI Responses API
- Patching LiteLLMResponsesTransformationHandler.transform_response for non-streaming responses
- Patching LiteLLMResponsesTransformationHandler._convert_content_str_to_input_text for tool content types
- Patching AzureOpenAIResponsesAPIConfig.should_fake_stream to enable native streaming
"""
_patch_ollama_transform_request()
_patch_ollama_chunk_parser()
_patch_openai_responses_chunk_parser()
_patch_openai_responses_transform_response()
_patch_azure_responses_should_fake_stream()
def _extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -54,6 +54,11 @@
"model_vendor": "amazon",
"model_version": "v1:0"
},
"anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -1460,6 +1465,11 @@
"model_vendor": "mistral",
"model_version": "v0:1"
},
"bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"chat-bison": {
"display_name": "Chat Bison",
"model_vendor": "google",
@@ -1490,6 +1500,16 @@
"model_vendor": "openai",
"model_version": "latest"
},
"claude-3-5-haiku-20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"claude-3-5-haiku-latest": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "latest"
},
"claude-3-5-sonnet-20240620": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -1695,6 +1715,11 @@
"model_vendor": "amazon",
"model_version": "v1:0"
},
"eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -3226,6 +3251,15 @@
"model_vendor": "anthropic",
"model_version": "latest"
},
"openrouter/anthropic/claude-3-5-haiku": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic"
},
"openrouter/anthropic/claude-3-5-haiku-20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"openrouter/anthropic/claude-3-haiku": {
"display_name": "Claude Haiku 3",
"model_vendor": "anthropic"
@@ -3740,6 +3774,11 @@
"model_vendor": "amazon",
"model_version": "1:0"
},
"us.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -3860,6 +3899,15 @@
"model_vendor": "twelvelabs",
"model_version": "v1:0"
},
"vertex_ai/claude-3-5-haiku": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic"
},
"vertex_ai/claude-3-5-haiku@20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"vertex_ai/claude-3-5-sonnet": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic"

View File

@@ -1,225 +0,0 @@
import re
ENGLISH_STOPWORDS = [
"a",
"about",
"above",
"after",
"again",
"against",
"ain",
"all",
"am",
"an",
"and",
"any",
"are",
"aren",
"aren't",
"as",
"at",
"be",
"because",
"been",
"before",
"being",
"below",
"between",
"both",
"but",
"by",
"can",
"couldn",
"couldn't",
"d",
"did",
"didn",
"didn't",
"do",
"does",
"doesn",
"doesn't",
"doing",
"don",
"don't",
"down",
"during",
"each",
"few",
"for",
"from",
"further",
"had",
"hadn",
"hadn't",
"has",
"hasn",
"hasn't",
"have",
"haven",
"haven't",
"having",
"he",
"he'd",
"he'll",
"he's",
"her",
"here",
"hers",
"herself",
"him",
"himself",
"his",
"how",
"i",
"i'd",
"i'll",
"i'm",
"i've",
"if",
"in",
"into",
"is",
"isn",
"isn't",
"it",
"it'd",
"it'll",
"it's",
"its",
"itself",
"just",
"ll",
"m",
"ma",
"me",
"mightn",
"mightn't",
"more",
"most",
"mustn",
"mustn't",
"my",
"myself",
"needn",
"needn't",
"no",
"nor",
"not",
"now",
"o",
"of",
"off",
"on",
"once",
"only",
"or",
"other",
"our",
"ours",
"ourselves",
"out",
"over",
"own",
"re",
"s",
"same",
"shan",
"shan't",
"she",
"she'd",
"she'll",
"she's",
"should",
"should've",
"shouldn",
"shouldn't",
"so",
"some",
"such",
"t",
"than",
"that",
"that'll",
"the",
"their",
"theirs",
"them",
"themselves",
"then",
"there",
"these",
"they",
"they'd",
"they'll",
"they're",
"they've",
"this",
"those",
"through",
"to",
"too",
"under",
"until",
"up",
"ve",
"very",
"was",
"wasn",
"wasn't",
"we",
"we'd",
"we'll",
"we're",
"we've",
"were",
"weren",
"weren't",
"what",
"when",
"where",
"which",
"while",
"who",
"whom",
"why",
"will",
"with",
"won",
"won't",
"wouldn",
"wouldn't",
"y",
"you",
"you'd",
"you'll",
"you're",
"you've",
"your",
"yours",
"yourself",
"yourselves",
]
ENGLISH_STOPWORDS_SET = frozenset(ENGLISH_STOPWORDS)
def strip_stopwords(text: str) -> list[str]:
"""Remove English stopwords from text.
Matching is case-insensitive and ignores leading/trailing punctuation
on each word. Internal punctuation (like apostrophes in contractions)
is preserved for matching, so "you're" matches the stopword "you're"
but "youre" would not.
"""
words = text.split()
result = []
for word in words:
# Strip leading/trailing punctuation to get the core word for comparison
# This preserves internal punctuation like apostrophes
core = re.sub(r"^[^\w']+|[^\w']+$", "", word)
if core.lower() not in ENGLISH_STOPWORDS_SET:
result.append(word)
return result

View File

@@ -0,0 +1,287 @@
# Discord Bot Multitenant Architecture
This document analyzes how the Discord cache manager and API client coordinate to handle multitenant API keys from a single Discord client.
## Overview
The Discord bot uses a **single-client, multi-tenant** architecture where one `OnyxDiscordClient` instance serves multiple tenants (organizations) simultaneously. Tenant isolation is achieved through:
- **Cache Manager**: Maps Discord guilds to tenants and stores per-tenant API keys
- **API Client**: Stateless HTTP client that accepts dynamic API keys per request
```
┌─────────────────────────────────────────────────────────────────────┐
│ OnyxDiscordClient │
│ │
│ ┌─────────────────────────┐ ┌─────────────────────────────┐ │
│ │ DiscordCacheManager │ │ OnyxAPIClient │ │
│ │ │ │ │ │
│ │ guild_id → tenant_id │───▶│ send_chat_message( │ │
│ │ tenant_id → api_key │ │ message, │ │
│ │ │ │ api_key=<per-tenant>, │ │
│ └─────────────────────────┘ │ persona_id=... │ │
│ │ ) │ │
│ └─────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────┘
```
---
## Component Details
### 1. Cache Manager (`backend/onyx/onyxbot/discord/cache.py`)
The `DiscordCacheManager` maintains two critical in-memory mappings:
```python
class DiscordCacheManager:
_guild_tenants: dict[int, str] # guild_id → tenant_id
_api_keys: dict[str, str] # tenant_id → api_key
_lock: asyncio.Lock # Concurrency control
```
#### Key Responsibilities
| Function | Purpose |
|----------|---------|
| `get_tenant(guild_id)` | O(1) lookup: guild → tenant |
| `get_api_key(tenant_id)` | O(1) lookup: tenant → API key |
| `refresh_all()` | Full cache rebuild from database |
| `refresh_guild()` | Incremental update for single guild |
#### API Key Provisioning Strategy
API keys are **lazily provisioned** - only created when first needed:
```python
async def _load_tenant_data(self, tenant_id: str) -> tuple[list[int], str | None]:
needs_key = tenant_id not in self._api_keys
with get_session_with_tenant(tenant_id) as db:
# Load guild configs
configs = get_discord_bot_configs(db)
guild_ids = [c.guild_id for c in configs if c.enabled]
# Only provision API key if not already cached
api_key = None
if needs_key:
api_key = get_or_create_discord_service_api_key(db, tenant_id)
return guild_ids, api_key
```
This optimization avoids repeated database calls for API key generation.
#### Concurrency Control
All write operations acquire an async lock to prevent race conditions:
```python
async def refresh_all(self) -> None:
async with self._lock:
# Safe to modify _guild_tenants and _api_keys
for tenant_id in get_all_tenant_ids():
guild_ids, api_key = await self._load_tenant_data(tenant_id)
# Update mappings...
```
Read operations (`get_tenant`, `get_api_key`) are lock-free: a single Python dict lookup is atomic under the GIL, so no lock is needed for reads.
---
### 2. API Client (`backend/onyx/onyxbot/discord/api_client.py`)
The `OnyxAPIClient` is a **stateless async HTTP client** that communicates with Onyx API pods.
#### Key Design: Per-Request API Key Injection
```python
class OnyxAPIClient:
async def send_chat_message(
self,
message: str,
api_key: str, # Injected per-request
persona_id: int | None,
...
) -> ChatFullResponse:
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}", # Tenant-specific auth
}
# Make request...
```
The client accepts `api_key` as a parameter to each method, enabling **dynamic tenant selection at request time**. This design allows a single client instance to serve multiple tenants:
```python
# Same client, different tenants
await api_client.send_chat_message(msg, api_key=key_for_tenant_1, ...)
await api_client.send_chat_message(msg, api_key=key_for_tenant_2, ...)
```
---
## Coordination Flow
### Message Processing Pipeline
When a Discord message arrives, the client coordinates cache and API client:
```python
async def on_message(self, message: Message) -> None:
guild_id = message.guild.id
# Step 1: Cache lookup - guild → tenant
tenant_id = self.cache.get_tenant(guild_id)
if not tenant_id:
return # Guild not registered
# Step 2: Cache lookup - tenant → API key
api_key = self.cache.get_api_key(tenant_id)
if not api_key:
logger.warning(f"No API key for tenant {tenant_id}")
return
# Step 3: API call with tenant-specific credentials
await process_chat_message(
message=message,
api_key=api_key, # Tenant-specific
persona_id=persona_id, # Tenant-specific
api_client=self.api_client,
)
```
### Startup Sequence
```python
async def setup_hook(self) -> None:
# 1. Initialize API client (create aiohttp session)
await self.api_client.initialize()
# 2. Populate cache with all tenants
await self.cache.refresh_all()
# 3. Start background refresh task
self._cache_refresh_task = self.loop.create_task(
self._periodic_cache_refresh() # Every 60 seconds
)
```
### Shutdown Sequence
```python
async def close(self) -> None:
# 1. Cancel background refresh
if self._cache_refresh_task:
self._cache_refresh_task.cancel()
# 2. Close Discord connection
await super().close()
# 3. Close API client session
await self.api_client.close()
# 4. Clear cache
self.cache.clear()
```
---
## Tenant Isolation Mechanisms
### 1. Per-Tenant API Keys
Each tenant has a dedicated service API key:
```python
# backend/onyx/db/discord_bot.py
def get_or_create_discord_service_api_key(db_session: Session, tenant_id: str) -> str:
existing = get_discord_service_api_key(db_session)
if existing:
return regenerate_key(existing)
# Create LIMITED role key (chat-only permissions)
return insert_api_key(
db_session=db_session,
api_key_args=APIKeyArgs(
name=DISCORD_SERVICE_API_KEY_NAME,
role=UserRole.LIMITED, # Minimal permissions
),
user_id=None, # Service account (system-owned)
).api_key
```
### 2. Database Context Variables
The cache uses context variables for proper tenant-scoped DB sessions:
```python
context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
with get_session_with_tenant(tenant_id) as db:
# All DB operations scoped to this tenant
...
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)
```
### 3. Enterprise Gating Support
Gated tenants are filtered during cache refresh:
```python
gated_tenants = fetch_ee_implementation_or_noop(
"onyx.server.tenants.product_gating",
"get_gated_tenants",
set(),
)()
for tenant_id in get_all_tenant_ids():
if tenant_id in gated_tenants:
continue # Skip gated tenants
```
---
## Cache Refresh Strategy
| Trigger | Method | Scope |
|---------|--------|-------|
| Startup | `refresh_all()` | All tenants |
| Periodic (60s) | `refresh_all()` | All tenants |
| Guild registration | `refresh_guild()` | Single tenant |
### Error Handling
- **Tenant-level errors**: Logged and skipped (doesn't stop other tenants)
- **Missing API key**: Bot silently ignores messages from that guild
- **Network errors**: Logged, cache continues with stale data until next refresh
---
## Key Design Insights
1. **Single Client, Multiple Tenants**: A single `OnyxAPIClient` and a single `DiscordCacheManager` instance serve all tenants via dynamic API key injection.
2. **Cache-First Architecture**: Guild lookups are O(1) in-memory; API keys are cached after first provisioning to avoid repeated DB calls.
3. **Graceful Degradation**: If an API key is missing or stale, the bot simply doesn't respond (no crash or error propagation).
4. **Thread Safety Without Blocking**: `asyncio.Lock` prevents race conditions while maintaining async concurrency for reads.
5. **Lazy Provisioning**: API keys are only created when first needed, then cached for performance.
6. **Stateless API Client**: The HTTP client holds no tenant state - all tenant context is injected per-request via the `api_key` parameter.
---
## File References
| Component | Path |
|-----------|------|
| Cache Manager | `backend/onyx/onyxbot/discord/cache.py` |
| API Client | `backend/onyx/onyxbot/discord/api_client.py` |
| Discord Client | `backend/onyx/onyxbot/discord/client.py` |
| API Key DB Operations | `backend/onyx/db/discord_bot.py` |
| Cache Manager Tests | `backend/tests/unit/onyx/onyxbot/discord/test_cache_manager.py` |
| API Client Tests | `backend/tests/unit/onyx/onyxbot/discord/test_api_client.py` |

View File

@@ -592,11 +592,8 @@ def build_slack_response_blocks(
)
citations_blocks = []
document_blocks = []
if answer.citation_info:
citations_blocks = _build_citations_blocks(answer)
else:
document_blocks = _priority_ordered_documents_blocks(answer)
citations_divider = [DividerBlock()] if citations_blocks else []
buttons_divider = [DividerBlock()] if web_follow_up_block or follow_up_block else []
@@ -608,7 +605,6 @@ def build_slack_response_blocks(
+ ai_feedback_block
+ citations_divider
+ citations_blocks
+ document_blocks
+ buttons_divider
+ web_follow_up_block
+ follow_up_block

View File

@@ -1,65 +1,270 @@
from mistune import Markdown # type: ignore[import-untyped]
from mistune import Renderer
import re
from collections.abc import Callable
from typing import Any
from mistune import create_markdown
from mistune import HTMLRenderer
# Tags that should be replaced with a newline (line-break and block-level elements).
_HTML_NEWLINE_TAG_PATTERN = re.compile(
    r"<br\s*/?>|</(?:p|div|li|h[1-6]|tr|blockquote|section|article)>",
    re.IGNORECASE,
)
# Strips HTML tags but excludes autolinks like <https://...> and <mailto:...>
# (the negative lookahead keeps those intact so Slack still renders them as links).
_HTML_TAG_PATTERN = re.compile(
    r"<(?!https?://|mailto:)/?[a-zA-Z][^>]*>",
)
# Matches fenced code blocks (``` ... ```) so we can skip sanitization inside them.
_FENCED_CODE_BLOCK_PATTERN = re.compile(r"```[\s\S]*?```")
# Matches the start of any markdown link: [text]( or [[n]](
# The inner group handles nested brackets for citation links like [[1]](.
_MARKDOWN_LINK_PATTERN = re.compile(r"\[(?:[^\[\]]|\[[^\]]*\])*\]\(")
# Matches Slack-style links <url|text> that LLMs sometimes output directly.
# Mistune doesn't recognise this syntax, so text() would escape the angle
# brackets and Slack would render them as literal text instead of links.
_SLACK_LINK_PATTERN = re.compile(r"<(https?://[^|>]+)\|([^>]+)>")
def _sanitize_html(text: str) -> str:
    """Remove HTML markup from a text fragment.

    <br> and closing block-level tags become newlines; every other tag is
    dropped entirely. Autolinks such as <https://...> and <mailto:...> are
    left untouched (the tag pattern explicitly excludes them).
    """
    with_newlines = _HTML_NEWLINE_TAG_PATTERN.sub("\n", text)
    return _HTML_TAG_PATTERN.sub("", with_newlines)
def _transform_outside_code_blocks(
    message: str, transform: Callable[[str], str]
) -> str:
    """Apply *transform* to every span of *message* outside ``` fences.

    Fenced code blocks are spliced back in verbatim so their contents are
    never altered by the transformation.
    """
    outside_spans = _FENCED_CODE_BLOCK_PATTERN.split(message)
    fenced_spans = _FENCED_CODE_BLOCK_PATTERN.findall(message)
    # re.split (no capturing groups) yields exactly len(fenced_spans) + 1 parts,
    # so zip pairs each fence with the outside text that follows it.
    pieces: list[str] = [transform(outside_spans[0])]
    for fence, following in zip(fenced_spans, outside_spans[1:]):
        pieces.append(fence)
        pieces.append(transform(following))
    return "".join(pieces)
def _extract_link_destination(message: str, start_idx: int) -> tuple[str, int | None]:
"""Extract markdown link destination, allowing nested parentheses in the URL."""
depth = 0
i = start_idx
while i < len(message):
curr = message[i]
if curr == "\\":
i += 2
continue
if curr == "(":
depth += 1
elif curr == ")":
if depth == 0:
return message[start_idx:i], i
depth -= 1
i += 1
return message[start_idx:], None
def _normalize_link_destinations(message: str) -> str:
    """Wrap markdown link URLs in angle brackets for safe parsing.

    ``[text](url)`` breaks when the URL contains unescaped parentheses,
    spaces, or other special characters. Rewriting it as ``[text](<url>)``
    tells the parser to treat everything inside as a literal URL. Applies to
    every link, not just citations.
    """
    if "](" not in message:
        return message
    pieces: list[str] = []
    pos = 0
    while True:
        match = _MARKDOWN_LINK_PATTERN.search(message, pos)
        if match is None:
            break
        pieces.append(message[pos : match.end()])
        dest_start = match.end()
        destination, close_idx = _extract_link_destination(message, dest_start)
        if close_idx is None:
            # Unterminated link: emit the remainder untouched.
            pieces.append(message[dest_start:])
            return "".join(pieces)
        wrapped = destination.startswith("<") and destination.endswith(">")
        if destination and not wrapped:
            destination = f"<{destination}>"
        pieces.append(destination)
        pieces.append(")")
        pos = close_idx + 1
    pieces.append(message[pos:])
    return "".join(pieces)
def _convert_slack_links_to_markdown(message: str) -> str:
    """Rewrite Slack mrkdwn links ``<url|text>`` as markdown ``[text](url)``.

    LLMs occasionally emit Slack's native link syntax directly. Mistune does
    not parse it, so without this rewrite the angle brackets would be escaped
    by text() and Slack would show the link as literal text.
    """

    def _rewrite(fragment: str) -> str:
        return _SLACK_LINK_PATTERN.sub(r"[\2](\1)", fragment)

    return _transform_outside_code_blocks(message, _rewrite)
def format_slack_message(message: str | None) -> str:
    """Render an LLM markdown *message* as Slack mrkdwn.

    Returns "" for ``None``. Outside fenced code blocks, HTML is stripped and
    Slack-style <url|text> links are converted to markdown; link destinations
    are then wrapped in angle brackets before rendering with mistune.

    Fix: removed a stale ``return Markdown(...).render(message)`` line left
    over from the pre-mistune-3 implementation — it made the real body
    unreachable and referenced the no-longer-imported ``Markdown`` class.
    """
    if message is None:
        return ""
    message = _transform_outside_code_blocks(message, _sanitize_html)
    message = _convert_slack_links_to_markdown(message)
    normalized_message = _normalize_link_destinations(message)
    md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough", "table"])
    result = md(normalized_message)
    # With HTMLRenderer, result is always str (not AST list)
    assert isinstance(result, str)
    return result.rstrip("\n")
class SlackRenderer(Renderer):
class SlackRenderer(HTMLRenderer):
"""Renders markdown as Slack mrkdwn format instead of HTML.
Overrides all HTMLRenderer methods that produce HTML tags to ensure
no raw HTML ever appears in Slack messages.
"""
SPECIALS: dict[str, str] = {"&": "&amp;", "<": "&lt;", ">": "&gt;"}
def __init__(self) -> None:
super().__init__()
self._table_headers: list[str] = []
self._current_row_cells: list[str] = []
def escape_special(self, text: str) -> str:
for special, replacement in self.SPECIALS.items():
text = text.replace(special, replacement)
return text
def header(self, text: str, level: int, raw: str | None = None) -> str:
return f"*{text}*\n"
def heading(self, text: str, level: int, **attrs: Any) -> str: # noqa: ARG002
return f"*{text}*\n\n"
def emphasis(self, text: str) -> str:
return f"_{text}_"
def double_emphasis(self, text: str) -> str:
def strong(self, text: str) -> str:
return f"*{text}*"
def strikethrough(self, text: str) -> str:
return f"~{text}~"
def list(self, body: str, ordered: bool = True) -> str:
lines = body.split("\n")
def list(self, text: str, ordered: bool, **attrs: Any) -> str:
lines = text.split("\n")
count = 0
for i, line in enumerate(lines):
if line.startswith("li: "):
count += 1
prefix = f"{count}. " if ordered else ""
lines[i] = f"{prefix}{line[4:]}"
return "\n".join(lines)
return "\n".join(lines) + "\n"
def list_item(self, text: str) -> str:
return f"li: {text}\n"
def link(self, link: str, title: str | None, content: str | None) -> str:
escaped_link = self.escape_special(link)
if content:
return f"<{escaped_link}|{content}>"
def link(self, text: str, url: str, title: str | None = None) -> str:
escaped_url = self.escape_special(url)
if text:
return f"<{escaped_url}|{text}>"
if title:
return f"<{escaped_link}|{title}>"
return f"<{escaped_link}>"
return f"<{escaped_url}|{title}>"
return f"<{escaped_url}>"
def image(self, src: str, title: str | None, text: str | None) -> str:
escaped_src = self.escape_special(src)
def image(self, text: str, url: str, title: str | None = None) -> str:
escaped_url = self.escape_special(url)
display_text = title or text
return f"<{escaped_src}|{display_text}>" if display_text else f"<{escaped_src}>"
return f"<{escaped_url}|{display_text}>" if display_text else f"<{escaped_url}>"
def codespan(self, text: str) -> str:
return f"`{text}`"
def block_code(self, text: str, lang: str | None) -> str:
return f"```\n{text}\n```\n"
def block_code(self, code: str, info: str | None = None) -> str: # noqa: ARG002
return f"```\n{code.rstrip(chr(10))}\n```\n\n"
def linebreak(self) -> str:
return "\n"
def thematic_break(self) -> str:
return "---\n\n"
def block_quote(self, text: str) -> str:
lines = text.strip().split("\n")
quoted = "\n".join(f">{line}" for line in lines)
return quoted + "\n\n"
def block_html(self, html: str) -> str:
return _sanitize_html(html) + "\n\n"
def block_error(self, text: str) -> str:
return f"```\n{text}\n```\n\n"
def text(self, text: str) -> str:
# Only escape the three entities Slack recognizes: & < >
# HTMLRenderer.text() also escapes " to &quot; which Slack renders
# as literal &quot; text since Slack doesn't recognize that entity.
return self.escape_special(text)
# -- Table rendering (converts markdown tables to vertical cards) --
def table_cell(
self, text: str, align: str | None = None, head: bool = False # noqa: ARG002
) -> str:
if head:
self._table_headers.append(text.strip())
else:
self._current_row_cells.append(text.strip())
return ""
def table_head(self, text: str) -> str: # noqa: ARG002
self._current_row_cells = []
return ""
def table_row(self, text: str) -> str: # noqa: ARG002
cells = self._current_row_cells
self._current_row_cells = []
# First column becomes the bold title, remaining columns are bulleted fields
lines: list[str] = []
if cells:
title = cells[0]
if title:
# Avoid double-wrapping if cell already contains bold markup
if title.startswith("*") and title.endswith("*") and len(title) > 1:
lines.append(title)
else:
lines.append(f"*{title}*")
for i, cell in enumerate(cells[1:], start=1):
if i < len(self._table_headers):
lines.append(f"{self._table_headers[i]}: {cell}")
else:
lines.append(f"{cell}")
return "\n".join(lines) + "\n\n"
def table_body(self, text: str) -> str:
return text
def table(self, text: str) -> str:
self._table_headers = []
self._current_row_cells = []
return text + "\n"
def paragraph(self, text: str) -> str:
return f"{text}\n"
def autolink(self, link: str, is_email: bool) -> str:
return link if is_email else self.link(link, None, None)
return f"{text}\n\n"

View File

@@ -32,6 +32,9 @@ from onyx.configs.constants import MessageType
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.onyxbot_configs import NOTIFY_SLACKBOT_NO_ANSWER
from onyx.connectors.slack.utils import expert_info_from_slack_id
from onyx.context.search.retrieval.search_runner import (
download_nltk_data,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
@@ -1126,6 +1129,9 @@ if __name__ == "__main__":
set_is_ee_based_on_env_variable()
logger.info("Verifying query preprocessing (NLTK) data is downloaded")
download_nltk_data()
try:
# Keep the main thread alive
while tenant_handler.running:

View File

@@ -96,7 +96,7 @@ ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."
CHAT_NAMING_SYSTEM_PROMPT = """
Given the conversation history, provide a SHORT name for the conversation. Focus the name on the important keywords to convey the topic of the conversation. \
Make sure the name is in the same language as the user's first message.
Make sure the name is in the same language as the user's language.
IMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. NEVER USE MORE THAN 5 WORDS, LESS IS FINE.
""".strip()

View File

@@ -19,7 +19,7 @@ If you need to ask questions, follow these guidelines:
- Be concise and do not ask more than 5 questions.
- If there are ambiguous terms or questions, ask the user to clarify.
- Your questions should be a numbered list for clarity.
- Respond in the same language as the user's query.
- Respond in the user's language.
- Make sure to gather all the information needed to carry out the research task in a concise, well-structured manner.{{internal_search_clarification_guidance}}
- Wrap up with a quick sentence on what the clarification will help with, it's ok to reference the user query closely here.
""".strip()
@@ -44,9 +44,9 @@ For context, the date is {current_datetime}.
The research plan should be formatted as a numbered list of steps and have 6 or less individual steps.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps. The plan should be in the same language as the user's query.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps.
Output only the numbered list of steps with no additional prefix or suffix.
Output only the numbered list of steps with no additional prefix or suffix. Respond in the user's language.
""".strip()
@@ -76,11 +76,10 @@ You have currently used {{current_cycle_count}} of {{max_cycles}} max research c
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, other research agents, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}. The research task should be in the user's language.{{internal_search_research_task_guidance}}
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.
@@ -130,7 +129,7 @@ For context, the date is {current_datetime}.
Users have explicitly selected the deep research mode and will expect a long and detailed answer. It is ok and encouraged that your response is several pages long.
You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible.
You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible. Respond in the user's language.
Not every fact retrieved will be relevant to the user's query.
@@ -166,11 +165,10 @@ You have currently used {{current_cycle_count}} of {{max_cycles}} max research c
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}. The research task should be in the user's language.{{internal_search_research_task_guidance}}
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.

View File

@@ -48,7 +48,7 @@ Do not use the "site:" operator in your web search queries.
OPEN_URLS_GUIDANCE = """
## open_url
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your web searches or user specified URLs.
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches.
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources.
You should almost always use open_url after a web_search call. Use this tool when a user asks about a specific provided URL.
"""

View File

@@ -109,6 +109,7 @@ class TenantRedis(redis.Redis):
"unlock",
"get",
"set",
"setex",
"delete",
"exists",
"incrby",

View File

@@ -0,0 +1,24 @@
# Default prompt shortcuts ("input prompts" on the backend) available to all users.
# NOTE(review): negative ids appear to be reserved for these built-in defaults —
# confirm against the loader that seeds them.
input_prompts:
  - id: -5
    prompt: "Elaborate"
    content: "Elaborate on the above, give me a more in depth explanation."
    active: true
    is_public: true
  - id: -4
    prompt: "Reword"
    content: "Help me rewrite the following politely and concisely for professional communication:\n"
    active: true
    is_public: true
  - id: -3
    prompt: "Email"
    content: "Write a professional email for me including a subject line, signature, etc. Template the parts that need editing with [ ]. The email should cover the following points:\n"
    active: true
    is_public: true
  - id: -2
    prompt: "Debug"
    content: "Provide step-by-step troubleshooting instructions for the following issue:\n"
    active: true
    is_public: true

View File

@@ -0,0 +1,40 @@
import yaml
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.db.input_prompt import insert_input_prompt_if_not_exists
from onyx.utils.logger import setup_logger
logger = setup_logger()
def load_input_prompts_from_yaml(
    db_session: Session, input_prompts_yaml: str = INPUT_PROMPT_YAML
) -> None:
    """Seed the database with the prompt shortcuts defined in the YAML file.

    Rows that already exist are left untouched (insert-if-not-exists), and
    each insert is committed immediately.
    """
    with open(input_prompts_yaml, "r") as yaml_file:
        parsed = yaml.safe_load(yaml_file)

    for prompt_spec in parsed.get("input_prompts", []):
        # If these prompts are deleted (a hard delete in the DB), they get
        # recreated on server startup; users can simply deactivate them
        # instead, so this is only a light inconvenience.
        insert_input_prompt_if_not_exists(
            user=None,
            input_prompt_id=prompt_spec.get("id"),
            prompt=prompt_spec["prompt"],
            content=prompt_spec["content"],
            is_public=prompt_spec["is_public"],
            active=prompt_spec.get("active", True),
            db_session=db_session,
            commit=True,
        )
def load_chat_yamls(
    db_session: Session,
    input_prompts_yaml: str = INPUT_PROMPT_YAML,
) -> None:
    """Load every chat-related YAML configuration.

    Currently this is only the prompt shortcuts, which the backend calls
    "input prompts".
    """
    load_input_prompts_from_yaml(db_session, input_prompts_yaml)

View File

@@ -32,7 +32,6 @@ def get_document_info(
db_session: Session = Depends(get_session),
) -> DocumentInfo:
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
user_acl_filters = build_access_filters_for_user(user, db_session)
@@ -77,7 +76,6 @@ def get_chunk_info(
db_session: Session = Depends(get_session),
) -> ChunkInfo:
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
user_acl_filters = build_access_filters_for_user(user, db_session)

View File

@@ -821,36 +821,20 @@ def _ensure_mcp_server_owner_or_admin(server: DbMCPServer, user: User | None) ->
def _db_mcp_server_to_api_mcp_server(
db_server: DbMCPServer,
db: Session,
request_user: User | None,
include_auth_config: bool = False,
db_server: DbMCPServer, email: str, db: Session, include_auth_config: bool = False
) -> MCPServer:
"""Convert database MCP server to API model"""
email = request_user.email if request_user else ""
# Check if user has authentication configured and extract credentials
auth_performer = db_server.auth_performer
user_authenticated: bool | None = None
user_credentials = None
admin_credentials = None
can_view_admin_credentials = bool(include_auth_config) and (
request_user is not None
and (
request_user.role == UserRole.ADMIN
or (request_user.email and request_user.email == db_server.owner)
)
)
if db_server.auth_type == MCPAuthenticationType.NONE:
user_authenticated = True # No auth required
elif auth_performer == MCPAuthenticationPerformer.ADMIN:
user_authenticated = db_server.admin_connection_config is not None
if (
can_view_admin_credentials
and db_server.admin_connection_config is not None
and include_auth_config
):
if include_auth_config and db_server.admin_connection_config is not None:
if db_server.auth_type == MCPAuthenticationType.API_TOKEN:
admin_credentials = {
"api_key": db_server.admin_connection_config.config["headers"][
@@ -906,12 +890,11 @@ def _db_mcp_server_to_api_mcp_server(
if client_info:
if not client_info.client_id or not client_info.client_secret:
raise ValueError("Stored client info had empty client ID or secret")
if can_view_admin_credentials:
admin_credentials = {
"client_id": client_info.client_id,
"client_secret": client_info.client_secret,
}
elif can_view_admin_credentials:
admin_credentials = {
"client_id": client_info.client_id,
"client_secret": client_info.client_secret,
}
else:
admin_credentials = {}
logger.warning(f"No client info found for server {db_server.name}")
@@ -978,13 +961,14 @@ def get_mcp_servers_for_assistant(
logger.info(f"Fetching MCP servers for assistant: {assistant_id}")
email = user.email if user else ""
try:
persona_id = int(assistant_id)
db_mcp_servers = get_mcp_servers_for_persona(persona_id, db, user)
# Convert to API model format with opportunistic token refresh for OAuth
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
_db_mcp_server_to_api_mcp_server(db_server, email, db)
for db_server in db_mcp_servers
]
@@ -997,25 +981,6 @@ def get_mcp_servers_for_assistant(
raise HTTPException(status_code=500, detail="Failed to fetch MCP servers")
@router.get("/servers", response_model=MCPServersResponse)
def get_mcp_servers_for_user(
db: Session = Depends(get_session),
user: User | None = Depends(current_user),
) -> MCPServersResponse:
"""List all MCP servers for use in agent configuration and chat UI.
This endpoint is intentionally available to all authenticated users so they
can attach MCP actions to assistants. Sensitive admin credentials are never
returned.
"""
db_mcp_servers = get_all_mcp_servers(db)
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
for db_server in db_mcp_servers
]
return MCPServersResponse(mcp_servers=mcp_servers)
def _get_connection_config(
mcp_server: DbMCPServer, is_admin: bool, user: User | None, db_session: Session
) -> MCPConnectionConfig | None:
@@ -1563,6 +1528,8 @@ def get_mcp_server_detail(
_ensure_mcp_server_owner_or_admin(server, user)
email = user.email if user else ""
# TODO: user permissions per mcp server not yet implemented, for now
# permissions are based on access to assistants
# # Quick permission check admin or user has access
@@ -1570,10 +1537,7 @@ def get_mcp_server_detail(
# raise HTTPException(status_code=403, detail="Forbidden")
return _db_mcp_server_to_api_mcp_server(
server,
db_session,
include_auth_config=True,
request_user=user,
server, email, db_session, include_auth_config=True
)
@@ -1632,12 +1596,13 @@ def get_mcp_servers_for_admin(
logger.info("Fetching all MCP servers for admin display")
email = user.email if user else ""
try:
db_mcp_servers = get_all_mcp_servers(db)
# Convert to API model format
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
_db_mcp_server_to_api_mcp_server(db_server, email, db)
for db_server in db_mcp_servers
]
@@ -1880,9 +1845,7 @@ def update_mcp_server_simple(
db_session.commit()
# Return the updated server in API format
return _db_mcp_server_to_api_mcp_server(
updated_server, db_session, request_user=user
)
return _db_mcp_server_to_api_mcp_server(updated_server, user.email, db_session)
@admin_router.delete("/server/{server_id}")

View File

@@ -13,7 +13,6 @@ from onyx.configs.app_configs import PASSWORD_MIN_LENGTH
from onyx.configs.constants import DEV_VERSION_PATTERN
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import STABLE_VERSION_PATTERN
from onyx.db.auth import get_user_count
from onyx.server.manage.models import AllVersions
from onyx.server.manage.models import AuthTypeResponse
from onyx.server.manage.models import ContainerVersions
@@ -29,14 +28,12 @@ def healthcheck() -> StatusResponse:
@router.get("/auth/type", tags=PUBLIC_API_TAGS)
async def get_auth_type() -> AuthTypeResponse:
user_count = await get_user_count()
def get_auth_type() -> AuthTypeResponse:
return AuthTypeResponse(
auth_type=AUTH_TYPE,
requires_verification=user_needs_to_be_verified(),
anonymous_user_enabled=anonymous_user_enabled(),
password_min_length=PASSWORD_MIN_LENGTH,
has_users=user_count > 0,
)

View File

@@ -410,26 +410,20 @@ def list_llm_provider_basics(
all_providers = fetch_existing_llm_providers(db_session)
user_group_ids = fetch_user_group_ids(db_session, user) if user else set()
is_admin = user and user.role == UserRole.ADMIN
is_admin = user is not None and user.role == UserRole.ADMIN
accessible_providers = []
for provider in all_providers:
# Include all public providers
if provider.is_public:
accessible_providers.append(LLMProviderDescriptor.from_model(provider))
continue
# Include restricted providers user has access to via groups
if is_admin:
# Admins see all providers
accessible_providers.append(LLMProviderDescriptor.from_model(provider))
elif provider.groups:
# User must be in at least one of the provider's groups
if user_group_ids.intersection({g.id for g in provider.groups}):
accessible_providers.append(LLMProviderDescriptor.from_model(provider))
elif not provider.personas:
# No restrictions = accessible
# Use centralized access control logic with persona=None since we're
# listing providers without a specific persona context. This correctly:
# - Includes all public providers
# - Includes providers user can access via group membership
# - Excludes persona-only restricted providers (requires specific persona)
# - Excludes non-public providers with no restrictions (admin-only)
if can_user_access_llm_provider(
provider, user_group_ids, persona=None, is_admin=is_admin
):
accessible_providers.append(LLMProviderDescriptor.from_model(provider))
end_time = datetime.now(timezone.utc)

View File

@@ -44,8 +44,6 @@ class AuthTypeResponse(BaseModel):
requires_verification: bool
anonymous_user_enabled: bool | None = None
password_min_length: int
# whether there are any users in the system
has_users: bool = True
class UserSpecificAssistantPreference(BaseModel):
@@ -67,7 +65,6 @@ class UserPreferences(BaseModel):
auto_scroll: bool | None = None
temperature_override_enabled: bool | None = None
theme_preference: ThemePreference | None = None
chat_background: str | None = None
# controls which tools are enabled for the user for a specific assistant
assistant_specific_configs: UserSpecificAssistantPreferences | None = None
@@ -139,7 +136,6 @@ class UserInfo(BaseModel):
auto_scroll=user.auto_scroll,
temperature_override_enabled=user.temperature_override_enabled,
theme_preference=user.theme_preference,
chat_background=user.chat_background,
assistant_specific_configs=assistant_specific_configs,
)
),
@@ -203,10 +199,6 @@ class ThemePreferenceRequest(BaseModel):
theme_preference: ThemePreference
class ChatBackgroundRequest(BaseModel):
chat_background: str | None
class PersonalizationUpdateRequest(BaseModel):
name: str | None = None
role: str | None = None

View File

@@ -6,25 +6,33 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.engine.sql_engine import get_session
from onyx.db.index_attempt import expire_index_attempts
from onyx.db.models import IndexModelStatus
from onyx.db.models import User
from onyx.db.search_settings import create_search_settings
from onyx.db.search_settings import delete_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_embedding_provider_from_provider_type
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.document_index_utils import get_multipass_config
from onyx.document_index.factory import get_default_document_index
from onyx.file_processing.unstructured import delete_unstructured_api_key
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import update_unstructured_api_key
from onyx.natural_language_processing.search_nlp_models import clean_model_name
from onyx.server.manage.embedding.models import SearchSettingsDeleteRequest
from onyx.server.manage.models import FullModelVersionResponse
from onyx.server.models import IdReturn
from onyx.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MULTI_TENANT
router = APIRouter(prefix="/search-settings")
@@ -40,97 +48,91 @@ def set_new_search_settings(
"""Creates a new EmbeddingModel row and cancels the previous secondary indexing if any
Gives an error if the same model name is used as the current or secondary index
"""
# TODO(andrei): Re-enable.
logger.error("Setting new search settings is temporarily disabled.")
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="Setting new search settings is temporarily disabled.",
if search_settings_new.index_name:
logger.warning("Index name was specified by request, this is not suggested")
# Disallow contextual RAG for cloud deployments
if MULTI_TENANT and search_settings_new.enable_contextual_rag:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Contextual RAG disabled in Onyx Cloud",
)
# Validate cloud provider exists or create new LiteLLM provider
if search_settings_new.provider_type is not None:
cloud_provider = get_embedding_provider_from_provider_type(
db_session, provider_type=search_settings_new.provider_type
)
if cloud_provider is None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
)
search_settings = get_current_search_settings(db_session)
if search_settings_new.index_name is None:
# We define index name here
index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
if (
search_settings_new.model_name == search_settings.model_name
and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
):
index_name += ALT_INDEX_SUFFIX
search_values = search_settings_new.model_dump()
search_values["index_name"] = index_name
new_search_settings_request = SavedSearchSettings(**search_values)
else:
new_search_settings_request = SavedSearchSettings(
**search_settings_new.model_dump()
)
secondary_search_settings = get_secondary_search_settings(db_session)
if secondary_search_settings:
# Cancel any background indexing jobs
expire_index_attempts(
search_settings_id=secondary_search_settings.id, db_session=db_session
)
# Mark previous model as a past model directly
update_search_settings_status(
search_settings=secondary_search_settings,
new_status=IndexModelStatus.PAST,
db_session=db_session,
)
new_search_settings = create_search_settings(
search_settings=new_search_settings_request, db_session=db_session
)
# if search_settings_new.index_name:
# logger.warning("Index name was specified by request, this is not suggested")
# # Disallow contextual RAG for cloud deployments
# if MULTI_TENANT and search_settings_new.enable_contextual_rag:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail="Contextual RAG disabled in Onyx Cloud",
# )
# Ensure Vespa has the new index immediately
get_multipass_config(search_settings)
get_multipass_config(new_search_settings)
document_index = get_default_document_index(search_settings, new_search_settings)
# # Validate cloud provider exists or create new LiteLLM provider
# if search_settings_new.provider_type is not None:
# cloud_provider = get_embedding_provider_from_provider_type(
# db_session, provider_type=search_settings_new.provider_type
# )
document_index.ensure_indices_exist(
primary_embedding_dim=search_settings.final_embedding_dim,
primary_embedding_precision=search_settings.embedding_precision,
secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
secondary_index_embedding_precision=new_search_settings.embedding_precision,
)
# if cloud_provider is None:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
# )
# Pause index attempts for the currently in use index to preserve resources
if DISABLE_INDEX_UPDATE_ON_SWAP:
expire_index_attempts(
search_settings_id=search_settings.id, db_session=db_session
)
for cc_pair in get_connector_credential_pairs(db_session):
resync_cc_pair(
cc_pair=cc_pair,
search_settings_id=new_search_settings.id,
db_session=db_session,
)
# search_settings = get_current_search_settings(db_session)
# if search_settings_new.index_name is None:
# # We define index name here
# index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
# if (
# search_settings_new.model_name == search_settings.model_name
# and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
# ):
# index_name += ALT_INDEX_SUFFIX
# search_values = search_settings_new.model_dump()
# search_values["index_name"] = index_name
# new_search_settings_request = SavedSearchSettings(**search_values)
# else:
# new_search_settings_request = SavedSearchSettings(
# **search_settings_new.model_dump()
# )
# secondary_search_settings = get_secondary_search_settings(db_session)
# if secondary_search_settings:
# # Cancel any background indexing jobs
# expire_index_attempts(
# search_settings_id=secondary_search_settings.id, db_session=db_session
# )
# # Mark previous model as a past model directly
# update_search_settings_status(
# search_settings=secondary_search_settings,
# new_status=IndexModelStatus.PAST,
# db_session=db_session,
# )
# new_search_settings = create_search_settings(
# search_settings=new_search_settings_request, db_session=db_session
# )
# # Ensure Vespa has the new index immediately
# get_multipass_config(search_settings)
# get_multipass_config(new_search_settings)
# document_index = get_default_document_index(search_settings, new_search_settings)
# document_index.ensure_indices_exist(
# primary_embedding_dim=search_settings.final_embedding_dim,
# primary_embedding_precision=search_settings.embedding_precision,
# secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
# secondary_index_embedding_precision=new_search_settings.embedding_precision,
# )
# # Pause index attempts for the currently in use index to preserve resources
# if DISABLE_INDEX_UPDATE_ON_SWAP:
# expire_index_attempts(
# search_settings_id=search_settings.id, db_session=db_session
# )
# for cc_pair in get_connector_credential_pairs(db_session):
# resync_cc_pair(
# cc_pair=cc_pair,
# search_settings_id=new_search_settings.id,
# db_session=db_session,
# )
# db_session.commit()
# return IdReturn(id=new_search_settings.id)
db_session.commit()
return IdReturn(id=new_search_settings.id)
@router.post("/cancel-new-embedding")

View File

@@ -56,7 +56,6 @@ from onyx.db.user_preferences import get_latest_access_token_for_user
from onyx.db.user_preferences import update_assistant_preferences
from onyx.db.user_preferences import update_user_assistant_visibility
from onyx.db.user_preferences import update_user_auto_scroll
from onyx.db.user_preferences import update_user_chat_background
from onyx.db.user_preferences import update_user_default_model
from onyx.db.user_preferences import update_user_personalization
from onyx.db.user_preferences import update_user_pinned_assistants
@@ -76,7 +75,6 @@ from onyx.server.documents.models import PaginatedReturn
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.manage.models import AllUsersResponse
from onyx.server.manage.models import AutoScrollRequest
from onyx.server.manage.models import ChatBackgroundRequest
from onyx.server.manage.models import PersonalizationUpdateRequest
from onyx.server.manage.models import TenantInfo
from onyx.server.manage.models import TenantSnapshot
@@ -786,25 +784,6 @@ def update_user_theme_preference_api(
update_user_theme_preference(user.id, request.theme_preference, db_session)
@router.patch("/user/chat-background")
def update_user_chat_background_api(
request: ChatBackgroundRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
if user is None:
if AUTH_TYPE == AuthType.DISABLED:
store = get_kv_store()
no_auth_user = fetch_no_auth_user(store)
no_auth_user.preferences.chat_background = request.chat_background
set_no_auth_user_preferences(store, no_auth_user.preferences)
return
else:
raise RuntimeError("This should never happen")
update_user_chat_background(user.id, request.chat_background, db_session)
@router.patch("/user/default-model")
def update_user_default_model_api(
request: ChosenDefaultModelRequest,

View File

@@ -22,7 +22,7 @@ from onyx.db.models import User
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.indexing.adapters.document_indexing_adapter import (
DocumentIndexingBatchAdapter,
)
@@ -103,11 +103,9 @@ def upsert_ingestion_doc(
# Need to index for both the primary and secondary index if possible
active_search_settings = get_active_search_settings(db_session)
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
curr_doc_index = get_default_document_index(
active_search_settings.primary,
None,
None,
)
search_settings = get_current_search_settings(db_session)
@@ -130,7 +128,7 @@ def upsert_ingestion_doc(
indexing_pipeline_result = run_indexing_pipeline(
embedder=index_embedding_model,
document_indices=document_indices,
document_index=curr_doc_index,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -153,14 +151,13 @@ def upsert_ingestion_doc(
search_settings=sec_search_settings
)
# This flow is for indexing so we get all indices.
sec_document_indices = get_all_document_indices(
active_search_settings.secondary, None, None
sec_doc_index = get_default_document_index(
active_search_settings.secondary, None
)
run_indexing_pipeline(
embedder=new_index_embedding_model,
document_indices=sec_document_indices,
document_index=sec_doc_index,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -195,18 +192,15 @@ def delete_ingestion_doc(
)
active_search_settings = get_active_search_settings(db_session)
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
active_search_settings.primary,
active_search_settings.secondary,
None,
)
for document_index in document_indices:
document_index.delete_single(
doc_id=document_id,
tenant_id=tenant_id,
chunk_count=document.chunk_count,
)
doc_index.delete_single(
doc_id=document_id,
tenant_id=tenant_id,
chunk_count=document.chunk_count,
)
# Delete from database
delete_documents_complete__no_commit(db_session, [document_id])

View File

@@ -58,6 +58,7 @@ from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.feedback import create_chat_message_feedback
from onyx.db.feedback import remove_chat_message_feedback
from onyx.db.models import ChatSessionSharedStatus
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
@@ -266,7 +267,35 @@ def get_chat_session(
include_deleted=include_deleted,
)
except ValueError:
raise ValueError("Chat session does not exist or has been deleted")
try:
# If we failed to get a chat session, try to retrieve the session with
# less restrictive filters in order to identify what exactly mismatched
# so we can bubble up an accurate error code and message.
existing_chat_session = get_chat_session_by_id(
chat_session_id=session_id,
user_id=None,
db_session=db_session,
is_shared=False,
include_deleted=True,
)
except ValueError:
raise HTTPException(status_code=404, detail="Chat session not found")
if not include_deleted and existing_chat_session.deleted:
raise HTTPException(status_code=404, detail="Chat session has been deleted")
if is_shared:
if existing_chat_session.shared_status != ChatSessionSharedStatus.PUBLIC:
raise HTTPException(
status_code=403, detail="Chat session is not shared"
)
elif user_id is not None and existing_chat_session.user_id not in (
user_id,
None,
):
raise HTTPException(status_code=403, detail="Access denied")
raise HTTPException(status_code=404, detail="Chat session not found")
# for chat-seeding: if the session is unassigned, assign it now. This is done here
# to avoid another back and forth between FE -> BE before starting the first
@@ -530,30 +559,7 @@ def handle_new_chat_message(
return StreamingResponse(stream_generator(), media_type="text/event-stream")
@router.post(
"/send-chat-message",
response_model=ChatFullResponse,
tags=PUBLIC_API_TAGS,
responses={
200: {
"description": (
"If `stream=true`, returns `text/event-stream`.\n"
"If `stream=false`, returns `application/json` (ChatFullResponse)."
),
"content": {
"text/event-stream": {
"schema": {"type": "string"},
"examples": {
"stream": {
"summary": "Stream of NDJSON AnswerStreamPart's",
"value": "string",
}
},
},
},
}
},
)
@router.post("/send-chat-message", response_model=None, tags=PUBLIC_API_TAGS)
def handle_send_chat_message(
chat_message_req: SendMessageRequest,
request: Request,

View File

@@ -51,7 +51,6 @@ def admin_search(
tenant_id=tenant_id,
)
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
if not isinstance(document_index, VespaIndex):

View File

@@ -4,7 +4,6 @@ from typing import cast
from sqlalchemy.orm import Session
from onyx.chat.citation_utils import extract_citation_order_from_text
from onyx.configs.constants import MessageType
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
@@ -522,13 +521,6 @@ def translate_assistant_message_to_packets(
)
)
# Sort citations by order of appearance in message text
citation_order = extract_citation_order_from_text(chat_message.message or "")
order_map = {num: idx for idx, num in enumerate(citation_order)}
citation_info_list.sort(
key=lambda c: order_map.get(c.citation_number, float("inf"))
)
# Message comes after tool calls, with optional reasoning step beforehand
message_turn_index = max_tool_turn + 1
if chat_message.reasoning_tokens:

View File

@@ -6,6 +6,7 @@ from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.constants import KV_SEARCH_SETTINGS
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
@@ -13,6 +14,9 @@ from onyx.configs.embedding_configs import SupportedEmbeddingModel
from onyx.configs.model_configs import GEN_AI_API_KEY
from onyx.configs.model_configs import GEN_AI_MODEL_VERSION
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.retrieval.search_runner import (
download_nltk_data,
)
from onyx.db.connector import check_connectors_exist
from onyx.db.connector import create_initial_default_connector
from onyx.db.connector_credential_pair import associate_default_cc_pair
@@ -32,7 +36,7 @@ from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
@@ -42,6 +46,7 @@ from onyx.llm.constants import LlmProviderNames
from onyx.llm.well_known_providers.llm_provider_options import get_openai_model_names
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.seeding.load_yamls import load_input_prompts_from_yaml
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.settings.store import load_settings
@@ -111,6 +116,9 @@ def setup_onyx(
f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}."
)
logger.notice("Verifying query preprocessing (NLTK) data is downloaded")
download_nltk_data()
# setup Postgres with default credential, llm providers, etc.
setup_postgres(db_session)
@@ -124,15 +132,13 @@ def setup_onyx(
# Ensure Vespa is setup correctly, this step is relatively near the end because Vespa
# takes a bit of time to start up
logger.notice("Verifying Document Index(s) is/are available.")
# This flow is for setting up the document index so we get all indices here.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
search_settings,
secondary_search_settings,
None,
)
success = setup_document_indices(
document_indices,
success = setup_vespa(
document_index,
IndexingSetting.from_db_model(search_settings),
(
IndexingSetting.from_db_model(secondary_search_settings)
@@ -141,9 +147,7 @@ def setup_onyx(
),
)
if not success:
raise RuntimeError(
"Could not connect to a document index within the specified timeout."
)
raise RuntimeError("Could not connect to Vespa within the specified timeout.")
logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}")
if search_settings.provider_type is None:
@@ -225,62 +229,44 @@ def mark_reindex_flag(db_session: Session) -> None:
kv_store.store(KV_REINDEX_KEY, False)
def setup_document_indices(
document_indices: list[DocumentIndex],
def setup_vespa(
document_index: DocumentIndex,
index_setting: IndexingSetting,
secondary_index_setting: IndexingSetting | None,
num_attempts: int = VESPA_NUM_ATTEMPTS_ON_STARTUP,
) -> bool:
"""Sets up all input document indices.
If any document index setup fails, the function will return False. Otherwise
returns True.
"""
for document_index in document_indices:
# Document index startup is a bit slow, so give it a few seconds.
WAIT_SECONDS = 5
document_index_setup_success = False
for x in range(num_attempts):
try:
logger.notice(
f"Setting up document index {document_index.__class__.__name__} (attempt {x+1}/{num_attempts})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=index_setting.final_embedding_dim,
primary_embedding_precision=index_setting.embedding_precision,
secondary_index_embedding_dim=(
secondary_index_setting.final_embedding_dim
if secondary_index_setting
else None
),
secondary_index_embedding_precision=(
secondary_index_setting.embedding_precision
if secondary_index_setting
else None
),
)
logger.notice(
f"Document index {document_index.__class__.__name__} setup complete."
)
document_index_setup_success = True
break
except Exception:
logger.exception(
f"Document index {document_index.__class__.__name__} setup did not succeed. "
"The relevant service may not be ready yet. "
f"Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not document_index_setup_success:
logger.error(
f"Document index {document_index.__class__.__name__} setup did not succeed. "
f"Attempt limit reached. ({num_attempts})"
# Vespa startup is a bit slow, so give it a few seconds
WAIT_SECONDS = 5
for x in range(num_attempts):
try:
logger.notice(f"Setting up Vespa (attempt {x+1}/{num_attempts})...")
document_index.ensure_indices_exist(
primary_embedding_dim=index_setting.final_embedding_dim,
primary_embedding_precision=index_setting.embedding_precision,
secondary_index_embedding_dim=(
secondary_index_setting.final_embedding_dim
if secondary_index_setting
else None
),
secondary_index_embedding_precision=(
secondary_index_setting.embedding_precision
if secondary_index_setting
else None
),
)
return False
return True
logger.notice("Vespa setup complete.")
return True
except Exception:
logger.exception(
f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
logger.error(
f"Vespa setup did not succeed. Attempt limit reached. ({num_attempts})"
)
return False
def setup_postgres(db_session: Session) -> None:
@@ -289,6 +275,10 @@ def setup_postgres(db_session: Session) -> None:
create_initial_default_connector(db_session)
associate_default_cc_pair(db_session)
# Load input prompts and user folders from YAML
logger.notice("Loading input prompts and user folders")
load_input_prompts_from_yaml(db_session, INPUT_PROMPT_YAML)
if GEN_AI_API_KEY and fetch_default_provider(db_session) is None:
# Only for dev flows
logger.notice("Setting up default OpenAI LLM for dev.")
@@ -357,8 +347,6 @@ def setup_multitenant_onyx() -> None:
def setup_vespa_multitenant(supported_indices: list[SupportedEmbeddingModel]) -> bool:
# TODO(andrei): We don't yet support OpenSearch for multi-tenant instances
# so this function remains unchanged.
# This is for local testing
WAIT_SECONDS = 5
VESPA_ATTEMPTS = 5

View File

@@ -60,7 +60,6 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tools.utils import generate_tools_description
@@ -432,14 +431,6 @@ def run_research_agent_call(
max_concurrent_tools=1,
# May be better to not do this step, hard to say, needs to be tested
skip_search_query_expansion=False,
url_snippet_map=extract_url_snippet_map(
[
search_doc
for tool_call in state_container.get_tool_calls()
if tool_call.search_docs
for search_doc in tool_call.search_docs
]
),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = (
@@ -474,14 +465,8 @@ def run_research_agent_call(
)
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
# This is used for the Open URL reminder in the next cycle
# only do this if the web search tool yielded results
@@ -514,7 +499,7 @@ def run_research_agent_call(
or most_recent_reasoning,
tool_call_arguments=tool_call.tool_args,
tool_call_response=tool_response.llm_facing_response,
search_docs=displayed_docs or search_docs,
search_docs=search_docs,
generated_images=None,
)
state_container.add_tool_call(tool_call_info)

View File

@@ -36,15 +36,6 @@ class ToolCallException(Exception):
self.llm_facing_message = llm_facing_message
class ToolExecutionException(Exception):
"""Exception raise for errors during tool execution."""
def __init__(self, message: str, emit_error_packet: bool = False):
super().__init__(message)
self.emit_error_packet = emit_error_packet
class SearchToolUsage(str, Enum):
DISABLED = "disabled"
ENABLED = "enabled"
@@ -151,7 +142,6 @@ class OpenURLToolOverrideKwargs(BaseModel):
# To know what citation number to start at for constructing the string to the LLM
starting_citation_num: int
citation_mapping: dict[str, int]
url_snippet_map: dict[str, str]
# None indicates that the default value should be used

View File

@@ -19,6 +19,7 @@ from onyx.db.oauth_config import get_oauth_config
from onyx.db.search_settings import get_current_search_settings
from onyx.db.tools import get_builtin_tool
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
@@ -119,9 +120,18 @@ def construct_tools(
if user and user.oauth_accounts:
user_oauth_token = user.oauth_accounts[0].access_token
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
document_index_cache: DocumentIndex | None = None
search_settings_cache = None
def _get_document_index() -> DocumentIndex:
nonlocal document_index_cache, search_settings_cache
if document_index_cache is None:
if search_settings_cache is None:
search_settings_cache = get_current_search_settings(db_session)
document_index_cache = get_default_document_index(
search_settings_cache, None
)
return document_index_cache
added_search_tool = False
for db_tool_model in persona.tools:
@@ -164,7 +174,7 @@ def construct_tools(
user=user,
persona=persona,
llm=llm,
document_index=document_index,
document_index=_get_document_index(),
user_selected_filters=search_tool_config.user_selected_filters,
project_id=search_tool_config.project_id,
bypass_acl=search_tool_config.bypass_acl,
@@ -218,7 +228,7 @@ def construct_tools(
OpenURLTool(
tool_id=db_tool_model.id,
emitter=emitter,
document_index=document_index,
document_index=_get_document_index(),
user=user,
)
]
@@ -377,6 +387,9 @@ def construct_tools(
if not search_tool_config:
search_tool_config = SearchToolConfig()
search_settings = get_current_search_settings(db_session)
document_index = get_default_document_index(search_settings, None)
search_tool = SearchTool(
tool_id=search_tool_db_model.id,
db_session=db_session,

View File

@@ -23,7 +23,6 @@ from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeart
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
@@ -189,9 +188,7 @@ class ImageGenerationTool(Tool[None]):
except requests.RequestException as e:
logger.error(f"Error fetching or converting image: {e}")
raise ToolExecutionException(
"Failed to fetch or convert the generated image", emit_error_packet=True
)
raise ValueError("Failed to fetch or convert the generated image")
except Exception as e:
logger.debug(f"Error occurred during image generation: {e}")
@@ -201,27 +198,18 @@ class ImageGenerationTool(Tool[None]):
"Your request was rejected as a result of our safety system"
in error_message
):
raise ToolExecutionException(
(
"The image generation request was rejected due to OpenAI's content policy. "
"Please try a different prompt."
),
emit_error_packet=True,
raise ValueError(
"The image generation request was rejected due to OpenAI's content policy. Please try a different prompt."
)
elif "Invalid image URL" in error_message:
raise ToolExecutionException(
"Invalid image URL provided for image generation.",
emit_error_packet=True,
)
raise ValueError("Invalid image URL provided for image generation.")
elif "invalid_request_error" in error_message:
raise ToolExecutionException(
"Invalid request for image generation. Please check your input.",
emit_error_packet=True,
raise ValueError(
"Invalid request for image generation. Please check your input."
)
raise ToolExecutionException(
f"An error occurred during image generation. error={error_message}",
emit_error_packet=True,
raise ValueError(
"An error occurred during image generation. Please try again later."
)
def run(

View File

@@ -492,7 +492,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
indexed_result, crawled_result = run_functions_tuples_in_parallel(
[
(_retrieve_indexed_with_filters, (all_requests,)),
(self._fetch_web_content, (urls, override_kwargs.url_snippet_map)),
(self._fetch_web_content, (urls,)),
],
allow_failures=True,
timeout=OPEN_URL_TIMEOUT_SECONDS,
@@ -800,7 +800,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
return merged_sections
def _fetch_web_content(
self, urls: list[str], url_snippet_map: dict[str, str]
self, urls: list[str]
) -> tuple[list[InferenceSection], list[str]]:
if not urls:
return [], []
@@ -831,11 +831,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
and content.full_content
and not is_insufficient
):
sections.append(
inference_section_from_internet_page_scrape(
content, url_snippet_map.get(content.link, "")
)
)
sections.append(inference_section_from_internet_page_scrape(content))
else:
# TODO: Slight improvement - if failed URL reasons are passed back to the LLM
# for example, if it tries to crawl Reddit and fails, it should know (probably) that this error would

View File

@@ -1,239 +0,0 @@
import unicodedata
from pydantic import BaseModel
from rapidfuzz import fuzz
from rapidfuzz import utils
from onyx.utils.text_processing import is_zero_width_char
from onyx.utils.text_processing import normalize_char
class SnippetMatchResult(BaseModel):
    # Whether the snippet was located anywhere in the content.
    snippet_located: bool
    # Inclusive start/end indices of the match within the ORIGINAL (pre-
    # normalization) content string; both stay -1 when no match was found.
    start_idx: int = -1
    end_idx: int = -1
# Shared sentinel returned for every "no match" outcome.
# NOTE(review): this is a single shared instance — callers must treat it as
# read-only; mutating it would corrupt results for all other callers.
NegativeSnippetMatchResult = SnippetMatchResult(snippet_located=False)
def find_snippet_in_content(content: str, snippet: str) -> SnippetMatchResult:
    """
    Locate `snippet` within `content` and report the matched index range.

    Strategies, tried in order until one succeeds:
    1. Normalize both strings and attempt a direct substring match.
    2. Fall back to a token-based fuzzy search.

    If the snippet occurs multiple times, the first normalized occurrence
    is the one reported.
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult
    for strategy in (_normalize_and_match, _token_based_match):
        outcome = strategy(content, snippet)
        if outcome.snippet_located:
            return outcome
    return NegativeSnippetMatchResult
def _normalize_and_match(content: str, snippet: str) -> SnippetMatchResult:
    """
    Normalizes the snippet & content, then performs a direct string match.

    Both strings are normalized with position maps so that a match found in
    normalized space can be translated back to inclusive start/end indices
    in the ORIGINAL content. Characters stripped from the snippet's edges
    during normalization are re-included from the content where possible.
    """
    normalized_content, content_map = _normalize_text_with_mapping(content)
    # NOTE(review): despite its name, `url_snippet_map` is the snippet's
    # normalized-position -> original-position map (same shape as
    # `content_map`), not a URL mapping — consider renaming.
    normalized_snippet, url_snippet_map = _normalize_text_with_mapping(snippet)
    if not normalized_content or not normalized_snippet:
        return NegativeSnippetMatchResult
    pos = normalized_content.find(normalized_snippet)
    if pos != -1:
        # Translate the normalized match start back to the original content.
        original_start = content_map[pos]
        # Account for leading characters stripped from snippet during normalization
        # (e.g., leading punctuation like "[![]![]]" that was removed)
        if url_snippet_map:
            first_snippet_orig_pos = url_snippet_map[0]
            if first_snippet_orig_pos > 0:
                # There were leading characters stripped from snippet
                # Extend start position backwards to include them from content
                original_start = max(original_start - first_snippet_orig_pos, 0)
        # Determine end position, including any trailing characters that were
        # normalized away (e.g., punctuation)
        match_end_norm = pos + len(normalized_snippet)
        if match_end_norm >= len(content_map):
            # Match extends to end of normalized content - include all trailing chars
            original_end = len(content) - 1
        else:
            # Match is in the middle - end at character before next normalized char
            original_end = content_map[match_end_norm] - 1
        # Account for trailing characters stripped from snippet during normalization
        # (e.g., trailing punctuation like "\n[" that was removed)
        if url_snippet_map:
            last_snippet_orig_pos = url_snippet_map[-1]
            trailing_stripped = len(snippet) - last_snippet_orig_pos - 1
            if trailing_stripped > 0:
                # Extend end position to include trailing characters from content
                # that correspond to the stripped trailing snippet characters
                original_end = min(original_end + trailing_stripped, len(content) - 1)
        return SnippetMatchResult(
            snippet_located=True,
            start_idx=original_start,
            end_idx=original_end,
        )
    return NegativeSnippetMatchResult
def _normalize_text_with_mapping(text: str) -> tuple[str, list[int]]:
    """
    Text normalization that maintains position mapping.

    Normalization steps applied, in order: NFC unicode normalization,
    HTML-entity replacement, zero-width character removal, per-character
    normalization via normalize_char, and whitespace collapsing (runs of
    spaces become a single space; leading/trailing spaces are dropped).

    Returns:
        tuple: (normalized_text, position_map)
        - position_map[i] gives the original position for normalized position i
    """
    if not text:
        return "", []
    original_text = text
    # Step 1: NFC normalization with position mapping
    nfc_text = unicodedata.normalize("NFC", text)
    # Build mapping from NFC positions to original start positions
    # (NFC can merge several original code points into one character).
    nfc_to_orig: list[int] = []
    orig_idx = 0
    for nfc_char in nfc_text:
        nfc_to_orig.append(orig_idx)
        # Find how many original chars contributed to this NFC char
        # NOTE(review): this inner scan is linear per character, so the
        # worst case here is quadratic — fine for snippet-sized inputs,
        # but worth confirming for very large documents.
        for length in range(1, len(original_text) - orig_idx + 1):
            substr = original_text[orig_idx : orig_idx + length]
            if unicodedata.normalize("NFC", substr) == nfc_char:
                orig_idx += length
                break
        else:
            orig_idx += 1  # Fallback: advance one char if no prefix matched
    # Work with NFC text from here
    text = nfc_text
    # Common HTML entities mapped to their plain-text equivalents.
    html_entities = {
        "&nbsp;": " ",
        "&#160;": " ",
        "&amp;": "&",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&apos;": "'",
        "&#39;": "'",
        "&#x27;": "'",
        "&ndash;": "-",
        "&mdash;": "-",
        "&hellip;": "...",
        "&#xB0;": "°",
        "&#xBA;": "°",  # NOTE(review): ordinal º folded to ° — confirm intended
        "&zwj;": "",
    }
    # Sort entities by length (longest first) for greedy matching
    sorted_entities = sorted(html_entities.keys(), key=len, reverse=True)
    result_chars = []
    result_map = []
    i = 0
    last_was_space = True  # Track to avoid leading spaces
    while i < len(text):
        # Convert NFC position to original position
        orig_pos = nfc_to_orig[i] if i < len(nfc_to_orig) else len(original_text) - 1
        char = text[i]
        output = None
        step = 1
        # Check for HTML entities first (greedy match)
        for entity in sorted_entities:
            if text[i : i + len(entity)] == entity:
                output = html_entities[entity]
                step = len(entity)
                break
        # If no entity matched, process single character
        if output is None:
            # Skip zero-width characters
            if is_zero_width_char(char):
                i += 1
                continue
            output = normalize_char(char)
        # Add output to result, normalizing each character from entity output
        if output:
            for out_char in output:
                # Normalize entity output the same way as regular chars
                normalized = normalize_char(out_char)
                # Handle whitespace collapsing
                if normalized == " ":
                    if not last_was_space:
                        result_chars.append(" ")
                        result_map.append(orig_pos)
                    last_was_space = True
                else:
                    result_chars.append(normalized)
                    result_map.append(orig_pos)
                    last_was_space = False
        i += step
    # Remove trailing space if present
    if result_chars and result_chars[-1] == " ":
        result_chars.pop()
        result_map.pop()
    return "".join(result_chars), result_map
def _token_based_match(
    content: str,
    snippet: str,
    min_threshold: float = 0.8,
) -> SnippetMatchResult:
    """
    Fuzzy, token-based search for `snippet` inside `content`.

    min_threshold is a similarity ratio in [0, 1]; rapidfuzz reports scores
    in [0, 100], so the threshold is scaled before comparison.
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult
    alignment = fuzz.partial_ratio_alignment(
        content, snippet, processor=utils.default_process
    )
    if not alignment:
        return NegativeSnippetMatchResult
    if alignment.score >= min_threshold * 100:
        # src_start/src_end index into `content` (the first argument).
        return SnippetMatchResult(
            snippet_located=True,
            start_idx=alignment.src_start,
            end_idx=alignment.src_end,
        )
    return NegativeSnippetMatchResult

View File

@@ -832,7 +832,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
top_sections=merged_sections,
citation_start=override_kwargs.starting_citation_num,
limit=override_kwargs.max_llm_chunks,
include_document_id=False,
include_document_id=True,
)
# End overall timing
@@ -844,12 +844,12 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
f"document expansion: {document_expansion_elapsed:.3f}s)"
)
# TODO: extension - this can include the smaller set of approved docs to be saved/displayed in the UI
# for replaying. Currently the full set is returned and saved.
return ToolResponse(
# Typically the rich response will give more docs in case it needs to be displayed in the UI
rich_response=SearchDocsResponse(
search_docs=search_docs,
citation_mapping=citation_mapping,
displayed_docs=final_ui_docs or None,
search_docs=search_docs, citation_mapping=citation_mapping
),
# The LLM facing response typically includes less docs to cut down on noise and token usage
llm_facing_response=docs_str,

View File

@@ -73,7 +73,7 @@ def convert_inference_sections_to_llm_string(
link = next(iter(chunk.source_links.values()), None)
if link:
result["url"] = link
if include_document_id:
if include_document_id and "url" not in result:
result["document_identifier"] = chunk.document_id
if chunk.metadata:
result["metadata"] = json.dumps(chunk.metadata)

View File

@@ -1,19 +1,11 @@
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDoc
from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.snippet_matcher import (
find_snippet_in_content,
)
from onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
TRUNCATED_CONTENT_SUFFIX = " [...truncated]"
TRUNCATED_CONTENT_PREFIX = "[...truncated] "
def filter_web_search_results_with_no_title_or_snippet(
results: list[WebSearchResult],
) -> list[WebSearchResult]:
@@ -34,99 +26,14 @@ def truncate_search_result_content(content: str, max_chars: int = 15000) -> str:
"""Truncate search result content to a maximum number of characters"""
if len(content) <= max_chars:
return content
return content[:max_chars] + TRUNCATED_CONTENT_SUFFIX
def _truncate_content_around_snippet(
    content: str, snippet: str, max_chars: int = 15000
) -> str:
    """
    Truncate `content` to at most ~max_chars characters, centered on the
    location of `snippet`.

    Returns "" when the snippet cannot be located. Truncation markers are
    prepended/appended when content was cut at the start/end respectively.
    """
    match = find_snippet_in_content(content, snippet)
    if not match.snippet_located:
        return ""
    window_start, window_end = _expand_range_centered(
        match.start_idx, match.end_idx + 1, len(content), max_chars
    )
    window = content[window_start:window_end]
    # Mark any side that was actually cut off.
    prefix = TRUNCATED_CONTENT_PREFIX if window_start > 0 else ""
    suffix = TRUNCATED_CONTENT_SUFFIX if window_end < len(content) else ""
    return prefix + window + suffix
def _expand_range_centered(
start_idx: int, end_idx: int, N: int, target_size: int
) -> tuple[int, int]:
"""
Expands a range [start_idx, end_idx) to be centered within a list of size N
Args:
start_idx: Starting index (inclusive)
end_idx: Ending index (exclusive)
N: Size of the list
target_size: Target size of the range
Returns:
Tuple of (new start index, new end index)
"""
current_size = end_idx - start_idx
if current_size >= target_size:
return start_idx, end_idx
padding_needed = target_size - current_size
padding_top = padding_needed // 2
padding_bottom = padding_needed - padding_top
# Try expand symmetrically
new_start = start_idx - padding_top
new_end = end_idx + padding_bottom
# Handle overflow
if new_start < 0:
overflow = -new_start
new_start = 0
new_end = min(N, new_end + overflow)
if new_end > N:
overflow = new_end - N
new_end = N
new_start = max(0, new_start - overflow)
return new_start, new_end
return content[:max_chars] + " [...truncated]"
def inference_section_from_internet_page_scrape(
result: WebContent,
snippet: str,
rank: int = 0,
) -> InferenceSection:
# truncate the content around snippet if snippet exists
truncated_content = ""
if snippet:
truncated_content = _truncate_content_around_snippet(
result.full_content, snippet
)
# Fallback if no snippet exists or we failed to find it
if not truncated_content:
truncated_content = truncate_search_result_content(result.full_content)
truncated_content = truncate_search_result_content(result.full_content)
# Calculate score using reciprocal rank to preserve ordering
score = 1.0 / (rank + 1)
@@ -190,14 +97,3 @@ def inference_section_from_internet_search_result(
chunks=[chunk],
combined_content=result.snippet,
)
def extract_url_snippet_map(documents: list[SearchDoc]) -> dict[str, str]:
    """
    Given a list of SearchDocs, build the url -> blurb (summary) map.

    Only web-sourced documents that have a link are included; if the same
    link appears more than once, the last document's blurb wins.
    """
    return {
        doc.link: doc.blurb
        for doc in documents
        if doc.source_type == DocumentSource.WEB and doc.link
    }

View File

@@ -7,7 +7,6 @@ from onyx.chat.models import ChatMessageSimple
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDocsResponse
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PacketException
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.interface import Tool
from onyx.tools.models import ChatMinimalTextMessage
@@ -16,7 +15,6 @@ from onyx.tools.models import ParallelToolCallResponse
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
@@ -154,33 +152,6 @@ def _safe_run_single_tool(
},
)
)
except ToolExecutionException as e:
# Unexpected error during tool execution
logger.error(f"Unexpected error running tool {tool.name}: {e}")
tool_response = ToolResponse(
rich_response=None,
llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),
)
_error_tracing.attach_error_to_current_span(
SpanError(
message="Tool execution error (unexpected)",
data={
"tool_name": tool.name,
"tool_call_id": tool_call.tool_call_id,
"tool_args": tool_call.tool_args,
"error": str(e),
"stack_trace": traceback.format_exc(),
"error_type": type(e).__name__,
},
)
)
if e.emit_error_packet:
tool.emitter.emit(
Packet(
placement=tool_call.placement,
obj=PacketException(exception=e),
)
)
except Exception as e:
# Unexpected error during tool execution
logger.error(f"Unexpected error running tool {tool.name}: {e}")
@@ -229,8 +200,6 @@ def run_tool_calls(
max_concurrent_tools: int | None = None,
# Skip query expansion for repeat search tool calls
skip_search_query_expansion: bool = False,
# A map of url -> summary for passing web results to open url tool
url_snippet_map: dict[str, str] = {},
) -> ParallelToolCallResponse:
"""Run (optionally merged) tool calls in parallel and update citation mappings.
@@ -361,7 +330,6 @@ def run_tool_calls(
override_kwargs = OpenURLToolOverrideKwargs(
starting_citation_num=starting_citation_num,
citation_mapping=url_to_citation,
url_snippet_map=url_snippet_map,
)
starting_citation_num += 100

View File

@@ -9,36 +9,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger(__name__)
# Mapping of curly/smart quotes to straight quotes
CURLY_TO_STRAIGHT_QUOTES: dict[str, str] = {
    "\u2019": "'",  # Right single quotation mark
    "\u2018": "'",  # Left single quotation mark
    "\u201c": '"',  # Left double quotation mark
    "\u201d": '"',  # Right double quotation mark
}
# Zero-width characters that should typically be removed during text normalization
ZERO_WIDTH_CHARS: set[str] = {
    "\u200b",  # Zero-width space
    "\u200c",  # Zero-width non-joiner
    "\u200d",  # Zero-width joiner
    "\ufeff",  # Byte order mark / zero-width no-break space
    "\u2060",  # Word joiner
}
def normalize_curly_quotes(text: str) -> str:
    """Convert curly/smart quotes to straight quotes."""
    # All mappings are single char -> single char, so translate() is exact.
    return text.translate(str.maketrans(CURLY_TO_STRAIGHT_QUOTES))
def is_zero_width_char(c: str) -> bool:
    """Check if a character is a zero-width character."""
    return c in ZERO_WIDTH_CHARS
ESCAPE_SEQUENCE_RE = re.compile(
r"""
( \\U........ # 8-digit hex escapes
@@ -287,15 +257,3 @@ def remove_invalid_unicode_chars(text: str) -> str:
- Unicode non-characters
"""
return _INVALID_UNICODE_CHARS_RE.sub("", text)
def normalize_char(c: str) -> str:
    """Normalize a single character (curly quotes, whitespace, punctuation)."""
    # Map curly/smart quotes to their straight equivalents first.
    char = CURLY_TO_STRAIGHT_QUOTES.get(c, c)
    # Whitespace and punctuation (anything that is not a word char,
    # whitespace, or an apostrophe) both collapse to a single space.
    if char.isspace() or re.match(r"[^\w\s\']", char):
        return " "
    return char.lower()

View File

@@ -255,11 +255,11 @@ fastapi==0.116.1
# onyx
fastapi-limiter==0.1.6
# via onyx
fastapi-users==15.0.2
fastapi-users==14.0.1
# via
# fastapi-users-db-sqlalchemy
# onyx
fastapi-users-db-sqlalchemy==7.0.0
fastapi-users-db-sqlalchemy==5.0.0
# via onyx
fastavro==1.12.1
# via cohere
@@ -573,7 +573,7 @@ mcp==1.25.0
# onyx
mdurl==0.1.2
# via markdown-it-py
mistune==0.8.4
mistune==3.2.0
# via onyx
more-itertools==10.8.0
# via
@@ -608,7 +608,9 @@ mypy-extensions==1.0.0
nest-asyncio==1.6.0
# via onyx
nltk==3.9.1
# via unstructured
# via
# onyx
# unstructured
numpy==2.4.1
# via
# magika
@@ -782,7 +784,7 @@ psycopg2-binary==2.9.9
# via onyx
puremagic==1.28
# via onyx
pwdlib==0.3.0
pwdlib==0.2.1
# via fastapi-users
py==1.11.0
# via retry
@@ -902,7 +904,7 @@ python-json-logger==4.0.0
# via pydocket
python-magic==0.4.27
# via unstructured
python-multipart==0.0.21
python-multipart==0.0.20
# via
# fastapi-users
# mcp

View File

@@ -298,7 +298,7 @@ numpy==2.4.1
# pandas-stubs
# shapely
# voyageai
onyx-devtools==0.4.0
onyx-devtools==0.6.2
# via onyx
openai==2.14.0
# via

View File

@@ -45,9 +45,7 @@ from onyx.db.connector_credential_pair import (
get_connector_credential_pair,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.document_index.factory import (
get_all_document_indices,
)
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.file_store import get_default_file_store
# pylint: enable=E402
@@ -61,7 +59,7 @@ _DELETION_BATCH_SIZE = 1000
def _unsafe_deletion(
db_session: Session,
document_indices: list[DocumentIndex],
document_index: DocumentIndex,
cc_pair: ConnectorCredentialPair,
pair_id: int,
) -> int:
@@ -82,12 +80,11 @@ def _unsafe_deletion(
break
for document in documents:
for document_index in document_indices:
document_index.delete_single(
doc_id=document.id,
tenant_id=POSTGRES_DEFAULT_SCHEMA,
chunk_count=document.chunk_count,
)
document_index.delete_single(
doc_id=document.id,
tenant_id=POSTGRES_DEFAULT_SCHEMA,
chunk_count=document.chunk_count,
)
delete_documents_complete__no_commit(
db_session=db_session,
@@ -214,16 +211,14 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
try:
logger.notice("Deleting information from Vespa and Postgres")
active_search_settings = get_active_search_settings(db_session)
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
active_search_settings.primary,
active_search_settings.secondary,
None,
)
files_deleted_count = _unsafe_deletion(
db_session=db_session,
document_indices=document_indices,
document_index=document_index,
cc_pair=cc_pair,
pair_id=cc_pair_id,
)

View File

@@ -3,8 +3,28 @@
# We get OPENSEARCH_ADMIN_PASSWORD from the repo .env file.
source "$(dirname "$0")/../../.vscode/.env"
cd "$(dirname "$0")/../../deployment/docker_compose"
OPENSEARCH_CONTAINER_NAME="onyx-opensearch"
OPENSEARCH_IMAGE="opensearchproject/opensearch:3.4.0"
# First check the env for OPENSEARCH_REST_API_PORT, else hardcode to 9200.
OPENSEARCH_REST_API_PORT=${OPENSEARCH_REST_API_PORT:-9200}
OPENSEARCH_PERFORMANCE_ANALYZER_PORT=9600
# Start OpenSearch.
echo "Forcefully starting fresh OpenSearch container..."
docker compose -f docker-compose.opensearch.yml up --force-recreate -d opensearch
# Tear down any previous OpenSearch container so we always start fresh.
# Failures are ignored (|| true) because the container may not exist yet.
function stop_and_remove_opensearch_container() {
echo "Stopping and removing the existing OpenSearch container..."
docker stop "$OPENSEARCH_CONTAINER_NAME" 2>/dev/null || true
docker rm "$OPENSEARCH_CONTAINER_NAME" 2>/dev/null || true
}
# Set OPENSEARCH_ADMIN_PASSWORD=<some password> in your .env file.
# Fail fast with a clear message if the admin password was not provided.
if [ -z "$OPENSEARCH_ADMIN_PASSWORD" ]; then
echo "Error: OPENSEARCH_ADMIN_PASSWORD environment variable is not set." >&2
echo "Please set OPENSEARCH_ADMIN_PASSWORD=<some password> in your .env file." >&2
exit 1
fi
# Stop and remove the existing container.
stop_and_remove_opensearch_container
# Start the OpenSearch container.
# Runs single-node (no cluster discovery); publishes the REST API and
# performance-analyzer ports configured above.
echo "Starting OpenSearch container..."
docker run --detach --name "$OPENSEARCH_CONTAINER_NAME" --publish "$OPENSEARCH_REST_API_PORT:9200" --publish "$OPENSEARCH_PERFORMANCE_ANALYZER_PORT:9600" -e "discovery.type=single-node" -e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_ADMIN_PASSWORD" "$OPENSEARCH_IMAGE"

Some files were not shown because too many files have changed in this diff Show More