fix: Anthropic litellm thinking workaround (#9713)

chore(playwright): deflake settings_pages.spec.ts (#9684) to release v3.1 (#9702)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-29 11:32:42 +00:00 · 2026-03-27 14:12:15 -07:00 · 2026-03-27 09:08:51 -07:00 · 2026-03-27 08:43:53 -07:00 · 2026-03-26 15:20:56 -07:00 · 2026-03-26 22:12:33 +00:00
749 changed files with 41940 additions and 22012 deletions
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -6,3 +6,4 @@

 3134e5f840c12c8f32613ce520101a047c89dcc2  # refactor(whitespace): rm temporary react fragments (#7161)
 ed3f72bc75f3e3a9ae9e4d8cd38278f9c97e78b4  # refactor(whitespace): rm react fragment #7190
+7b927e79c25f4ddfd18a067f489e122acd2c89de  # chore(format): format files where `ruff` and `black` agree (#9339)
--- a/.github/actions/slack-notify/action.yml
+++ b/.github/actions/slack-notify/action.yml
@@ -10,6 +10,9 @@ inputs:
  failed-jobs:
    description: "Deprecated alias for details"
    required: false
+  mention:
+    description: "GitHub username to resolve to a Slack @-mention. Replaces {mention} in details."
+    required: false
  title:
    description: "Title for the notification"
    required: false
@@ -26,6 +29,7 @@ runs:
        SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
        DETAILS: ${{ inputs.details }}
        FAILED_JOBS: ${{ inputs.failed-jobs }}
+        MENTION_USER: ${{ inputs.mention }}
        TITLE: ${{ inputs.title }}
        REF_NAME: ${{ inputs.ref-name }}
        REPO: ${{ github.repository }}
@@ -52,6 +56,27 @@ runs:
          DETAILS="$FAILED_JOBS"
        fi

+        # Resolve the {mention} placeholder. When a GitHub username is provided, look it
+        # up (case-insensitively) in user-mappings.json, co-located with this action, and
+        # replace {mention} in DETAILS with <@SLACK_ID> for a Slack @-mention. If the user
+        # is unmapped or the lookup fails, use the plain username. TITLE never gets a mention.
+        if [ -n "$MENTION_USER" ]; then
+          MAPPINGS_FILE="${GITHUB_ACTION_PATH}/user-mappings.json"
+          slack_id="$(jq -r --arg gh "$MENTION_USER" 'to_entries[] | select(.value | ascii_downcase == ($gh | ascii_downcase)) | .key' "$MAPPINGS_FILE" 2>/dev/null | head -1 || true)"
+          # `|| true`: if this step runs with errexit/pipefail, a jq error must not abort the notification.
+          if [ -n "$slack_id" ]; then
+            mention_text="<@${slack_id}>"
+          else
+            mention_text="${MENTION_USER}"
+          fi
+
+          DETAILS="${DETAILS//\{mention\}/$mention_text}"
+          TITLE="${TITLE//\{mention\}/}"
+        else
+          DETAILS="${DETAILS//\{mention\}/}"
+          TITLE="${TITLE//\{mention\}/}"
+        fi
+
        normalize_multiline() {
          printf '%s' "$1" | awk 'BEGIN { ORS=""; first=1 } { if (!first) printf "\\n"; printf "%s", $0; first=0 }'
        }
--- a/.github/actions/slack-notify/user-mappings.json
+++ b/.github/actions/slack-notify/user-mappings.json
@@ -0,0 +1,18 @@
+{
+  "U05SAGZPEA1": "yuhongsun96",
+  "U05SAH6UGUD": "Weves",
+  "U07PWEQB7A5": "evan-onyx",
+  "U07V1SM68KF": "joachim-danswer",
+  "U08JZ9N3QNN": "raunakab",
+  "U08L24NCLJE": "Subash-Mohan",
+  "U090B9M07B2": "wenxi-onyx",
+  "U094RASDP0Q": "duo-onyx",
+  "U096L8ZQ85B": "justin-tahara",
+  "U09AHV8UBQX": "jessicasingh7",
+  "U09KAL5T3C2": "nmgarza5",
+  "U09KPGVQ70R": "acaprau",
+  "U09QR8KTSJH": "rohoswagger",
+  "U09RB4NTXA4": "jmelahman",
+  "U0A6K9VCY6A": "Danelegend",
+  "U0AGC4KH71A": "Bo-Onyx"
+}
--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
@@ -44,7 +44,7 @@ jobs:
          fetch-tags: true

      - name: Setup uv
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          version: "0.9.9"
          enable-cache: false
@@ -165,7 +165,7 @@ jobs:
          fetch-depth: 0

      - name: Setup uv
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          version: "0.9.9"
          # NOTE: This isn't caching much and zizmor suggests this could be poisoned, so disable.
@@ -307,7 +307,7 @@ jobs:
            xdg-utils

      - name: setup node
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v6.2.0
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6.3.0
        with:
          node-version: 24
          package-manager-cache: false
@@ -455,7 +455,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -529,7 +529,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -607,7 +607,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -615,6 +615,7 @@ jobs:
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('web-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
+            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta == 'true' && 'beta' || '' }}

@@ -668,7 +669,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -750,7 +751,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -836,7 +837,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -894,7 +895,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -967,7 +968,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -1044,7 +1045,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -1105,7 +1106,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
@@ -1178,7 +1179,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
@@ -1256,15 +1257,13 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=craft-latest
-            # TODO: Consider aligning craft-latest tags with regular backend builds (e.g., latest, edge, beta)
-            # to keep tagging strategy consistent across all backend images

      - name: Create and push manifest
        env:
@@ -1317,7 +1316,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -1397,7 +1396,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -1480,7 +1479,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ needs.determine-builds.outputs.is-test-run == 'true' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
          flavor: |
@@ -1488,6 +1487,7 @@ jobs:
          tags: |
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run == 'true' && format('model-server-{0}', needs.determine-builds.outputs.sanitized-tag) || github.ref_name }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'latest' || '' }}
+            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-latest == 'true' && 'craft-latest' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && env.EDGE_TAG == 'true' && 'edge' || '' }}
            type=raw,value=${{ needs.determine-builds.outputs.is-test-run != 'true' && needs.determine-builds.outputs.is-beta-standalone == 'true' && 'beta' || '' }}

--- a/.github/workflows/post-merge-beta-cherry-pick.yml
+++ b/.github/workflows/post-merge-beta-cherry-pick.yml
@@ -114,7 +114,7 @@ jobs:
          ref: main

      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"
@@ -207,7 +207,7 @@ jobs:
          CHERRY_PICK_PR_URL: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_pr_url }}
        run: |
          source_pr_url="https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}"
-          details="*Cherry-pick PR opened successfully.*\\n• source PR: ${source_pr_url}"
+          details="*Cherry-pick PR opened successfully.*\\n• author: {mention}\\n• source PR: ${source_pr_url}"
          if [ -n "${CHERRY_PICK_PR_URL}" ]; then
            details="${details}\\n• cherry-pick PR: ${CHERRY_PICK_PR_URL}"
          fi
@@ -221,6 +221,7 @@ jobs:
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
+          mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
          details: ${{ steps.success-summary.outputs.details }}
          title: "✅ Automated Cherry-Pick PR Opened"
          ref-name: ${{ github.event.pull_request.base.ref }}
@@ -275,20 +276,21 @@ jobs:
          else
            failed_job_label="cherry-pick-to-latest-release"
          fi
-          failed_jobs="• ${failed_job_label}\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"
+          details="• author: {mention}\\n• ${failed_job_label}\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"
          if [ -n "${MERGE_COMMIT_SHA}" ]; then
-            failed_jobs="${failed_jobs}\\n• merge SHA: ${MERGE_COMMIT_SHA}"
+            details="${details}\\n• merge SHA: ${MERGE_COMMIT_SHA}"
          fi
          if [ -n "${details_excerpt}" ]; then
-            failed_jobs="${failed_jobs}\\n• excerpt: ${details_excerpt}"
+            details="${details}\\n• excerpt: ${details_excerpt}"
          fi

-          echo "jobs=${failed_jobs}" >> "$GITHUB_OUTPUT"
+          echo "details=${details}" >> "$GITHUB_OUTPUT"

      - name: Notify #cherry-pick-prs about cherry-pick failure
        uses: ./.github/actions/slack-notify
        with:
          webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
-          details: ${{ steps.failure-summary.outputs.jobs }}
+          mention: ${{ needs.resolve-cherry-pick-request.outputs.merged_by }}
+          details: ${{ steps.failure-summary.outputs.details }}
          title: "🚨 Automated Cherry-Pick Failed"
          ref-name: ${{ github.event.pull_request.base.ref }}
--- a/.github/workflows/pr-desktop-build.yml
+++ b/.github/workflows/pr-desktop-build.yml
@@ -50,7 +50,7 @@ jobs:
          persist-credentials: false

      - name: Setup node
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f
        with:
          node-version: 24
          cache: "npm" # zizmor: ignore[cache-poisoning]
@@ -105,7 +105,7 @@ jobs:

      - name: Upload build artifacts
        if: always()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
          path: |
--- a/.github/workflows/pr-external-dependency-unit-tests.yml
+++ b/.github/workflows/pr-external-dependency-unit-tests.yml
@@ -7,6 +7,15 @@ on:
  merge_group:
  pull_request:
    branches: [main]
+    paths:
+      - "backend/**"
+      - "pyproject.toml"
+      - "uv.lock"
+      - ".github/workflows/pr-external-dependency-unit-tests.yml"
+      - ".github/actions/setup-python-and-install-dependencies/**"
+      - ".github/actions/setup-playwright/**"
+      - "deployment/docker_compose/docker-compose.yml"
+      - "deployment/docker_compose/docker-compose.dev.yml"
  push:
    tags:
      - "v*.*.*"
@@ -174,7 +183,7 @@ jobs:

      - name: Upload Docker logs
        if: failure()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-logs-${{ matrix.test-dir }}
          path: docker-logs/
--- a/.github/workflows/pr-golang-tests.yml
+++ b/.github/workflows/pr-golang-tests.yml
@@ -25,7 +25,7 @@ jobs:
    outputs:
      modules: ${{ steps.set-modules.outputs.modules }}
    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
        with:
          persist-credentials: false
      - id: set-modules
@@ -39,7 +39,7 @@ jobs:
      matrix:
        modules: ${{ fromJSON(needs.detect-modules.outputs.modules) }}
    steps:
-      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
      - uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # zizmor: ignore[cache-poisoning]
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -466,7 +466,7 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs-${{ matrix.edition }}-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
@@ -587,7 +587,7 @@ jobs:

      - name: Upload logs (onyx-lite)
        if: always()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs-onyx-lite
          path: ${{ github.workspace }}/docker-compose-onyx-lite.log
@@ -725,7 +725,7 @@ jobs:

      - name: Upload logs (multi-tenant)
        if: always()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs-multitenant
          path: ${{ github.workspace }}/docker-compose-multitenant.log
--- a/.github/workflows/pr-jest-tests.yml
+++ b/.github/workflows/pr-jest-tests.yml
@@ -28,7 +28,7 @@ jobs:
          persist-credentials: false

      - name: Setup node
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6
        with:
          node-version: 22
          cache: "npm" # zizmor: ignore[cache-poisoning] test-only workflow; no deploy artifacts
@@ -44,7 +44,7 @@ jobs:

      - name: Upload coverage reports
        if: always()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: jest-coverage-${{ github.run_id }}
          path: ./web/coverage
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -272,7 +272,7 @@ jobs:

      - name: Setup node
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6
        with:
          node-version: 22
          cache: "npm" # zizmor: ignore[cache-poisoning]
@@ -445,7 +445,7 @@ jobs:
        run: |
          npx playwright test --project ${PROJECT}

-      - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          # Includes test results and trace.zip files
@@ -454,7 +454,7 @@ jobs:
          retention-days: 30

      - name: Upload screenshots
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          name: playwright-screenshots-${{ matrix.project }}-${{ github.run_id }}
@@ -471,7 +471,7 @@ jobs:

      - name: Install the latest version of uv
        if: always()
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"
@@ -534,7 +534,7 @@ jobs:
            "s3://${PLAYWRIGHT_S3_BUCKET}/reports/pr-${PR_NUMBER}/${RUN_ID}/${PROJECT}/"

      - name: Upload visual diff summary
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          name: screenshot-diff-summary-${{ matrix.project }}
@@ -543,7 +543,7 @@ jobs:
          retention-days: 5

      - name: Upload visual diff report artifact
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          name: screenshot-diff-report-${{ matrix.project }}-${{ github.run_id }}
@@ -590,7 +590,7 @@ jobs:

      - name: Upload logs
        if: success() || failure()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log
@@ -614,7 +614,7 @@ jobs:

      - name: Setup node
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6
        with:
          node-version: 22
          cache: "npm" # zizmor: ignore[cache-poisoning]
@@ -674,7 +674,7 @@ jobs:
        working-directory: ./web
        run: npx playwright test --project lite

-      - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        if: always()
        with:
          name: playwright-test-results-lite-${{ github.run_id }}
@@ -692,7 +692,7 @@ jobs:

      - name: Upload logs
        if: success() || failure()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-logs-lite-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -7,6 +7,13 @@ on:
  merge_group:
  pull_request:
    branches: [main]
+    paths:
+      - "backend/**"
+      - "pyproject.toml"
+      - "uv.lock"
+      - ".github/workflows/pr-python-connector-tests.yml"
+      - ".github/actions/setup-python-and-install-dependencies/**"
+      - ".github/actions/setup-playwright/**"
  push:
    tags:
      - "v*.*.*"
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -73,7 +73,7 @@ jobs:
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f

      - name: Build and load
-        uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6
+        uses: docker/bake-action@82490499d2e5613fcead7e128237ef0b0ea210f7 # ratchet:docker/bake-action@v7.0.0
        env:
          TAG: model-server-${{ github.run_id }}
        with:
@@ -122,7 +122,7 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs
          path: ${{ github.workspace }}/docker-compose.log
--- a/.github/workflows/pr-quality-checks.yml
+++ b/.github/workflows/pr-quality-checks.yml
@@ -30,7 +30,7 @@ jobs:
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 # ratchet:hashicorp/setup-terraform@v4.0.0
      - name: Setup node
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v6
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6
        with: # zizmor: ignore[cache-poisoning]
          node-version: 22
          cache: "npm"
--- a/.github/workflows/preview.yml
+++ b/.github/workflows/preview.yml
@@ -22,7 +22,7 @@ jobs:
          persist-credentials: false

      - name: Setup node
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6
        with:
          node-version: 22
          cache: "npm"
--- a/.github/workflows/release-cli.yml
+++ b/.github/workflows/release-cli.yml
@@ -26,7 +26,7 @@ jobs:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
-      - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
+      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"
--- a/.github/workflows/release-devtools.yml
+++ b/.github/workflows/release-devtools.yml
@@ -26,7 +26,7 @@ jobs:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
-      - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
+      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"
--- a/.github/workflows/reusable-nightly-llm-provider-chat.yml
+++ b/.github/workflows/reusable-nightly-llm-provider-chat.yml
@@ -319,7 +319,7 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
        with:
          name: docker-all-logs-nightly-${{ matrix.provider }}-llm-provider
          path: |
--- a/.github/workflows/sandbox-deployment.yml
+++ b/.github/workflows/sandbox-deployment.yml
@@ -125,7 +125,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
@@ -195,7 +195,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
@@ -268,7 +268,7 @@ jobs:

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
+        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # ratchet:docker/metadata-action@v6.0.0
        with:
          images: ${{ env.REGISTRY_IMAGE }}
          flavor: |
--- a/.github/workflows/storybook-deploy.yml
+++ b/.github/workflows/storybook-deploy.yml
@@ -32,7 +32,7 @@ jobs:
          persist-credentials: false

      - name: Setup node
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # ratchet:actions/setup-node@v6
        with:
          node-version: 22
          cache: "npm"
--- a/.github/workflows/zizmor.yml
+++ b/.github/workflows/zizmor.yml
@@ -24,7 +24,7 @@ jobs:
          persist-credentials: false

      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 # ratchet:astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
          version: "0.9.9"
--- a/.greptile/config.json
+++ b/.greptile/config.json
@@ -0,0 +1,64 @@
+{
+    "labels": [],
+    "comment": "",
+    "fixWithAI": true,
+    "hideFooter": false,
+    "strictness": 3,
+    "statusCheck": true,
+    "commentTypes": [
+      "logic",
+      "syntax",
+      "style"
+    ],
+    "instructions": "",
+    "disabledLabels": [],
+    "excludeAuthors": [
+      "dependabot[bot]",
+      "renovate[bot]"
+    ],
+    "ignoreKeywords": "",
+    "ignorePatterns": "",
+    "includeAuthors": [],
+    "summarySection": {
+      "included": true,
+      "collapsible": false,
+      "defaultOpen": false
+    },
+    "excludeBranches": [],
+    "fileChangeLimit": 300,
+    "includeBranches": [],
+    "includeKeywords": "",
+    "triggerOnUpdates": true,
+    "updateExistingSummaryComment": true,
+    "updateSummaryOnly": false,
+    "issuesTableSection": {
+      "included": true,
+      "collapsible": false,
+      "defaultOpen": false
+    },
+    "statusCommentsEnabled": true,
+    "confidenceScoreSection": {
+      "included": true,
+      "collapsible": false
+    },
+    "sequenceDiagramSection": {
+      "included": true,
+      "collapsible": false,
+      "defaultOpen": false
+    },
+    "shouldUpdateDescription": false,
+    "rules": [
+      {
+        "scope": ["web/**"],
+        "rule": "In Onyx's Next.js app, the `app/ee/admin/` directory is a filesystem convention for Enterprise Edition route overrides — it does NOT add an `/ee/` prefix to the URL. Both `app/admin/groups/page.tsx` and `app/ee/admin/groups/page.tsx` serve the same URL `/admin/groups`. Hardcoded `/admin/...` paths in router.push() calls are correct and do NOT break EE deployments. Do not flag hardcoded admin paths as bugs."
+      },
+      {
+        "scope": ["web/**"],
+        "rule": "In Onyx, each API key creates a unique user row in the database with a unique `user_id` (UUID). There is a 1:1 mapping between API keys and their backing user records. Multiple API keys do NOT share the same `user_id`. Do not flag potential duplicate row IDs when using `user_id` from API key descriptors."
+      },
+      {
+        "scope": ["backend/**/*.py"],
+        "rule": "Never raise HTTPException directly in business code. Use `raise OnyxError(OnyxErrorCode.XXX, \"message\")` from `onyx.error_handling.exceptions`. A global FastAPI exception handler converts OnyxError into structured JSON responses with {\"error_code\": \"...\", \"detail\": \"...\"}. Error codes are defined in `onyx.error_handling.error_codes.OnyxErrorCode`. For upstream errors with dynamic HTTP status codes, use `status_code_override`: `raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)`."
+      }
+    ]
+}
--- a/.greptile/files.json
+++ b/.greptile/files.json
@@ -0,0 +1,57 @@
+[
+  {
+    "scope": [],
+    "path": "contributing_guides/best_practices.md",
+    "description": "Best practices for contributing to the codebase"
+  },
+  {
+    "scope": ["web/**"],
+    "path": "web/AGENTS.md",
+    "description": "Frontend coding standards for the web directory"
+  },
+  {
+    "scope": ["web/**"],
+    "path": "web/tests/README.md",
+    "description": "Frontend testing guide and conventions"
+  },
+  {
+    "scope": ["web/**"],
+    "path": "web/CLAUDE.md",
+    "description": "Single source of truth for frontend coding standards"
+  },
+  {
+    "scope": ["web/**"],
+    "path": "web/lib/opal/README.md",
+    "description": "Opal component library usage guide"
+  },
+  {
+    "scope": ["backend/**"],
+    "path": "backend/tests/README.md",
+    "description": "Backend testing guide covering all 4 test types, fixtures, and conventions"
+  },
+  {
+    "scope": ["backend/onyx/connectors/**"],
+    "path": "backend/onyx/connectors/README.md",
+    "description": "Connector development guide covering design, interfaces, and required changes"
+  },
+  {
+    "scope": [],
+    "path": "CLAUDE.md",
+    "description": "Project instructions and coding standards"
+  },
+  {
+    "scope": [],
+    "path": "backend/alembic/README.md",
+    "description": "Migration guidance, including multi-tenant migration behavior"
+  },
+  {
+    "scope": [],
+    "path": "deployment/helm/charts/onyx/values-lite.yaml",
+    "description": "Lite deployment Helm values and service assumptions"
+  },
+  {
+    "scope": [],
+    "path": "deployment/docker_compose/docker-compose.onyx-lite.yml",
+    "description": "Lite deployment Docker Compose overlay and disabled service behavior"
+  }
+]
--- a/.greptile/rules.md
+++ b/.greptile/rules.md
@@ -0,0 +1,29 @@
+# Greptile Review Rules
+
+## Type Annotations
+
+Use explicit type annotations for variables to enhance code clarity, especially when moving type hints around in the code.
+
+## Best Practices
+
+Use `contributing_guides/best_practices.md` as core review context. Prefer consistency with existing patterns, fix issues in code you touch, avoid tacking new features onto muddy interfaces, fail loudly instead of silently swallowing errors, keep code strictly typed, preserve clear state boundaries, remove duplicate or dead logic, break up overly long functions, avoid hidden import-time side effects, respect module boundaries, and favor correctness-by-construction over relying on callers to use an API correctly.
+
+## TODOs
+
+Whenever a TODO is added, it must include an associated name or ticket, in the style of `TODO(name): ...` or `TODO(1234): ...`.
+
+## Debugging Code
+
+Remove temporary debugging code before merging to production, especially tenant-specific debugging logs.
+
+## Hardcoded Booleans
+
+When hardcoding a boolean variable to a constant value, remove the variable entirely and clean up all places where it's used rather than just setting it to a constant.
+
+## Multi-tenant vs Single-tenant
+
+Code changes must consider both multi-tenant and single-tenant deployments. In multi-tenant mode, preserve tenant isolation, ensure tenant context is propagated correctly, and avoid assumptions that only hold for a single shared schema or globally shared state. In single-tenant mode, avoid introducing unnecessary tenant-specific requirements or cloud-only control-plane dependencies.
+
+## Full vs Lite Deployments
+
+Code changes must consider both regular Onyx deployments and Onyx lite deployments. Lite deployments disable the vector DB, Redis, model servers, and background workers by default, use PostgreSQL-backed cache/auth/file storage, and rely on the API server to handle background work. Do not assume those services are available unless the code path is explicitly limited to full deployments.
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -117,7 +117,8 @@
      "presentation": {
        "group": "2"
      },
-      "consoleTitle": "API Server Console"
+      "consoleTitle": "API Server Console",
+      "justMyCode": false
    },
    {
      "name": "Slack Bot",
@@ -268,7 +269,8 @@
      "presentation": {
        "group": "2"
      },
-      "consoleTitle": "Celery heavy Console"
+      "consoleTitle": "Celery heavy Console",
+      "justMyCode": false
    },
    {
      "name": "Celery kg_processing",
@@ -355,7 +357,8 @@
      "presentation": {
        "group": "2"
      },
-      "consoleTitle": "Celery user_file_processing Console"
+      "consoleTitle": "Celery user_file_processing Console",
+      "justMyCode": false
    },
    {
      "name": "Celery docfetching",
@@ -413,7 +416,8 @@
      "presentation": {
        "group": "2"
      },
-      "consoleTitle": "Celery docprocessing Console"
+      "consoleTitle": "Celery docprocessing Console",
+      "justMyCode": false
    },
    {
      "name": "Celery beat",
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -167,284 +167,7 @@ web/

 ## Frontend Standards

-### 1. Import Standards
-
-**Always use absolute imports with the `@` prefix.**
-
-**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
-
-```typescript
-// ✅ Good
-import { Button } from "@/components/ui/button";
-import { useAuth } from "@/hooks/useAuth";
-import { Text } from "@/refresh-components/texts/Text";
-
-// ❌ Bad
-import { Button } from "../../../components/ui/button";
-import { useAuth } from "./hooks/useAuth";
-```
-
-### 2. React Component Functions
-
-**Prefer regular functions over arrow functions for React components.**
-
-**Reason:** Functions just become easier to read.
-
-```typescript
-// ✅ Good
-function UserProfile({ userId }: UserProfileProps) {
-  return <div>User Profile</div>
-}
-
-// ❌ Bad
-const UserProfile = ({ userId }: UserProfileProps) => {
-  return <div>User Profile</div>
-}
-```
-
-### 3. Props Interface Extraction
-
-**Extract prop types into their own interface definitions.**
-
-**Reason:** Functions just become easier to read.
-
-```typescript
-// ✅ Good
-interface UserCardProps {
-  user: User
-  showActions?: boolean
-  onEdit?: (userId: string) => void
-}
-
-function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
-  return <div>User Card</div>
-}
-
-// ❌ Bad
-function UserCard({
-  user,
-  showActions = false,
-  onEdit
-}: {
-  user: User
-  showActions?: boolean
-  onEdit?: (userId: string) => void
-}) {
-  return <div>User Card</div>
-}
-```
-
-### 4. Spacing Guidelines
-
-**Prefer padding over margins for spacing.**
-
-**Reason:** We want to consolidate usage to paddings instead of margins.
-
-```typescript
-// ✅ Good
-<div className="p-4 space-y-2">
-  <div className="p-2">Content</div>
-</div>
-
-// ❌ Bad
-<div className="m-4 space-y-2">
-  <div className="m-2">Content</div>
-</div>
-```
-
-### 5. Tailwind Dark Mode
-
-**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
-
-**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
-
-**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
-
-```typescript
-// ✅ Good - Standard components use `tailwind-themes/tailwind.config.js` / `src/app/css/colors.css`
-<div className="bg-background-neutral-03 text-text-02">
-  Content
-</div>
-
-// ✅ Good - Logo icons with dark mode handling via createLogoIcon
-export const GithubIcon = createLogoIcon(githubLightIcon, {
-  monochromatic: true,  // Will apply dark:invert internally
-});
-
-export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
-  darkSrc: gitbookDarkIcon,  // Will use dark:hidden/dark:block internally
-});
-
-// ❌ Bad - Manual dark mode overrides
-<div className="bg-white dark:bg-black text-black dark:text-white">
-  Content
-</div>
-```
-
-### 6. Class Name Utilities
-
-**Use the `cn` utility instead of raw string formatting for classNames.**
-
-**Reason:** `cn`s are easier to read. They also allow for more complex types (i.e., string-arrays) to get formatted properly (it flattens each element in that string array down). As a result, it can allow things such as conditionals (i.e., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) to get filtered out.
-
-```typescript
-import { cn } from '@/lib/utils'
-
-// ✅ Good
-<div className={cn(
-  'base-class',
-  isActive && 'active-class',
-  className
-)}>
-  Content
-</div>
-
-// ❌ Bad
-<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
-  Content
-</div>
-```
-
-### 7. Custom Hooks Organization
-
-**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
-
-**Reason:** This is just a layout preference. Keeps code clean.
-
-```typescript
-// web/src/hooks/useUserData.ts
-export function useUserData(userId: string) {
-  // hook implementation
-}
-
-// web/src/hooks/useLocalStorage.ts
-export function useLocalStorage<T>(key: string, initialValue: T) {
-  // hook implementation
-}
-```
-
-### 8. Icon Usage
-
-**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
-
-**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
-
-```typescript
-// ✅ Good
-import SvgX from "@/icons/x";
-import SvgMoreHorizontal from "@/icons/more-horizontal";
-
-// ❌ Bad
-import { User } from "lucide-react";
-import { FiSearch } from "react-icons/fi";
-```
-
-**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
-If you need help with this step, reach out to `raunak@onyx.app`.
-
-### 9. Text Rendering
-
-**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
-
-**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
-
-```typescript
-// ✅ Good
-import { Text } from '@/refresh-components/texts/Text'
-
-function UserCard({ name }: { name: string }) {
-  return (
-    <Text
-      {/* The `text03` flag makes the text it renders to be coloured the 3rd-scale grey */}
-      text03
-      {/* The `mainAction` flag makes the text it renders to be "main-action" font + line-height + weightage, as described in the Figma */}
-      mainAction
-    >
-      {name}
-    </Text>
-  )
-}
-
-// ❌ Bad
-function UserCard({ name }: { name: string }) {
-  return (
-    <div>
-      <h2>{name}</h2>
-      <p>User details</p>
-    </div>
-  )
-}
-```
-
-### 10. Component Usage
-
-**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
-
-**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
-
-```typescript
-// ✅ Good
-import Button from '@/refresh-components/buttons/Button'
-import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
-import SvgPlusCircle from '@/icons/plus-circle'
-
-function ContactForm() {
-  return (
-    <form>
-      <InputTypeIn placeholder="Search..." />
-      <Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
-    </form>
-  )
-}
-
-// ❌ Bad
-function ContactForm() {
-  return (
-    <form>
-      <input placeholder="Name" />
-      <textarea placeholder="Message" />
-      <button type="submit">Submit</button>
-    </form>
-  )
-}
-```
-
-### 11. Colors
-
-**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
-
-**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
-
-**Available color categories:**
-
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
-
-```typescript
-// ✅ Good - Use custom Onyx color classes
-<div className="bg-background-neutral-01 border border-border-02" />
-<div className="bg-background-tint-02 border border-border-01" />
-<div className="bg-status-success-01" />
-<div className="bg-action-link-01" />
-<div className="bg-theme-primary-05" />
-
-// ❌ Bad - Do NOT use standard Tailwind colors
-<div className="bg-gray-100 border border-gray-300 text-gray-600" />
-<div className="bg-white border border-slate-200" />
-<div className="bg-green-100 text-green-700" />
-<div className="bg-blue-100 text-blue-600" />
-<div className="bg-indigo-500" />
-```
-
-### 12. Data Fetching
-
-**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
-
-**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
+Frontend standards for the `web/` and `desktop/` projects live in `web/AGENTS.md`.

 ## Database & Migrations

--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -47,6 +47,8 @@ RUN apt-get update && \
        gcc \
        nano \
        vim \
+        # Install procps so Kubernetes exec sessions can use `ps aux` for debugging
+        procps \
        libjemalloc2 \
        && \
    rm -rf /var/lib/apt/lists/* && \
--- a/backend/alembic/versions/1d78c0ca7853_remove_voice_provider_deleted_column.py
+++ b/backend/alembic/versions/1d78c0ca7853_remove_voice_provider_deleted_column.py
@@ -0,0 +1,35 @@
+"""remove voice_provider deleted column
+
+Revision ID: 1d78c0ca7853
+Revises: a3f8b2c1d4e5
+Create Date: 2026-03-26 11:30:53.883127
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "1d78c0ca7853"
+down_revision = "a3f8b2c1d4e5"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Hard-delete any soft-deleted rows before dropping the column
+    op.execute("DELETE FROM voice_provider WHERE deleted = true")
+    op.drop_column("voice_provider", "deleted")
+
+
+def downgrade() -> None:
+    op.add_column(
+        "voice_provider",
+        sa.Column(
+            "deleted",
+            sa.Boolean(),
+            nullable=False,
+            server_default=sa.text("false"),
+        ),
+    )
--- a/backend/alembic/versions/25a5501dc766_group_permissions_phase1.py
+++ b/backend/alembic/versions/25a5501dc766_group_permissions_phase1.py
@@ -0,0 +1,109 @@
+"""group_permissions_phase1
+
+Revision ID: 25a5501dc766
+Revises: b728689f45b1
+Create Date: 2026-03-23 11:41:25.557442
+
+"""
+
+from alembic import op
+import fastapi_users_db_sqlalchemy
+import sqlalchemy as sa
+
+from onyx.db.enums import AccountType
+from onyx.db.enums import GrantSource
+from onyx.db.enums import Permission
+
+
+# revision identifiers, used by Alembic.
+revision = "25a5501dc766"
+down_revision = "b728689f45b1"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # 1. Add account_type column to user table (nullable for now).
+    #    TODO(subash): backfill account_type for existing rows and add NOT NULL.
+    op.add_column(
+        "user",
+        sa.Column(
+            "account_type",
+            sa.Enum(AccountType, native_enum=False),
+            nullable=True,
+        ),
+    )
+
+    # 2. Add is_default column to user_group table
+    op.add_column(
+        "user_group",
+        sa.Column(
+            "is_default",
+            sa.Boolean(),
+            nullable=False,
+            server_default=sa.false(),
+        ),
+    )
+
+    # 3. Create permission_grant table
+    op.create_table(
+        "permission_grant",
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column("group_id", sa.Integer(), nullable=False),
+        sa.Column(
+            "permission",
+            sa.Enum(Permission, native_enum=False),
+            nullable=False,
+        ),
+        sa.Column(
+            "grant_source",
+            sa.Enum(GrantSource, native_enum=False),
+            nullable=False,
+        ),
+        sa.Column(
+            "granted_by",
+            fastapi_users_db_sqlalchemy.generics.GUID(),
+            nullable=True,
+        ),
+        sa.Column(
+            "granted_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.func.now(),
+            nullable=False,
+        ),
+        sa.Column(
+            "is_deleted",
+            sa.Boolean(),
+            nullable=False,
+            server_default=sa.false(),
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.ForeignKeyConstraint(
+            ["group_id"],
+            ["user_group.id"],
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["granted_by"],
+            ["user.id"],
+            ondelete="SET NULL",
+        ),
+        sa.UniqueConstraint(
+            "group_id", "permission", name="uq_permission_grant_group_permission"
+        ),
+    )
+
+    # 4. Index on user__user_group(user_id). The existing composite PK has
+    #    user_group_id as its leading column, so queries filtering only by user_id need this index.
+    op.create_index(
+        "ix_user__user_group_user_id",
+        "user__user_group",
+        ["user_id"],
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("ix_user__user_group_user_id", table_name="user__user_group")
+    op.drop_table("permission_grant")
+    op.drop_column("user_group", "is_default")
+    op.drop_column("user", "account_type")
--- a/backend/alembic/versions/689433b0d8de_add_hook_and_hook_execution_log_tables.py
+++ b/backend/alembic/versions/689433b0d8de_add_hook_and_hook_execution_log_tables.py
@@ -0,0 +1,103 @@
+"""add_hook_and_hook_execution_log_tables
+
+Revision ID: 689433b0d8de
+Revises: 93a2e195e25c
+Create Date: 2026-03-13 11:25:06.547474
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects.postgresql import UUID as PGUUID
+
+
+# revision identifiers, used by Alembic.
+revision = "689433b0d8de"
+down_revision = "93a2e195e25c"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "hook",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column(
+            "hook_point",
+            sa.Enum("document_ingestion", "query_processing", native_enum=False),
+            nullable=False,
+        ),
+        sa.Column("endpoint_url", sa.Text(), nullable=True),
+        sa.Column("api_key", sa.LargeBinary(), nullable=True),
+        sa.Column("is_reachable", sa.Boolean(), nullable=True),
+        sa.Column(
+            "fail_strategy",
+            sa.Enum("hard", "soft", native_enum=False),
+            nullable=False,
+        ),
+        sa.Column("timeout_seconds", sa.Float(), nullable=False),
+        sa.Column(
+            "is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
+        ),
+        sa.Column(
+            "deleted", sa.Boolean(), nullable=False, server_default=sa.text("false")
+        ),
+        sa.Column("creator_id", PGUUID(as_uuid=True), nullable=True),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(["creator_id"], ["user.id"], ondelete="SET NULL"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        "ix_hook_one_non_deleted_per_point",
+        "hook",
+        ["hook_point"],
+        unique=True,
+        postgresql_where=sa.text("deleted = false"),
+    )
+
+    op.create_table(
+        "hook_execution_log",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("hook_id", sa.Integer(), nullable=False),
+        sa.Column(
+            "is_success",
+            sa.Boolean(),
+            nullable=False,
+        ),
+        sa.Column("error_message", sa.Text(), nullable=True),
+        sa.Column("status_code", sa.Integer(), nullable=True),
+        sa.Column("duration_ms", sa.Integer(), nullable=True),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(["hook_id"], ["hook.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index("ix_hook_execution_log_hook_id", "hook_execution_log", ["hook_id"])
+    op.create_index(
+        "ix_hook_execution_log_created_at", "hook_execution_log", ["created_at"]
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("ix_hook_execution_log_created_at", table_name="hook_execution_log")
+    op.drop_index("ix_hook_execution_log_hook_id", table_name="hook_execution_log")
+    op.drop_table("hook_execution_log")
+
+    op.drop_index("ix_hook_one_non_deleted_per_point", table_name="hook")
+    op.drop_table("hook")
--- a/backend/alembic/versions/a3f8b2c1d4e5_add_preferred_response_id_to_chat_message.py
+++ b/backend/alembic/versions/a3f8b2c1d4e5_add_preferred_response_id_to_chat_message.py
@@ -0,0 +1,37 @@
+"""add preferred_response_id and model_display_name to chat_message
+
+Revision ID: a3f8b2c1d4e5
+Revises: 25a5501dc766
+Create Date: 2026-03-22
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "a3f8b2c1d4e5"
+down_revision = "25a5501dc766"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "chat_message",
+        sa.Column(
+            "preferred_response_id",
+            sa.Integer(),
+            sa.ForeignKey("chat_message.id", ondelete="SET NULL"),
+            nullable=True,
+        ),
+    )
+    op.add_column(
+        "chat_message",
+        sa.Column("model_display_name", sa.String(), nullable=True),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("chat_message", "model_display_name")
+    op.drop_column("chat_message", "preferred_response_id")
--- a/backend/alembic/versions/b728689f45b1_rename_persona_is_visible_to_is_listed_.py
+++ b/backend/alembic/versions/b728689f45b1_rename_persona_is_visible_to_is_listed_.py
@@ -0,0 +1,26 @@
+"""rename persona is_visible to is_listed and featured to is_featured
+
+Revision ID: b728689f45b1
+Revises: 689433b0d8de
+Create Date: 2026-03-23 12:36:26.607305
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "b728689f45b1"
+down_revision = "689433b0d8de"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.alter_column("persona", "is_visible", new_column_name="is_listed")
+    op.alter_column("persona", "featured", new_column_name="is_featured")
+
+
+def downgrade() -> None:
+    op.alter_column("persona", "is_listed", new_column_name="is_visible")
+    op.alter_column("persona", "is_featured", new_column_name="featured")
--- a/backend/alembic/versions/e7f8a9b0c1d2_create_anonymous_user.py
+++ b/backend/alembic/versions/e7f8a9b0c1d2_create_anonymous_user.py
@@ -36,6 +36,56 @@ TABLES_WITH_USER_ID = [
 ]


+def _dedupe_null_notifications(connection: sa.Connection) -> None:
+    # Multiple NULL-owned notifications can exist because the unique index treats
+    # NULL user_id values as distinct. Before migrating them to the anonymous
+    # user, collapse duplicates and remove rows that would conflict with an
+    # already-existing anonymous notification.
+    result = connection.execute(
+        sa.text(
+            """
+            WITH ranked_null_notifications AS (
+                SELECT
+                    id,
+                    ROW_NUMBER() OVER (
+                        PARTITION BY notif_type, COALESCE(additional_data, '{}'::jsonb)
+                        ORDER BY first_shown DESC, last_shown DESC, id DESC
+                    ) AS row_num
+                FROM notification
+                WHERE user_id IS NULL
+            )
+            DELETE FROM notification
+            WHERE id IN (
+                SELECT id
+                FROM ranked_null_notifications
+                WHERE row_num > 1
+            )
+            """
+        )
+    )
+    if result.rowcount > 0:
+        print(f"Deleted {result.rowcount} duplicate NULL-owned notifications")
+
+    result = connection.execute(
+        sa.text(
+            """
+            DELETE FROM notification AS null_owned
+            USING notification AS anonymous_owned
+            WHERE null_owned.user_id IS NULL
+              AND anonymous_owned.user_id = :user_id
+              AND null_owned.notif_type = anonymous_owned.notif_type
+              AND COALESCE(null_owned.additional_data, '{}'::jsonb) =
+                  COALESCE(anonymous_owned.additional_data, '{}'::jsonb)
+            """
+        ),
+        {"user_id": ANONYMOUS_USER_UUID},
+    )
+    if result.rowcount > 0:
+        print(
+            f"Deleted {result.rowcount} NULL-owned notifications that conflict with existing anonymous-owned notifications"
+        )
+
+
 def upgrade() -> None:
    """
    Create the anonymous user for anonymous access feature.
@@ -65,7 +115,12 @@ def upgrade() -> None:

    # Migrate any remaining user_id=NULL records to anonymous user
    for table in TABLES_WITH_USER_ID:
-        try:
+        # Dedup notifications outside the savepoint so deletions persist
+        # even if the subsequent UPDATE rolls back
+        if table == "notification":
+            _dedupe_null_notifications(connection)
+
+        with connection.begin_nested():
            # Exclude public credential (id=0) which must remain user_id=NULL
            # Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
            # Exclude builtin personas (builtin_persona=True) which must remain user_id=NULL
@@ -80,6 +135,7 @@ def upgrade() -> None:
                condition = "user_id IS NULL AND is_public = false"
            else:
                condition = "user_id IS NULL"
+
            result = connection.execute(
                sa.text(
                    f"""
@@ -92,19 +148,19 @@ def upgrade() -> None:
            )
            if result.rowcount > 0:
                print(f"Updated {result.rowcount} rows in {table} to anonymous user")
-        except Exception as e:
-            print(f"Skipping {table}: {e}")


 def downgrade() -> None:
    """
    Set anonymous user's records back to NULL and delete the anonymous user.
+
+    Note: Duplicate NULL-owned notifications removed during upgrade are not restored.
    """
    connection = op.get_bind()

    # Set records back to NULL
    for table in TABLES_WITH_USER_ID:
-        try:
+        with connection.begin_nested():
            connection.execute(
                sa.text(
                    f"""
@@ -115,8 +171,6 @@ def downgrade() -> None:
                ),
                {"user_id": ANONYMOUS_USER_UUID},
            )
-        except Exception:
-            pass

    # Delete the anonymous user
    connection.execute(
--- a/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
@@ -25,13 +25,13 @@ from onyx.redis.redis_pool import get_redis_client
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.configs import TENANT_ID_PREFIX

-# Default number of pre-provisioned tenants to maintain
-DEFAULT_TARGET_AVAILABLE_TENANTS = 5
+# Maximum tenants to provision in a single task run.
+# Each tenant takes ~80s (alembic migrations), so 5 tenants ≈ 7 minutes.
+_MAX_TENANTS_PER_RUN = 5

-# Soft time limit for tenant pre-provisioning tasks (in seconds)
-_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 5  # 5 minutes
-# Hard time limit for tenant pre-provisioning tasks (in seconds)
-_TENANT_PROVISIONING_TIME_LIMIT = 60 * 10  # 10 minutes
+# Time limits sized for worst-case batch: _MAX_TENANTS_PER_RUN × ~90s + buffer.
+_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 10  # 10 minutes
+_TENANT_PROVISIONING_TIME_LIMIT = 60 * 15  # 15 minutes


@shared_task(
@@ -58,7 +58,7 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
    r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    lock_check: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_AVAILABLE_TENANTS_LOCK,
-        timeout=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
+        timeout=_TENANT_PROVISIONING_TIME_LIMIT,
    )

    # These tasks should never overlap
@@ -74,9 +74,7 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
            num_available_tenants = db_session.query(AvailableTenant).count()

        # Get the target number of available tenants
-        num_minimum_available_tenants = getattr(
-            TARGET_AVAILABLE_TENANTS, "value", DEFAULT_TARGET_AVAILABLE_TENANTS
-        )
+        num_minimum_available_tenants = TARGET_AVAILABLE_TENANTS

        # Calculate how many new tenants we need to provision
        if num_available_tenants < num_minimum_available_tenants:
@@ -90,22 +88,46 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
            f"To provision: {tenants_to_provision}"
        )

-        # just provision one tenant each time we run this ... increase if needed.
-        if tenants_to_provision > 0:
-            pre_provision_tenant()
+        batch_size = min(tenants_to_provision, _MAX_TENANTS_PER_RUN)
+        if batch_size < tenants_to_provision:
+            task_logger.info(
+                f"Capping batch to {batch_size} "
+                f"(need {tenants_to_provision}, will catch up next cycle)"
+            )
+
+        provisioned = 0
+        for i in range(batch_size):
+            task_logger.info(f"Provisioning tenant {i + 1}/{batch_size}")
+            try:
+                if pre_provision_tenant():
+                    provisioned += 1
+            except Exception:
+                task_logger.exception(
+                    f"Failed to provision tenant {i + 1}/{batch_size}, "
+                    "continuing with remaining tenants"
+                )
+
+        task_logger.info(f"Provisioning complete: {provisioned}/{batch_size} succeeded")

    except Exception:
        task_logger.exception("Error in check_available_tenants task")

    finally:
-        lock_check.release()
+        try:
+            lock_check.release()
+        except Exception:
+            task_logger.warning(
+                "Could not release check lock (likely expired), continuing"
+            )


-def pre_provision_tenant() -> None:
+def pre_provision_tenant() -> bool:
    """
    Pre-provision a new tenant and store it in the NewAvailableTenant table.
    This function fully sets up the tenant with all necessary configurations,
    so it's ready to be assigned to a user immediately.
+
+    Returns True if a tenant was successfully provisioned, False otherwise.
    """
    # The MULTI_TENANT check is now done at the caller level (check_available_tenants)
    # rather than inside this function
@@ -113,15 +135,15 @@ def pre_provision_tenant() -> None:
    r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    lock_provision: RedisLock = r.lock(
        OnyxRedisLocks.CLOUD_PRE_PROVISION_TENANT_LOCK,
-        timeout=_TENANT_PROVISIONING_SOFT_TIME_LIMIT,
+        timeout=_TENANT_PROVISIONING_TIME_LIMIT,
    )

    # Allow multiple pre-provisioning tasks to run, but ensure they don't overlap
    if not lock_provision.acquire(blocking=False):
-        task_logger.debug(
-            "Skipping pre_provision_tenant task because it is already running"
+        task_logger.warning(
+            "Skipping pre_provision_tenant — could not acquire provision lock"
        )
-        return
+        return False

    tenant_id: str | None = None
    try:
@@ -161,6 +183,7 @@ def pre_provision_tenant() -> None:
                db_session.add(new_tenant)
                db_session.commit()
                task_logger.info(f"Successfully pre-provisioned tenant: {tenant_id}")
+                return True
            except Exception:
                db_session.rollback()
                task_logger.error(
@@ -184,5 +207,11 @@ def pre_provision_tenant() -> None:
                asyncio.run(rollback_tenant_provisioning(tenant_id))
            except Exception:
                task_logger.exception(f"Error during rollback for tenant: {tenant_id}")
+        return False
    finally:
-        lock_provision.release()
+        try:
+            lock_provision.release()
+        except Exception:
+            task_logger.warning(
+                "Could not release provision lock (likely expired), continuing"
+            )
--- a/backend/ee/onyx/configs/app_configs.py
+++ b/backend/ee/onyx/configs/app_configs.py
@@ -118,9 +118,7 @@ JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)
 SUPER_USERS = json.loads(os.environ.get("SUPER_USERS", "[]"))
 SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")

-# The posthog client does not accept empty API keys or hosts however it fails silently
-# when the capture is called. These defaults prevent Posthog issues from breaking the Onyx app
-POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
+POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY")
 POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
 POSTHOG_DEBUG_LOGS_ENABLED = (
    os.environ.get("POSTHOG_DEBUG_LOGS_ENABLED", "").lower() == "true"
--- a/backend/ee/onyx/db/token_limit.py
+++ b/backend/ee/onyx/db/token_limit.py
@@ -115,8 +115,14 @@ def fetch_user_group_token_rate_limits_for_user(
    ordered: bool = True,
    get_editable: bool = True,
 ) -> Sequence[TokenRateLimit]:
-    stmt = select(TokenRateLimit)
-    stmt = stmt.where(User__UserGroup.user_group_id == group_id)
+    stmt = (
+        select(TokenRateLimit)
+        .join(
+            TokenRateLimit__UserGroup,
+            TokenRateLimit.id == TokenRateLimit__UserGroup.rate_limit_id,
+        )
+        .where(TokenRateLimit__UserGroup.user_group_id == group_id)
+    )
    stmt = _add_user_filters(stmt, user, get_editable)

    if enabled_only:
--- a/backend/ee/onyx/db/user_group.py
+++ b/backend/ee/onyx/db/user_group.py
@@ -800,6 +800,33 @@ def update_user_group(
    return db_user_group


+def rename_user_group(
+    db_session: Session,
+    user_group_id: int,
+    new_name: str,
+) -> UserGroup:
+    """Rename a user group, mark it for re-sync, and commit the session.
+
+    Raises ValueError when no group with ``user_group_id`` exists.
+    """
+    db_user_group = db_session.scalar(
+        select(UserGroup).where(UserGroup.id == user_group_id)
+    )
+    if db_user_group is None:
+        raise ValueError(f"UserGroup with id '{user_group_id}' not found")
+    _check_user_group_is_modifiable(db_user_group)
+    db_user_group.name = new_name
+    db_user_group.time_last_modified_by_user = func.now()
+    # Vespa documents embed the group name, so a sync must rewrite them.
+    _mark_user_group__cc_pair_relationships_outdated__no_commit(
+        db_session=db_session, user_group_id=user_group_id
+    )
+    if not DISABLE_VECTOR_DB:
+        db_user_group.is_up_to_date = False
+    db_session.commit()
+    return db_user_group
+
+
 def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> None:
    stmt = select(UserGroup).where(UserGroup.id == user_group_id)
    db_user_group = db_session.scalar(stmt)
--- a/backend/ee/onyx/external_permissions/sharepoint/permission_utils.py
+++ b/backend/ee/onyx/external_permissions/sharepoint/permission_utils.py
@@ -250,20 +250,24 @@ def _get_sharepoint_list_item_id(drive_item: DriveItem) -> str | None:
        raise e


-def _is_public_item(drive_item: DriveItem) -> bool:
-    is_public = False
+def _is_public_item(
+    drive_item: DriveItem,
+    treat_sharing_link_as_public: bool = False,
+) -> bool:
+    if not treat_sharing_link_as_public:
+        return False
+
    try:
        permissions = sleep_and_retry(
            drive_item.permissions.get_all(page_loaded=lambda _: None), "is_public_item"
        )
        for permission in permissions:
-            if permission.link and (
-                permission.link.scope == "anonymous"
-                or permission.link.scope == "organization"
+            if permission.link and permission.link.scope in (
+                "anonymous",
+                "organization",
            ):
-                is_public = True
-                break
-        return is_public
+                return True
+        return False
    except Exception as e:
        logger.error(f"Failed to check if item {drive_item.id} is public: {e}")
        return False
@@ -504,6 +508,7 @@ def get_external_access_from_sharepoint(
    drive_item: DriveItem | None,
    site_page: dict[str, Any] | None,
    add_prefix: bool = False,
+    treat_sharing_link_as_public: bool = False,
 ) -> ExternalAccess:
    """
    Get external access information from SharePoint.
@@ -563,8 +568,7 @@ def get_external_access_from_sharepoint(
                    )

    if drive_item and drive_name:
-        # Here we check if the item have have any public links, if so we return early
-        is_public = _is_public_item(drive_item)
+        is_public = _is_public_item(drive_item, treat_sharing_link_as_public)
        if is_public:
            logger.info(f"Item {drive_item.id} is public")
            return ExternalAccess(
--- a/backend/ee/onyx/feature_flags/posthog_provider.py
+++ b/backend/ee/onyx/feature_flags/posthog_provider.py
@@ -34,6 +34,9 @@ class PostHogFeatureFlagProvider(FeatureFlagProvider):
        Returns:
            True if the feature is enabled for the user, False otherwise.
        """
+        if not posthog:
+            return False
+
        try:
            posthog.set(
                distinct_id=user_id,
--- a/backend/ee/onyx/search/process_search_query.py
+++ b/backend/ee/onyx/search/process_search_query.py
@@ -44,19 +44,21 @@ def _run_single_search(
    user: User,
    db_session: Session,
    num_hits: int | None = None,
+    hybrid_alpha: float | None = None,
 ) -> list[InferenceChunk]:
    """Execute a single search query and return chunks."""
    chunk_search_request = ChunkSearchRequest(
        query=query,
        user_selected_filters=filters,
        limit=num_hits,
+        hybrid_alpha=hybrid_alpha,
    )

    return search_pipeline(
        chunk_search_request=chunk_search_request,
        document_index=document_index,
        user=user,
-        persona=None,  # No persona for direct search
+        persona_search_info=None,
        db_session=db_session,
    )

@@ -74,7 +76,7 @@ def stream_search_query(
    Core search function that yields streaming packets.
    Used by both streaming and non-streaming endpoints.
    """
-    # Get document index
+    # Get document index.
    search_settings = get_current_search_settings(db_session)
    # This flow is for search so we do not get all indices.
    document_index = get_default_document_index(search_settings, None, db_session)
@@ -119,6 +121,7 @@ def stream_search_query(
            user=user,
            db_session=db_session,
            num_hits=request.num_hits,
+            hybrid_alpha=request.hybrid_alpha,
        )
    else:
        # Multiple queries - run in parallel and merge with RRF
@@ -133,6 +136,7 @@ def stream_search_query(
                    user,
                    db_session,
                    request.num_hits,
+                    request.hybrid_alpha,
                ),
            )
            for query in all_executed_queries
--- a/backend/ee/onyx/server/enterprise_settings/api.py
+++ b/backend/ee/onyx/server/enterprise_settings/api.py
@@ -157,7 +157,11 @@ def fetch_logo_helper(db_session: Session) -> Response:  # noqa: ARG001
            detail="No logo file found",
        )
    else:
-        return Response(content=onyx_file.data, media_type=onyx_file.mime_type)
+        return Response(
+            content=onyx_file.data,
+            media_type=onyx_file.mime_type,
+            headers={"Cache-Control": "no-cache"},
+        )


 def fetch_logotype_helper(db_session: Session) -> Response:  # noqa: ARG001
--- a/backend/ee/onyx/server/query_and_chat/models.py
+++ b/backend/ee/onyx/server/query_and_chat/models.py
@@ -27,15 +27,17 @@ class SearchFlowClassificationResponse(BaseModel):
    is_search_flow: bool


-# NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
-# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
+# NOTE: This model is used for the core flow of the Onyx application, any
+# changes to it should be reviewed and approved by an experienced team member.
+# It is very important to 1. avoid bloat and 2. that this remains backwards
+# compatible across versions.
 class SendSearchQueryRequest(BaseModel):
    search_query: str
    filters: BaseFilters | None = None
    num_docs_fed_to_llm_selection: int | None = None
    run_query_expansion: bool = False
    num_hits: int = 30
-
+    hybrid_alpha: float | None = None
    include_content: bool = False
    stream: bool = False

--- a/backend/ee/onyx/server/query_and_chat/search_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/search_backend.py
@@ -20,6 +20,7 @@ from ee.onyx.server.query_and_chat.models import SearchQueryResponse
 from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
 from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
 from onyx.auth.users import current_user
+from onyx.configs.app_configs import ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.db.models import User
@@ -67,8 +68,10 @@ def search_flow_classification(
    return SearchFlowClassificationResponse(is_search_flow=is_search_flow)


-# NOTE: This endpoint is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
-# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
+# NOTE: This endpoint is used for the core flow of the Onyx application, any
+# changes to it should be reviewed and approved by an experienced team member.
+# It is very important to 1. avoid bloat and 2. that this remains backwards
+# compatible across versions.
@router.post(
    "/send-search-message",
    response_model=None,
@@ -80,13 +83,19 @@ def handle_send_search_message(
    db_session: Session = Depends(get_session),
 ) -> StreamingResponse | SearchFullResponse:
    """
-    Execute a search query with optional streaming.
+    Executes a search query with optional streaming.

-    When stream=True: Returns StreamingResponse with SSE
-    When stream=False: Returns SearchFullResponse
+    If hybrid_alpha is unset and ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH
+    is True, executes pure keyword search.
+
+    Returns:
+        StreamingResponse with SSE if stream=True, otherwise SearchFullResponse.
    """
    logger.debug(f"Received search query: {request.search_query}")

+    if request.hybrid_alpha is None and ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH:
+        request.hybrid_alpha = 0.0
+
    # Non-streaming path
    if not request.stream:
        try:
--- a/backend/ee/onyx/server/seeding.py
+++ b/backend/ee/onyx/server/seeding.py
@@ -178,7 +178,7 @@ def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) ->
                    system_prompt=persona.system_prompt,
                    task_prompt=persona.task_prompt,
                    datetime_aware=persona.datetime_aware,
-                    featured=persona.featured,
+                    is_featured=persona.is_featured,
                    commit=False,
                )
            db_session.commit()
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -29,7 +29,6 @@ from onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
 from onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY
 from onyx.configs.app_configs import VERTEXAI_DEFAULT_CREDENTIALS
 from onyx.configs.app_configs import VERTEXAI_DEFAULT_LOCATION
-from onyx.configs.constants import MilestoneRecordType
 from onyx.db.engine.sql_engine import get_session_with_shared_schema
 from onyx.db.engine.sql_engine import get_session_with_tenant
 from onyx.db.image_generation import create_default_image_gen_config_from_api_key
@@ -59,7 +58,6 @@ from onyx.server.manage.llm.models import LLMProviderUpsertRequest
 from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
 from onyx.setup import setup_onyx
 from onyx.utils.logger import setup_logger
-from onyx.utils.telemetry import mt_cloud_telemetry
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
 from shared_configs.configs import TENANT_ID_PREFIX
@@ -71,7 +69,9 @@ logger = setup_logger()


 async def get_or_provision_tenant(
-    email: str, referral_source: str | None = None, request: Request | None = None
+    email: str,
+    referral_source: str | None = None,
+    request: Request | None = None,
 ) -> str:
    """
    Get existing tenant ID for an email or create a new tenant if none exists.
@@ -693,12 +693,6 @@ async def assign_tenant_to_user(

    try:
        add_users_to_tenant([email], tenant_id)
-
-        mt_cloud_telemetry(
-            tenant_id=tenant_id,
-            distinct_id=email,
-            event=MilestoneRecordType.TENANT_CREATED,
-        )
    except Exception:
        logger.exception(f"Failed to assign tenant {tenant_id} to user {email}")
        raise Exception("Failed to assign tenant to user")
--- a/backend/ee/onyx/server/user_group/api.py
+++ b/backend/ee/onyx/server/user_group/api.py
@@ -4,6 +4,7 @@ from fastapi import HTTPException
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session

+from ee.onyx.db.persona import update_persona_access
 from ee.onyx.db.user_group import add_users_to_user_group
 from ee.onyx.db.user_group import delete_user_group as db_delete_user_group
 from ee.onyx.db.user_group import fetch_user_group
@@ -11,13 +12,16 @@ from ee.onyx.db.user_group import fetch_user_groups
 from ee.onyx.db.user_group import fetch_user_groups_for_user
 from ee.onyx.db.user_group import insert_user_group
 from ee.onyx.db.user_group import prepare_user_group_for_deletion
+from ee.onyx.db.user_group import rename_user_group
 from ee.onyx.db.user_group import update_user_curator_relationship
 from ee.onyx.db.user_group import update_user_group
 from ee.onyx.server.user_group.models import AddUsersToUserGroupRequest
 from ee.onyx.server.user_group.models import MinimalUserGroupSnapshot
 from ee.onyx.server.user_group.models import SetCuratorRequest
+from ee.onyx.server.user_group.models import UpdateGroupAgentsRequest
 from ee.onyx.server.user_group.models import UserGroup
 from ee.onyx.server.user_group.models import UserGroupCreate
+from ee.onyx.server.user_group.models import UserGroupRename
 from ee.onyx.server.user_group.models import UserGroupUpdate
 from onyx.auth.users import current_admin_user
 from onyx.auth.users import current_curator_or_admin_user
@@ -27,6 +31,9 @@ from onyx.configs.constants import PUBLIC_API_TAGS
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import User
 from onyx.db.models import UserRole
+from onyx.db.persona import get_persona_by_id
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import OnyxError
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -87,6 +94,32 @@ def create_user_group(
    return UserGroup.from_model(db_user_group)


+@router.patch("/admin/user-group/rename")
+def rename_user_group_endpoint(
+    rename_request: UserGroupRename,
+    _: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> UserGroup:
+    """Admin-only: rename a user group and return the updated group."""
+    # Failures from the DB layer are translated into OnyxError codes below.
+    try:
+        renamed_group = rename_user_group(
+            db_session=db_session,
+            user_group_id=rename_request.id,
+            new_name=rename_request.name,
+        )
+        return UserGroup.from_model(renamed_group)
+    except IntegrityError:
+        raise OnyxError(
+            OnyxErrorCode.DUPLICATE_RESOURCE,
+            f"User group with name '{rename_request.name}' already exists.",
+        )
+    except ValueError as e:
+        is_missing = "not found" in str(e).lower()
+        code = OnyxErrorCode.NOT_FOUND if is_missing else OnyxErrorCode.CONFLICT
+        raise OnyxError(code, str(e))
+
+
@router.patch("/admin/user-group/{user_group_id}")
 def patch_user_group(
    user_group_id: int,
@@ -161,3 +194,38 @@ def delete_user_group(
        user_group = fetch_user_group(db_session, user_group_id)
        if user_group:
            db_delete_user_group(db_session, user_group)
+
+
+@router.patch("/admin/user-group/{user_group_id}/agents")
+def update_group_agents(
+    user_group_id: int,
+    request: UpdateGroupAgentsRequest,
+    user: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> None:
+    """Attach and detach agents (personas) to/from a user group, then commit.
+
+    Additions are processed before removals. An agent already in the group is
+    left untouched on add; on remove, access is always rewritten without it.
+    """
+    changes = [(agent_id, True) for agent_id in request.added_agent_ids]
+    changes += [(agent_id, False) for agent_id in request.removed_agent_ids]
+    for agent_id, attach in changes:
+        persona = get_persona_by_id(
+            persona_id=agent_id, user=user, db_session=db_session
+        )
+        current_group_ids = [g.id for g in persona.groups]
+        if attach and user_group_id in current_group_ids:
+            continue
+        if attach:
+            new_group_ids = current_group_ids + [user_group_id]
+        else:
+            new_group_ids = [g for g in current_group_ids if g != user_group_id]
+        update_persona_access(
+            persona_id=agent_id,
+            creator_user_id=user.id,
+            db_session=db_session,
+            group_ids=new_group_ids,
+        )
+
+    db_session.commit()
--- a/backend/ee/onyx/server/user_group/models.py
+++ b/backend/ee/onyx/server/user_group/models.py
@@ -104,6 +104,16 @@ class AddUsersToUserGroupRequest(BaseModel):
    user_ids: list[UUID]


+class UserGroupRename(BaseModel):  # body of PATCH /admin/user-group/rename
+    id: int  # id of the user group to rename
+    name: str  # new name to give the group
+
+
 class SetCuratorRequest(BaseModel):
    user_id: UUID
    is_curator: bool
+
+
+class UpdateGroupAgentsRequest(BaseModel):  # body of the group "agents" PATCH
+    added_agent_ids: list[int]  # agent (persona) ids to attach to the group
+    removed_agent_ids: list[int]  # agent (persona) ids to detach from the group
--- a/backend/ee/onyx/utils/posthog_client.py
+++ b/backend/ee/onyx/utils/posthog_client.py
@@ -9,6 +9,7 @@ from ee.onyx.configs.app_configs import POSTHOG_API_KEY
 from ee.onyx.configs.app_configs import POSTHOG_DEBUG_LOGS_ENABLED
 from ee.onyx.configs.app_configs import POSTHOG_HOST
 from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT

 logger = setup_logger()

@@ -18,12 +19,19 @@ def posthog_on_error(error: Any, items: Any) -> None:
    logger.error(f"PostHog error: {error}, items: {items}")


-posthog = Posthog(
-    project_api_key=POSTHOG_API_KEY,
-    host=POSTHOG_HOST,
-    debug=POSTHOG_DEBUG_LOGS_ENABLED,
-    on_error=posthog_on_error,
-)
+posthog: Posthog | None = None
+if POSTHOG_API_KEY:
+    posthog = Posthog(
+        project_api_key=POSTHOG_API_KEY,
+        host=POSTHOG_HOST,
+        debug=POSTHOG_DEBUG_LOGS_ENABLED,
+        on_error=posthog_on_error,
+    )
+elif MULTI_TENANT:
+    logger.warning(
+        "POSTHOG_API_KEY is not set but MULTI_TENANT is enabled — "
+        "PostHog telemetry and feature flags will be disabled"
+    )

 # For cross referencing between cloud and www Onyx sites
 # NOTE: These clients are separate because they are separate posthog projects.
@@ -60,7 +68,7 @@ def capture_and_sync_with_alternate_posthog(
        logger.error(f"Error capturing marketing posthog event: {e}")

    try:
-        if cloud_user_id := props.get("onyx_cloud_user_id"):
+        if posthog and (cloud_user_id := props.get("onyx_cloud_user_id")):
            cloud_props = props.copy()
            cloud_props.pop("onyx_cloud_user_id", None)

@@ -72,15 +80,45 @@ def capture_and_sync_with_alternate_posthog(
        logger.error(f"Error identifying cloud posthog user: {e}")


+def alias_user(distinct_id: str, anonymous_id: str) -> None:
+    """Merge an anonymous PostHog profile into an identified user's profile.
+
+    Skipped when the PostHog client is disabled or when both IDs are equal
+    (nothing to link). Client errors are logged rather than raised.
+    """
+    already_linked = anonymous_id == distinct_id
+    if posthog and not already_linked:
+        try:
+            posthog.alias(previous_id=anonymous_id, distinct_id=distinct_id)
+            # Flush immediately after queuing the alias.
+            posthog.flush()
+        except Exception as e:
+            logger.error(f"Error aliasing PostHog user: {e}")
+
+
+def get_anon_id_from_request(request: Any) -> str | None:
+    """Return the distinct_id stored in the app PostHog cookie, if any.
+
+    Returns None when POSTHOG_API_KEY is unset, when ``request`` is None, or
+    when the cookie is absent or rejected by ``parse_posthog_cookie``.
+    """
+    if request is None or not POSTHOG_API_KEY:
+        return None
+
+    cookie_value = request.cookies.get(f"ph_{POSTHOG_API_KEY}_posthog")
+    parsed = parse_posthog_cookie(cookie_value) if cookie_value else None
+    return parsed.get("distinct_id") if parsed else None
+
+
 def get_marketing_posthog_cookie_name() -> str | None:
    if not MARKETING_POSTHOG_API_KEY:
        return None
    return f"onyx_custom_ph_{MARKETING_POSTHOG_API_KEY}_posthog"


-def parse_marketing_cookie(cookie_value: str) -> dict[str, Any] | None:
+def parse_posthog_cookie(cookie_value: str) -> dict[str, Any] | None:
    """
-    Parse the URL-encoded JSON marketing cookie.
+    Parse a URL-encoded JSON PostHog cookie

    Expected format (URL-encoded):
    {"distinct_id":"...", "featureFlags":{"landing_page_variant":"..."}, ...}
@@ -94,7 +132,7 @@ def parse_marketing_cookie(cookie_value: str) -> dict[str, Any] | None:
        cookie_data = json.loads(decoded_cookie)

        distinct_id = cookie_data.get("distinct_id")
-        if not distinct_id:
+        if not distinct_id or not isinstance(distinct_id, str):
            return None

        return cookie_data
--- a/backend/ee/onyx/utils/telemetry.py
+++ b/backend/ee/onyx/utils/telemetry.py
@@ -1,3 +1,5 @@
+from typing import Any
+
 from ee.onyx.utils.posthog_client import posthog
 from onyx.utils.logger import setup_logger

@@ -5,12 +7,27 @@ logger = setup_logger()


 def event_telemetry(
-    distinct_id: str, event: str, properties: dict | None = None
+    distinct_id: str, event: str, properties: dict[str, Any] | None = None
 ) -> None:
    """Capture and send an event to PostHog, flushing immediately."""
+    if not posthog:
+        return
+
    logger.info(f"Capturing PostHog event: {distinct_id} {event} {properties}")
    try:
        posthog.capture(distinct_id, event, properties)
        posthog.flush()
    except Exception as e:
        logger.error(f"Error capturing PostHog event: {e}")
+
+
+def identify_user(distinct_id: str, properties: dict[str, Any] | None = None) -> None:
+    """Upsert a PostHog person profile and flush at once; errors are only logged."""
+    # The imported client is None when PostHog is not configured, in which
+    # case there is nothing to send.
+    if posthog:
+        try:
+            posthog.identify(distinct_id, properties)
+            posthog.flush()
+        except Exception as e:
+            logger.error(f"Error identifying PostHog user: {e}")
--- a/backend/onyx/auth/users.py
+++ b/backend/onyx/auth/users.py
@@ -19,6 +19,7 @@ from typing import Optional
 from typing import Protocol
 from typing import Tuple
 from typing import TypeVar
+from urllib.parse import urlparse

 import jwt
 from email_validator import EmailNotValidError
@@ -134,6 +135,9 @@ from onyx.redis.redis_pool import retrieve_ws_token_data
 from onyx.server.settings.store import load_settings
 from onyx.server.utils import BasicAuthenticationError
 from onyx.utils.logger import setup_logger
+from onyx.utils.telemetry import mt_cloud_alias
+from onyx.utils.telemetry import mt_cloud_get_anon_id
+from onyx.utils.telemetry import mt_cloud_identify
 from onyx.utils.telemetry import mt_cloud_telemetry
 from onyx.utils.telemetry import optional_telemetry
 from onyx.utils.telemetry import RecordType
@@ -249,18 +253,12 @@ def verify_email_is_invited(email: str) -> None:
    whitelist = get_invited_users()

    if not email:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail={"reason": "Email must be specified"},
-        )
+        raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Email must be specified")

    try:
        email_info = validate_email(email, check_deliverability=False)
    except EmailUndeliverableError:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail={"reason": "Email is not valid"},
-        )
+        raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Email is not valid")

    for email_whitelist in whitelist:
        try:
@@ -277,12 +275,9 @@ def verify_email_is_invited(email: str) -> None:
        if email_info.normalized.lower() == email_info_whitelist.normalized.lower():
            return

-    raise HTTPException(
-        status_code=status.HTTP_403_FORBIDDEN,
-        detail={
-            "code": REGISTER_INVITE_ONLY_CODE,
-            "reason": "This workspace is invite-only. Please ask your admin to invite you.",
-        },
+    raise OnyxError(
+        OnyxErrorCode.UNAUTHORIZED,
+        "This workspace is invite-only. Please ask your admin to invite you.",
    )


@@ -292,48 +287,47 @@ def verify_email_in_whitelist(email: str, tenant_id: str) -> None:
            verify_email_is_invited(email)


-def verify_email_domain(email: str) -> None:
+def verify_email_domain(email: str, *, is_registration: bool = False) -> None:
    if email.count("@") != 1:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Email is not valid",
-        )
+        raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Email is not valid")

    local_part, domain = email.split("@")
    domain = domain.lower()
+    local_part = local_part.lower()

    if AUTH_TYPE == AuthType.CLOUD:
        # Normalize googlemail.com to gmail.com (they deliver to the same inbox)
        if domain == "googlemail.com":
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail={"reason": "Please use @gmail.com instead of @googlemail.com."},
+            raise OnyxError(
+                OnyxErrorCode.INVALID_INPUT,
+                "Please use @gmail.com instead of @googlemail.com.",
+            )
+
+        # Only block dotted Gmail on new signups — existing users must still be
+        # able to sign in with the address they originally registered with.
+        if is_registration and domain == "gmail.com" and "." in local_part:
+            raise OnyxError(
+                OnyxErrorCode.INVALID_INPUT,
+                "Gmail addresses with '.' are not allowed. Please use your base email address.",
            )

        if "+" in local_part and domain != "onyx.app":
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail={
-                    "reason": "Email addresses with '+' are not allowed. Please use your base email address."
-                },
+            raise OnyxError(
+                OnyxErrorCode.INVALID_INPUT,
+                "Email addresses with '+' are not allowed. Please use your base email address.",
            )

    # Check if email uses a disposable/temporary domain
    if is_disposable_email(email):
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail={
-                "reason": "Disposable email addresses are not allowed. Please use a permanent email address."
-            },
+        raise OnyxError(
+            OnyxErrorCode.INVALID_INPUT,
+            "Disposable email addresses are not allowed. Please use a permanent email address.",
        )

    # Check domain whitelist if configured
    if VALID_EMAIL_DOMAINS:
        if domain not in VALID_EMAIL_DOMAINS:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Email domain is not valid",
-            )
+            raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Email domain is not valid")


 def enforce_seat_limit(db_session: Session, seats_needed: int = 1) -> None:
@@ -349,7 +343,7 @@ def enforce_seat_limit(db_session: Session, seats_needed: int = 1) -> None:
    )(db_session, seats_needed=seats_needed)

    if result is not None and not result.available:
-        raise HTTPException(status_code=402, detail=result.error_message)
+        raise OnyxError(OnyxErrorCode.SEAT_LIMIT_EXCEEDED, result.error_message)


 class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
@@ -402,10 +396,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
                    captcha_token or "", expected_action="signup"
                )
            except CaptchaVerificationError as e:
-                raise HTTPException(
-                    status_code=status.HTTP_400_BAD_REQUEST,
-                    detail={"reason": str(e)},
-                )
+                raise OnyxError(OnyxErrorCode.INVALID_INPUT, str(e))

        # We verify the password here to make sure it's valid before we proceed
        await self.validate_password(
@@ -415,13 +406,10 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
        # Check for disposable emails BEFORE provisioning tenant
        # This prevents creating tenants for throwaway email addresses
        try:
-            verify_email_domain(user_create.email)
-        except HTTPException as e:
+            verify_email_domain(user_create.email, is_registration=True)
+        except OnyxError as e:
            # Log blocked disposable email attempts
-            if (
-                e.status_code == status.HTTP_400_BAD_REQUEST
-                and "Disposable email" in str(e.detail)
-            ):
+            if "Disposable email" in e.detail:
                domain = (
                    user_create.email.split("@")[-1]
                    if "@" in user_create.email
@@ -565,9 +553,9 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
        result = await db_session.execute(
            select(Persona.id)
            .where(
-                Persona.featured.is_(True),
+                Persona.is_featured.is_(True),
                Persona.is_public.is_(True),
-                Persona.is_visible.is_(True),
+                Persona.is_listed.is_(True),
                Persona.deleted.is_(False),
            )
            .order_by(
@@ -695,6 +683,8 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
                        raise exceptions.UserNotExists()

                except exceptions.UserNotExists:
+                    verify_email_domain(account_email, is_registration=True)
+
                    # Check seat availability before creating (single-tenant only)
                    with get_session_with_current_tenant() as sync_db:
                        enforce_seat_limit(sync_db)
@@ -792,6 +782,18 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
        except Exception:
            logger.exception("Error deleting anonymous user cookie")

+        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+
+        # Link the anonymous PostHog session to the identified user so that
+        # pre-login session recordings and events merge into one person profile.
+        if anon_id := mt_cloud_get_anon_id(request):
+            mt_cloud_alias(distinct_id=str(user.id), anonymous_id=anon_id)
+
+        mt_cloud_identify(
+            distinct_id=str(user.id),
+            properties={"email": user.email, "tenant_id": tenant_id},
+        )
+
    async def on_after_register(
        self, user: User, request: Optional[Request] = None
    ) -> None:
@@ -810,12 +812,30 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
            user_count = await get_user_count()
            logger.debug(f"Current tenant user count: {user_count}")

+            # Link the anonymous PostHog session to the identified user so
+            # that pre-signup session recordings merge into one person profile.
+            if anon_id := mt_cloud_get_anon_id(request):
+                mt_cloud_alias(distinct_id=str(user.id), anonymous_id=anon_id)
+
+            # Ensure a PostHog person profile exists for this user.
+            mt_cloud_identify(
+                distinct_id=str(user.id),
+                properties={"email": user.email, "tenant_id": tenant_id},
+            )
+
            mt_cloud_telemetry(
                tenant_id=tenant_id,
-                distinct_id=user.email,
+                distinct_id=str(user.id),
                event=MilestoneRecordType.USER_SIGNED_UP,
            )

+            if user_count == 1:
+                mt_cloud_telemetry(
+                    tenant_id=tenant_id,
+                    distinct_id=str(user.id),
+                    event=MilestoneRecordType.TENANT_CREATED,
+                )
+
        finally:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)

@@ -825,9 +845,9 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
            attribute="get_marketing_posthog_cookie_name",
            noop_return_value=None,
        )
-        parse_marketing_cookie = fetch_ee_implementation_or_noop(
+        parse_posthog_cookie = fetch_ee_implementation_or_noop(
            module="onyx.utils.posthog_client",
-            attribute="parse_marketing_cookie",
+            attribute="parse_posthog_cookie",
            noop_return_value=None,
        )
        capture_and_sync_with_alternate_posthog = fetch_ee_implementation_or_noop(
@@ -841,7 +861,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
            and user_count is not None
            and (marketing_cookie_name := get_marketing_posthog_cookie_name())
            and (marketing_cookie_value := request.cookies.get(marketing_cookie_name))
-            and (parsed_cookie := parse_marketing_cookie(marketing_cookie_value))
+            and (parsed_cookie := parse_posthog_cookie(marketing_cookie_value))
        ):
            marketing_anonymous_id = parsed_cookie["distinct_id"]

@@ -1652,6 +1672,33 @@ async def _get_user_from_token_data(token_data: dict) -> User | None:
        return user


+_LOOPBACK_HOSTNAMES = frozenset({"localhost", "127.0.0.1", "::1"})
+
+
+def _is_same_origin(actual: str, expected: str) -> bool:
+    """Compare two origins for the WebSocket CSWSH check.
+
+    Scheme and hostname must match exactly.  Port must also match, except
+    when the hostname is a loopback address (localhost / 127.0.0.1 / ::1),
+    where port is ignored.  On loopback, all ports belong to the same
+    operator, so port differences carry no security significance — the
+    CSWSH threat is remote origins, not local ones.
+
+    Args:
+        actual: The Origin header value sent by the client (untrusted).
+        expected: The configured origin to compare against.
+
+    Returns:
+        True only when ``actual`` parses to a scheme + host that matches
+        ``expected``.  Anything that cannot be parsed (bad port, bad IPv6
+        literal, missing scheme or host such as the literal ``null``) is
+        treated as a mismatch rather than raising.
+    """
+    try:
+        a = urlparse(actual.rstrip("/"))
+        e = urlparse(expected.rstrip("/"))
+
+        # Without this guard two scheme-less / host-less values compare equal
+        # ('' == '' and None == None) and would be accepted as "same origin".
+        if not a.scheme or not a.hostname:
+            return False
+
+        if a.scheme != e.scheme or a.hostname != e.hostname:
+            return False
+
+        if a.hostname in _LOOPBACK_HOSTNAMES:
+            return True
+
+        # .port raises ValueError for non-numeric or out-of-range ports.
+        actual_port = a.port or (443 if a.scheme == "https" else 80)
+        expected_port = e.port or (443 if e.scheme == "https" else 80)
+    except ValueError:
+        # Malformed origin: deny instead of letting the error escape.
+        return False
+
+    return actual_port == expected_port
+
+
 async def current_user_from_websocket(
    websocket: WebSocket,
    token: str = Query(..., description="WebSocket authentication token"),
@@ -1671,19 +1718,15 @@ async def current_user_from_websocket(

    This applies the same auth checks as current_user() for HTTP endpoints.
    """
-    # Check Origin header to prevent Cross-Site WebSocket Hijacking (CSWSH)
-    # Browsers always send Origin on WebSocket connections
+    # Check Origin header to prevent Cross-Site WebSocket Hijacking (CSWSH).
+    # Browsers always send Origin on WebSocket connections.
    origin = websocket.headers.get("origin")
-    expected_origin = WEB_DOMAIN.rstrip("/")
    if not origin:
        logger.warning("WS auth: missing Origin header")
        raise BasicAuthenticationError(detail="Access denied. Missing origin.")

-    actual_origin = origin.rstrip("/")
-    if actual_origin != expected_origin:
-        logger.warning(
-            f"WS auth: origin mismatch. Expected {expected_origin}, got {actual_origin}"
-        )
+    if not _is_same_origin(origin, WEB_DOMAIN):
+        logger.warning(f"WS auth: origin mismatch. Expected {WEB_DOMAIN}, got {origin}")
        raise BasicAuthenticationError(detail="Access denied. Invalid origin.")

    # Validate WS token in Redis (single-use, deleted after retrieval)
--- a/backend/onyx/background/celery/apps/docfetching.py
+++ b/backend/onyx/background/celery/apps/docfetching.py
@@ -13,6 +13,14 @@ from celery.signals import worker_shutdown
 import onyx.background.celery.apps.app_base as app_base
 from onyx.configs.constants import POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME
 from onyx.db.engine.sql_engine import SqlEngine
+from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
+from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
+from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
+from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
+from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
+from onyx.server.metrics.indexing_task_metrics import on_indexing_task_postrun
+from onyx.server.metrics.indexing_task_metrics import on_indexing_task_prerun
+from onyx.server.metrics.metrics_server import start_metrics_server
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT

@@ -34,6 +42,8 @@ def on_task_prerun(
    **kwds: Any,
 ) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
+    on_celery_task_prerun(task_id, task)
+    on_indexing_task_prerun(task_id, task, kwargs)


@signals.task_postrun.connect
@@ -48,6 +58,36 @@ def on_task_postrun(
    **kwds: Any,
 ) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
+    on_celery_task_postrun(task_id, task, state)
+    on_indexing_task_postrun(task_id, task, kwargs, state)
+
+
+@signals.task_retry.connect
+def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
+    """Forward Celery's ``task_retry`` signal to ``on_celery_task_retry``.
+
+    ``sender`` is passed through unchanged as the task argument.  The task id
+    is looked up with nested ``getattr`` calls, so a sender without a
+    ``request`` (or a request without an ``id``) yields ``None`` instead of
+    raising.
+    """
+    # task_retry signal doesn't pass task_id in kwargs; get it from
+    # the sender (the task instance) via sender.request.id.
+    task_id = getattr(getattr(sender, "request", None), "id", None)
+    on_celery_task_retry(task_id, sender)
+
+
+@signals.task_revoked.connect
+def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
+    """Forward Celery's ``task_revoked`` signal to ``on_celery_task_revoked``.
+
+    The signal delivers the revoked task's ``request`` object (plus
+    ``terminated``, ``signum`` and ``expired``) rather than a ``task_id``
+    keyword, so the id is read from ``request.id``.  The task name comes from
+    ``sender.name`` and falls back to ``str(sender)``.
+    """
+    task_name = getattr(sender, "name", None) or str(sender)
+    # Honour an explicit task_id kwarg if one is ever supplied, otherwise
+    # take it from the request; None when neither is available.
+    task_id = kwargs.get("task_id") or getattr(kwargs.get("request"), "id", None)
+    on_celery_task_revoked(task_id, task_name)
+
+
+@signals.task_rejected.connect
+def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
+    """Report a rejected task to ``on_celery_task_rejected``.
+
+    task_rejected is sent by the Consumer, not the task instance, so the task
+    name is taken from the ``task`` header of the rejected message and
+    defaults to "unknown".  No task id is passed along.
+    """
+    rejected_message = kwargs.get("message")
+    header_name: str | None = None
+    if rejected_message is not None:
+        message_headers = getattr(rejected_message, "headers", None) or {}
+        header_name = message_headers.get("task")
+    on_celery_task_rejected(None, "unknown" if header_name is None else header_name)


@celeryd_init.connect
@@ -76,6 +116,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:

@worker_ready.connect
 def on_worker_ready(sender: Any, **kwargs: Any) -> None:
+    start_metrics_server("docfetching")
    app_base.on_worker_ready(sender, **kwargs)


--- a/backend/onyx/background/celery/apps/docprocessing.py
+++ b/backend/onyx/background/celery/apps/docprocessing.py
@@ -14,6 +14,14 @@ from celery.signals import worker_shutdown
 import onyx.background.celery.apps.app_base as app_base
 from onyx.configs.constants import POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME
 from onyx.db.engine.sql_engine import SqlEngine
+from onyx.server.metrics.celery_task_metrics import on_celery_task_postrun
+from onyx.server.metrics.celery_task_metrics import on_celery_task_prerun
+from onyx.server.metrics.celery_task_metrics import on_celery_task_rejected
+from onyx.server.metrics.celery_task_metrics import on_celery_task_retry
+from onyx.server.metrics.celery_task_metrics import on_celery_task_revoked
+from onyx.server.metrics.indexing_task_metrics import on_indexing_task_postrun
+from onyx.server.metrics.indexing_task_metrics import on_indexing_task_prerun
+from onyx.server.metrics.metrics_server import start_metrics_server
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT

@@ -35,6 +43,8 @@ def on_task_prerun(
    **kwds: Any,
 ) -> None:
    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
+    on_celery_task_prerun(task_id, task)
+    on_indexing_task_prerun(task_id, task, kwargs)


@signals.task_postrun.connect
@@ -49,6 +59,36 @@ def on_task_postrun(
    **kwds: Any,
 ) -> None:
    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
+    on_celery_task_postrun(task_id, task, state)
+    on_indexing_task_postrun(task_id, task, kwargs, state)
+
+
+@signals.task_retry.connect
+def on_task_retry(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
+    """Forward Celery's ``task_retry`` signal to ``on_celery_task_retry``.
+
+    ``sender`` is passed through unchanged as the task argument.  The task id
+    is looked up with nested ``getattr`` calls, so a sender without a
+    ``request`` (or a request without an ``id``) yields ``None`` instead of
+    raising.
+    """
+    # task_retry signal doesn't pass task_id in kwargs; get it from
+    # the sender (the task instance) via sender.request.id.
+    task_id = getattr(getattr(sender, "request", None), "id", None)
+    on_celery_task_retry(task_id, sender)
+
+
+@signals.task_revoked.connect
+def on_task_revoked(sender: Any | None = None, **kwargs: Any) -> None:
+    """Forward Celery's ``task_revoked`` signal to ``on_celery_task_revoked``.
+
+    The signal delivers the revoked task's ``request`` object (plus
+    ``terminated``, ``signum`` and ``expired``) rather than a ``task_id``
+    keyword, so the id is read from ``request.id``.  The task name comes from
+    ``sender.name`` and falls back to ``str(sender)``.
+    """
+    task_name = getattr(sender, "name", None) or str(sender)
+    # Honour an explicit task_id kwarg if one is ever supplied, otherwise
+    # take it from the request; None when neither is available.
+    task_id = kwargs.get("task_id") or getattr(kwargs.get("request"), "id", None)
+    on_celery_task_revoked(task_id, task_name)
+
+
+@signals.task_rejected.connect
+def on_task_rejected(sender: Any | None = None, **kwargs: Any) -> None:  # noqa: ARG001
+    """Report a rejected task to ``on_celery_task_rejected``.
+
+    task_rejected is sent by the Consumer, not the task instance, so the task
+    name is taken from the ``task`` header of the rejected message and
+    defaults to "unknown".  No task id is passed along.
+    """
+    rejected_message = kwargs.get("message")
+    header_name: str | None = None
+    if rejected_message is not None:
+        message_headers = getattr(rejected_message, "headers", None) or {}
+        header_name = message_headers.get("task")
+    on_celery_task_rejected(None, "unknown" if header_name is None else header_name)


@celeryd_init.connect
@@ -82,6 +122,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:

@worker_ready.connect
 def on_worker_ready(sender: Any, **kwargs: Any) -> None:
+    start_metrics_server("docprocessing")
    app_base.on_worker_ready(sender, **kwargs)


@@ -90,6 +131,12 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
    app_base.on_worker_shutdown(sender, **kwargs)


+# Note: worker_process_init only fires in prefork pool mode. Docprocessing uses
+# worker_pool="threads" (see configs/docprocessing.py), so this handler is
+# effectively a no-op in normal operation. It remains as a safety net in case
+# the pool type is ever changed to prefork. Prometheus metrics are safe in
+# thread-pool mode since all threads share the same process memory and can
+# update the same Counter/Gauge/Histogram objects directly.
@worker_process_init.connect
 def init_worker(**kwargs: Any) -> None:  # noqa: ARG001
    SqlEngine.reset_engine()
--- a/backend/onyx/background/celery/apps/monitoring.py
+++ b/backend/onyx/background/celery/apps/monitoring.py
@@ -54,8 +54,14 @@ def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None
    app_base.on_celeryd_init(sender, conf, **kwargs)


+# Set by on_worker_init so on_worker_ready knows whether to start the server.
+_prometheus_collectors_ok: bool = False
+
+
@worker_init.connect
 def on_worker_init(sender: Any, **kwargs: Any) -> None:
+    global _prometheus_collectors_ok
+
    logger.info("worker_init signal received.")
    logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}")

@@ -65,6 +71,8 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)

+    _prometheus_collectors_ok = _setup_prometheus_collectors(sender)
+
    # Less startup checks in multi-tenant case
    if MULTI_TENANT:
        return
@@ -72,8 +80,37 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
    app_base.on_secondary_worker_init(sender, **kwargs)


+def _setup_prometheus_collectors(sender: Any) -> bool:
+    """Register Prometheus collectors that need Redis/DB access.
+
+    Passes the Celery app so the queue depth collector can obtain a fresh
+    broker Redis client on each scrape (rather than holding a stale reference).
+
+    Returns True if registration succeeded, False otherwise.
+    """
+    try:
+        # NOTE(review): imported inside the function rather than at module
+        # top, presumably to defer loading the metrics setup until worker
+        # init; confirm. Being inside the try, an import failure is also
+        # reported as False.
+        from onyx.server.metrics.indexing_pipeline_setup import (
+            setup_indexing_pipeline_metrics,
+        )
+
+        setup_indexing_pipeline_metrics(sender.app)
+        logger.info("Prometheus indexing pipeline collectors registered")
+        return True
+    except Exception:
+        # Broad catch on purpose: any failure is logged with its traceback and
+        # turned into a False result instead of propagating to the caller.
+        logger.exception("Failed to register Prometheus indexing pipeline collectors")
+        return False
+
+
@worker_ready.connect
 def on_worker_ready(sender: Any, **kwargs: Any) -> None:
+    if _prometheus_collectors_ok:
+        from onyx.server.metrics.metrics_server import start_metrics_server
+
+        start_metrics_server("monitoring")
+    else:
+        logger.warning(
+            "Skipping Prometheus metrics server — collector registration failed"
+        )
    app_base.on_worker_ready(sender, **kwargs)


--- a/backend/onyx/background/celery/apps/primary.py
+++ b/backend/onyx/background/celery/apps/primary.py
@@ -317,6 +317,7 @@ celery_app.autodiscover_tasks(
            "onyx.background.celery.tasks.docprocessing",
            "onyx.background.celery.tasks.evals",
            "onyx.background.celery.tasks.hierarchyfetching",
+            "onyx.background.celery.tasks.hooks",
            "onyx.background.celery.tasks.periodic",
            "onyx.background.celery.tasks.pruning",
            "onyx.background.celery.tasks.shared",
--- a/backend/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/onyx/background/celery/tasks/beat_schedule.py
@@ -14,6 +14,7 @@ from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
+from onyx.hooks.utils import HOOKS_AVAILABLE
 from shared_configs.configs import MULTI_TENANT

 # choosing 15 minutes because it roughly gives us enough time to process many tasks
@@ -361,6 +362,19 @@ if not MULTI_TENANT:

    tasks_to_schedule.extend(beat_task_templates)

+# Schedule the hook execution log cleanup task once a day, at low priority,
+# but only when HOOKS_AVAILABLE is truthy.
+if HOOKS_AVAILABLE:
+    tasks_to_schedule.append(
+        {
+            "name": "hook-execution-log-cleanup",
+            "task": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
+            "schedule": timedelta(days=1),
+            "options": {
+                "priority": OnyxCeleryPriority.LOW,
+                "expires": BEAT_EXPIRES_DEFAULT,
+            },
+        }
+    )
+

 def generate_cloud_tasks(
    beat_tasks: list[dict], beat_templates: list[dict], beat_multiplier: float
--- a/backend/onyx/background/celery/tasks/hierarchyfetching/tasks.py
+++ b/backend/onyx/background/celery/tasks/hierarchyfetching/tasks.py
@@ -29,6 +29,8 @@ from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisLocks
+from onyx.connectors.factory import ConnectorMissingException
+from onyx.connectors.factory import identify_connector_class
 from onyx.connectors.factory import instantiate_connector
 from onyx.connectors.interfaces import HierarchyConnector
 from onyx.connectors.models import HierarchyNode as PydanticHierarchyNode
@@ -55,6 +57,26 @@ logger = setup_logger()
 HIERARCHY_FETCH_INTERVAL_SECONDS = 24 * 60 * 60


+def _connector_supports_hierarchy_fetching(
+    cc_pair: ConnectorCredentialPair,
+) -> bool:
+    """Tell whether the cc_pair's connector class is a HierarchyConnector.
+
+    When no connector class can be identified for the source, a warning is
+    logged and the connector is reported as unsupported.
+    """
+    try:
+        resolved_class = identify_connector_class(
+            cc_pair.connector.source,
+        )
+    except ConnectorMissingException as missing:
+        task_logger.warning(
+            "Skipping hierarchy fetching enqueue for source=%s input_type=%s: %s",
+            cc_pair.connector.source,
+            cc_pair.connector.input_type,
+            str(missing),
+        )
+        return False
+    else:
+        return issubclass(resolved_class, HierarchyConnector)
+
+
 def _is_hierarchy_fetching_due(cc_pair: ConnectorCredentialPair) -> bool:
    """Returns boolean indicating if hierarchy fetching is due for this connector.

@@ -186,7 +208,10 @@ def check_for_hierarchy_fetching(self: Task, *, tenant_id: str) -> int | None:
                    cc_pair_id=cc_pair_id,
                )

-                if not cc_pair or not _is_hierarchy_fetching_due(cc_pair):
+                if not cc_pair or not _connector_supports_hierarchy_fetching(cc_pair):
+                    continue
+
+                if not _is_hierarchy_fetching_due(cc_pair):
                    continue

                task_id = _try_creating_hierarchy_fetching_task(
--- a/backend/onyx/background/celery/tasks/hooks/__init__.py
+++ b/backend/onyx/background/celery/tasks/hooks/__init__.py
--- a/backend/onyx/background/celery/tasks/hooks/tasks.py
+++ b/backend/onyx/background/celery/tasks/hooks/tasks.py
@@ -0,0 +1,35 @@
+from celery import shared_task
+
+from onyx.configs.app_configs import JOB_TIMEOUT
+from onyx.configs.constants import OnyxCeleryTask
+from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.db.hook import cleanup_old_execution_logs__no_commit
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+_HOOK_EXECUTION_LOG_RETENTION_DAYS: int = 30
+
+
+@shared_task(
+    name=OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
+    ignore_result=True,
+    soft_time_limit=JOB_TIMEOUT,
+    trail=False,
+)
+def hook_execution_log_cleanup_task(*, tenant_id: str) -> None:  # noqa: ARG001
+    """Delete hook execution logs older than the retention window.
+
+    Runs the cleanup with ``max_age_days=_HOOK_EXECUTION_LOG_RETENTION_DAYS``
+    in a session for the current tenant and commits it.  ``tenant_id`` is
+    accepted but not read in the body.
+
+    Raises:
+        Exception: any failure is logged with its traceback and re-raised.
+    """
+    try:
+        with get_session_with_current_tenant() as db_session:
+            deleted: int = cleanup_old_execution_logs__no_commit(
+                db_session=db_session,
+                max_age_days=_HOOK_EXECUTION_LOG_RETENTION_DAYS,
+            )
+            # The helper does not commit (per its __no_commit suffix), so the
+            # delete is committed here.
+            db_session.commit()
+            # Only log when something was actually removed.
+            if deleted:
+                logger.info(
+                    f"Deleted {deleted} hook execution log(s) older than "
+                    f"{_HOOK_EXECUTION_LOG_RETENTION_DAYS} days."
+                )
+    except Exception:
+        logger.exception("Failed to clean up hook execution logs")
+        raise
--- a/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
+++ b/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
@@ -24,6 +24,7 @@ from onyx.configs.app_configs import MANAGED_VESPA
 from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
 from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
+from onyx.configs.constants import CELERY_USER_FILE_DELETE_TASK_EXPIRES
 from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
 from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
@@ -33,6 +34,7 @@ from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisLocks
+from onyx.configs.constants import USER_FILE_DELETE_MAX_QUEUE_DEPTH
 from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
 from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
 from onyx.connectors.file.connector import LocalFileConnector
@@ -91,6 +93,17 @@ def _user_file_delete_lock_key(user_file_id: str | UUID) -> str:
    return f"{OnyxRedisLocks.USER_FILE_DELETE_LOCK_PREFIX}:{user_file_id}"


+def _user_file_delete_queued_key(user_file_id: str | UUID) -> str:
+    """Build the Redis key marking that a delete task is queued for a user file.
+
+    The beat generator creates this key with a TTL of
+    CELERY_USER_FILE_DELETE_TASK_EXPIRES before it enqueues a
+    delete_single_user_file task, and the worker removes it as soon as it
+    starts on the file.  While the key exists the beat skips the file, so no
+    duplicate task is added for a file that already has one in flight.
+    """
+    key_prefix = OnyxRedisLocks.USER_FILE_DELETE_QUEUED_PREFIX
+    return f"{key_prefix}:{user_file_id}"
+
+
 def get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:
    redis_celery: Redis = celery_app.broker_connection().channel().client  # type: ignore
    return celery_get_queue_length(
@@ -546,7 +559,23 @@ def process_single_user_file(
    ignore_result=True,
 )
 def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
-    """Scan for user files with DELETING status and enqueue per-file tasks."""
+    """Scan for user files with DELETING status and enqueue per-file tasks.
+
+    Three mechanisms prevent queue runaway (mirrors check_user_file_processing):
+
+    1. **Queue depth backpressure** – if the broker queue already has more than
+       USER_FILE_DELETE_MAX_QUEUE_DEPTH items we skip this beat cycle entirely.
+
+    2. **Per-file queued guard** – before enqueuing a task we set a short-lived
+       Redis key (TTL = CELERY_USER_FILE_DELETE_TASK_EXPIRES).  If that key
+       already exists the file already has a live task in the queue, so we skip
+       it.  The worker deletes the key the moment it picks up the task so the
+       next beat cycle can re-enqueue if the file is still DELETING.
+
+    3. **Task expiry** – every enqueued task carries an `expires` value equal to
+       CELERY_USER_FILE_DELETE_TASK_EXPIRES.  If a task is still sitting in
+       the queue after that deadline, Celery discards it without touching the DB.
+    """
    task_logger.info("check_for_user_file_delete - Starting")
    redis_client = get_redis_client(tenant_id=tenant_id)
    lock: RedisLock = redis_client.lock(
@@ -555,8 +584,23 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
    )
    if not lock.acquire(blocking=False):
        return None
+
    enqueued = 0
+    skipped_guard = 0
    try:
+        # --- Protection 1: queue depth backpressure ---
+        # NOTE: must use the broker's Redis client (not redis_client) because
+        # Celery queues live on a separate Redis DB with CELERY_SEPARATOR keys.
+        r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore
+        queue_len = celery_get_queue_length(OnyxCeleryQueues.USER_FILE_DELETE, r_celery)
+        if queue_len > USER_FILE_DELETE_MAX_QUEUE_DEPTH:
+            task_logger.warning(
+                f"check_for_user_file_delete - Queue depth {queue_len} exceeds "
+                f"{USER_FILE_DELETE_MAX_QUEUE_DEPTH}, skipping enqueue for "
+                f"tenant={tenant_id}"
+            )
+            return None
+
        with get_session_with_current_tenant() as db_session:
            user_file_ids = (
                db_session.execute(
@@ -568,23 +612,40 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
                .all()
            )
            for user_file_id in user_file_ids:
-                self.app.send_task(
-                    OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
-                    kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
-                    queue=OnyxCeleryQueues.USER_FILE_DELETE,
-                    priority=OnyxCeleryPriority.HIGH,
+                # --- Protection 2: per-file queued guard ---
+                queued_key = _user_file_delete_queued_key(user_file_id)
+                guard_set = redis_client.set(
+                    queued_key,
+                    1,
+                    ex=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
+                    nx=True,
                )
+                if not guard_set:
+                    skipped_guard += 1
+                    continue
+
+                # --- Protection 3: task expiry ---
+                try:
+                    self.app.send_task(
+                        OnyxCeleryTask.DELETE_SINGLE_USER_FILE,
+                        kwargs={
+                            "user_file_id": str(user_file_id),
+                            "tenant_id": tenant_id,
+                        },
+                        queue=OnyxCeleryQueues.USER_FILE_DELETE,
+                        priority=OnyxCeleryPriority.HIGH,
+                        expires=CELERY_USER_FILE_DELETE_TASK_EXPIRES,
+                    )
+                except Exception:
+                    redis_client.delete(queued_key)
+                    raise
                enqueued += 1
-    except Exception as e:
-        task_logger.exception(
-            f"check_for_user_file_delete - Error enqueuing deletes - {e.__class__.__name__}"
-        )
-        return None
    finally:
        if lock.owned():
            lock.release()
+
    task_logger.info(
-        f"check_for_user_file_delete - Enqueued {enqueued} tasks for tenant={tenant_id}"
+        f"check_for_user_file_delete - Enqueued {enqueued} tasks, skipped_guard={skipped_guard} for tenant={tenant_id}"
    )
    return None

@@ -602,6 +663,9 @@ def delete_user_file_impl(
    file_lock: RedisLock | None = None
    if redis_locking:
        redis_client = get_redis_client(tenant_id=tenant_id)
+        # Clear the queued guard so the beat can re-enqueue if deletion fails
+        # and the file remains in DELETING status.
+        redis_client.delete(_user_file_delete_queued_key(user_file_id))
        file_lock = redis_client.lock(
            _user_file_delete_lock_key(user_file_id),
            timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
--- a/backend/onyx/cache/postgres_backend.py
+++ b/backend/onyx/cache/postgres_backend.py
@@ -297,7 +297,9 @@ class PostgresCacheBackend(CacheBackend):

    def _lock_id_for(self, name: str) -> int:
        """Map *name* to a 64-bit signed int for ``pg_advisory_lock``."""
-        h = hashlib.md5(f"{self._tenant_id}:{name}".encode()).digest()
+        h = hashlib.md5(
+            f"{self._tenant_id}:{name}".encode(), usedforsecurity=False
+        ).digest()
        return struct.unpack("q", h[:8])[0]


--- a/backend/onyx/chat/chat_utils.py
+++ b/backend/onyx/chat/chat_utils.py
@@ -30,6 +30,8 @@ from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_store.file_store import get_default_file_store
 from onyx.file_store.models import ChatFileType
 from onyx.file_store.models import FileDescriptor
+from onyx.file_store.utils import plaintext_file_name_for_id
+from onyx.file_store.utils import store_plaintext
 from onyx.kg.models import KGException
 from onyx.kg.setup.kg_default_entity_definitions import (
    populate_missing_default_entity_types__commit,
@@ -289,6 +291,33 @@ def process_kg_commands(
        raise KGException("KG setup done")


+def _get_or_extract_plaintext(
+    file_id: str,
+    extract_fn: Callable[[], str],
+) -> str:
+    """Load cached plaintext for a file, or extract and store it.
+
+    Tries to read pre-stored plaintext from the file store.  On a miss,
+    calls extract_fn to produce the text, then stores the result so
+    future calls skip the expensive extraction.
+
+    Args:
+        file_id: Id used to derive the plaintext cache key and to store the
+            extracted text.
+        extract_fn: Zero-argument callable that produces the text; only
+            called when the cached read fails.
+
+    Returns:
+        The cached text, or whatever extract_fn returned.
+    """
+    file_store = get_default_file_store()
+    plaintext_key = plaintext_file_name_for_id(file_id)
+
+    # Try cached plaintext first.
+    try:
+        # NOTE(review): plaintext_io is not explicitly closed here; confirm
+        # whether the file store expects callers to close it.
+        plaintext_io = file_store.read_file(plaintext_key, mode="b")
+        return plaintext_io.read().decode("utf-8")
+    except Exception:
+        # Any failure (read or UTF-8 decode) falls through to extraction.
+        # NOTE(review): an ordinary cache miss presumably lands here too and
+        # is logged with a traceback; confirm that log level is intended.
+        logger.exception(f"Error when reading file, id={file_id}")
+
+    # Cache miss — extract and store.
+    content_text = extract_fn()
+    # Empty extraction results are not cached.
+    if content_text:
+        store_plaintext(file_id, content_text)
+    return content_text
+
+
@log_function_time(print_only=True)
 def load_chat_file(
    file_descriptor: FileDescriptor, db_session: Session
@@ -303,12 +332,23 @@ def load_chat_file(
    file_type = ChatFileType(file_descriptor["type"])

    if file_type.is_text_file():
-        try:
-            content_text = extract_file_text(
+        file_id = file_descriptor["id"]
+
+        def _extract() -> str:
+            return extract_file_text(
                file=file_io,
                file_name=file_descriptor.get("name") or "",
                break_on_unprocessable=False,
            )
+
+        # Use the user_file_id as cache key when available (matches what
+        # the celery indexing worker stores), otherwise fall back to the
+        # file store id (covers code-interpreter-generated files, etc.).
+        user_file_id_str = file_descriptor.get("user_file_id")
+        cache_key = user_file_id_str or file_id
+
+        try:
+            content_text = _get_or_extract_plaintext(cache_key, _extract)
        except Exception as e:
            logger.warning(
                f"Failed to retrieve content for file {file_descriptor['id']}: {str(e)}"
--- a/backend/onyx/chat/llm_loop.py
+++ b/backend/onyx/chat/llm_loop.py
@@ -36,9 +36,11 @@ from onyx.db.memory import add_memory
 from onyx.db.memory import update_memory_at_index
 from onyx.db.memory import UserMemoryContext
 from onyx.db.models import Persona
+from onyx.llm.constants import LlmProviderNames
 from onyx.llm.interfaces import LLM
 from onyx.llm.interfaces import LLMUserIdentity
 from onyx.llm.interfaces import ToolChoiceOptions
+from onyx.llm.utils import is_true_openai_model
 from onyx.prompts.chat_prompts import IMAGE_GEN_REMINDER
 from onyx.prompts.chat_prompts import OPEN_URL_REMINDER
 from onyx.server.query_and_chat.placement import Placement
@@ -72,6 +74,70 @@ from shared_configs.contextvars import get_current_tenant_id
 logger = setup_logger()


+class EmptyLLMResponseError(RuntimeError):
+    """Signals that an LLM stream finished without producing a usable answer.
+
+    The client-facing message doubles as the exception message. All arguments
+    are keyword-only and are stored unchanged as attributes so the handler can
+    report them.
+    """
+
+    def __init__(
+        self,
+        *,
+        provider: str,
+        model: str,
+        tool_choice: ToolChoiceOptions,
+        client_error_msg: str,
+        error_code: str = "EMPTY_LLM_RESPONSE",
+        is_retryable: bool = True,
+    ) -> None:
+        super().__init__(client_error_msg)
+
+        # What gets reported back to the client.
+        self.client_error_msg = client_error_msg
+        self.error_code = error_code
+        self.is_retryable = is_retryable
+
+        # Context of the request that produced the empty response.
+        self.provider = provider
+        self.model = model
+        self.tool_choice = tool_choice
+
+
+def _build_empty_llm_response_error(
+    llm: LLM,
+    llm_step_result: LlmStepResult,
+    tool_choice: ToolChoiceOptions,
+) -> EmptyLLMResponseError:
+    """Pick the EmptyLLMResponseError that best explains an empty LLM step.
+
+    A true OpenAI model that also produced no reasoning gets a non-retryable
+    BUDGET_EXCEEDED error pointing at quota/billing. Every other case gets the
+    generic error, which keeps EmptyLLMResponseError's default error code and
+    retry flag.
+    """
+    provider = llm.config.model_provider
+    model = llm.config.model_name
+
+    # OpenAI quota exhaustion has been observed as a streamed "stop" with zero
+    # content, so an OpenAI step without any reasoning output is reported as a
+    # likely account-level problem instead of a generic tool-calling error.
+    likely_openai_quota_issue = (
+        not llm_step_result.reasoning
+        and provider == LlmProviderNames.OPENAI
+        and is_true_openai_model(provider, model)
+    )
+
+    if not likely_openai_quota_issue:
+        return EmptyLLMResponseError(
+            provider=provider,
+            model=model,
+            tool_choice=tool_choice,
+            client_error_msg=(
+                "The selected model returned no final answer before the stream "
+                "completed. No text or tool calls were received from the upstream "
+                "provider."
+            ),
+        )
+
+    return EmptyLLMResponseError(
+        provider=provider,
+        model=model,
+        tool_choice=tool_choice,
+        client_error_msg=(
+            "The selected OpenAI model returned an empty streamed response "
+            "before producing any tokens. This commonly happens when the API "
+            "key or project has no remaining quota or billing is not enabled. "
+            "Verify quota and billing for this key and try again."
+        ),
+        error_code="BUDGET_EXCEEDED",
+        is_retryable=False,
+    )
+
+
 def _looks_like_xml_tool_call_payload(text: str | None) -> bool:
    """Detect XML-style marshaled tool calls emitted as plain text."""
    if not text:
@@ -613,7 +679,12 @@ def run_llm_loop(
            )
            citation_processor.update_citation_mapping(project_citation_mapping)

-        llm_step_result: LlmStepResult | None = None
+        llm_step_result = LlmStepResult(
+            reasoning=None,
+            answer=None,
+            tool_calls=None,
+            raw_answer=None,
+        )

        # Pass the total budget to construct_message_history, which will handle token allocation
        available_tokens = llm.config.max_input_tokens
@@ -1084,12 +1155,18 @@ def run_llm_loop(
                # As long as 1 tool with citeable documents is called at any point, we ask the LLM to try to cite
                should_cite_documents = True

-        if not llm_step_result or not llm_step_result.answer:
+        if not llm_step_result.answer and not llm_step_result.tool_calls:
+            raise _build_empty_llm_response_error(
+                llm=llm,
+                llm_step_result=llm_step_result,
+                tool_choice=tool_choice,
+            )
+
+        if not llm_step_result.answer:
            raise RuntimeError(
-                "The LLM did not return an answer. "
-                "Typically this is an issue with LLMs that do not support tool calling natively, "
-                "or the model serving API is not configured correctly. "
-                "This may also happen with models that are lower quality outputting invalid tool calls."
+                "The LLM did not return a final answer after tool execution. "
+                "Typically this indicates invalid tool-call output, a model/provider mismatch, "
+                "or serving API misconfiguration."
            )

        emitter.emit(
--- a/backend/onyx/chat/llm_step.py
+++ b/backend/onyx/chat/llm_step.py
@@ -1013,6 +1013,10 @@ def run_llm_step_pkt_generator(
    accumulated_reasoning = ""
    accumulated_answer = ""
    accumulated_raw_answer = ""
+    stream_chunk_count = 0
+    actionable_chunk_count = 0
+    empty_chunk_count = 0
+    finish_reasons: set[str] = set()
    xml_tool_call_content_filter = _XmlToolCallContentFilter()

    processor_state: Any = None
@@ -1145,6 +1149,7 @@ def run_llm_step_pkt_generator(
            user_identity=user_identity,
            timeout_override=timeout_override,
        ):
+            stream_chunk_count += 1
            if packet.usage:
                usage = packet.usage
                span_generation.span_data.usage = {
@@ -1154,16 +1159,21 @@ def run_llm_step_pkt_generator(
                    "cache_creation_input_tokens": usage.cache_creation_input_tokens,
                }
                # Note: LLM cost tracking is now handled in multi_llm.py
+            finish_reason = packet.choice.finish_reason
+            if finish_reason:
+                finish_reasons.add(str(finish_reason))
            delta = packet.choice.delta

            # Weird behavior from some model providers, just log and ignore for now
            if (
-                delta.content is None
+                not delta.content
                and delta.reasoning_content is None
-                and delta.tool_calls is None
+                and not delta.tool_calls
            ):
+                empty_chunk_count += 1
                logger.warning(
-                    f"LLM packet is empty (no contents, reasoning or tool calls). Skipping: {packet}"
+                    "LLM packet is empty (no content, reasoning, or tool calls). "
+                    f"finish_reason={finish_reason}. Skipping: {packet}"
                )
                continue

@@ -1172,6 +1182,8 @@ def run_llm_step_pkt_generator(
                    time.monotonic() - stream_start_time
                )
                first_action_recorded = True
+            if _delta_has_action(delta):
+                actionable_chunk_count += 1

            if custom_token_processor:
                # The custom token processor can modify the deltas for specific custom logic
@@ -1307,6 +1319,15 @@ def run_llm_step_pkt_generator(
        else:
            logger.debug("Tool calls: []")

+    if actionable_chunk_count == 0:
+        logger.warning(
+            "LLM stream completed with no actionable deltas. "
+            f"chunks={stream_chunk_count}, empty_chunks={empty_chunk_count}, "
+            f"finish_reasons={sorted(finish_reasons)}, "
+            f"provider={llm.config.model_provider}, model={llm.config.model_name}, "
+            f"tool_choice={tool_choice}, tools_sent={len(tool_definitions)}"
+        )
+
    return (
        LlmStepResult(
            reasoning=accumulated_reasoning if accumulated_reasoning else None,
--- a/backend/onyx/chat/models.py
+++ b/backend/onyx/chat/models.py
@@ -8,6 +8,7 @@ from onyx.configs.constants import MessageType
 from onyx.context.search.models import SearchDoc
 from onyx.file_store.models import InMemoryChatFile
 from onyx.server.query_and_chat.models import MessageResponseIDInfo
+from onyx.server.query_and_chat.models import MultiModelMessageResponseIDInfo
 from onyx.server.query_and_chat.streaming_models import CitationInfo
 from onyx.server.query_and_chat.streaming_models import GeneratedImage
 from onyx.server.query_and_chat.streaming_models import Packet
@@ -35,7 +36,13 @@ class CreateChatSessionID(BaseModel):
    chat_session_id: UUID


-AnswerStreamPart = Packet | MessageResponseIDInfo | StreamingError | CreateChatSessionID
+AnswerStreamPart = (
+    Packet
+    | MessageResponseIDInfo
+    | MultiModelMessageResponseIDInfo
+    | StreamingError
+    | CreateChatSessionID
+)

 AnswerStream = Iterator[AnswerStreamPart]

@@ -177,8 +184,8 @@ class ExtractedContextFiles(BaseModel):
 class SearchParams(BaseModel):
    """Resolved search filter IDs and search-tool usage for a chat turn."""

-    search_project_id: int | None
-    search_persona_id: int | None
+    project_id_filter: int | None
+    persona_id_filter: int | None
    search_usage: SearchToolUsage


--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -29,6 +29,7 @@ from onyx.chat.compression import compress_chat_history
 from onyx.chat.compression import find_summary_for_branch
 from onyx.chat.compression import get_compression_params
 from onyx.chat.emitter import get_default_emitter
+from onyx.chat.llm_loop import EmptyLLMResponseError
 from onyx.chat.llm_loop import run_llm_loop
 from onyx.chat.models import AnswerStream
 from onyx.chat.models import ChatBasicResponse
@@ -58,6 +59,7 @@ from onyx.db.chat import create_new_chat_message
 from onyx.db.chat import get_chat_session_by_id
 from onyx.db.chat import get_or_create_root_message
 from onyx.db.chat import reserve_message_id
+from onyx.db.enums import HookPoint
 from onyx.db.memory import get_memories
 from onyx.db.models import ChatMessage
 from onyx.db.models import ChatSession
@@ -67,11 +69,19 @@ from onyx.db.models import UserFile
 from onyx.db.projects import get_user_files_from_project
 from onyx.db.tools import get_tools
 from onyx.deep_research.dr_loop import run_deep_research_llm_loop
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import log_onyx_error
+from onyx.error_handling.exceptions import OnyxError
 from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_store.models import ChatFileType
 from onyx.file_store.models import InMemoryChatFile
 from onyx.file_store.utils import load_in_memory_chat_files
 from onyx.file_store.utils import verify_user_files
+from onyx.hooks.executor import execute_hook
+from onyx.hooks.executor import HookSkipped
+from onyx.hooks.executor import HookSoftFailed
+from onyx.hooks.points.query_processing import QueryProcessingPayload
+from onyx.hooks.points.query_processing import QueryProcessingResponse
 from onyx.llm.factory import get_llm_for_persona
 from onyx.llm.factory import get_llm_token_counter
 from onyx.llm.interfaces import LLM
@@ -398,13 +408,13 @@ def determine_search_params(
    """
    is_custom_persona = persona_id != DEFAULT_PERSONA_ID

-    search_project_id: int | None = None
-    search_persona_id: int | None = None
+    project_id_filter: int | None = None
+    persona_id_filter: int | None = None
    if extracted_context_files.use_as_search_filter:
        if is_custom_persona:
-            search_persona_id = persona_id
+            persona_id_filter = persona_id
        else:
-            search_project_id = project_id
+            project_id_filter = project_id

    search_usage = SearchToolUsage.AUTO
    if not is_custom_persona and project_id:
@@ -417,12 +427,34 @@ def determine_search_params(
            search_usage = SearchToolUsage.DISABLED

    return SearchParams(
-        search_project_id=search_project_id,
-        search_persona_id=search_persona_id,
+        project_id_filter=project_id_filter,
+        persona_id_filter=persona_id_filter,
        search_usage=search_usage,
    )


+def _resolve_query_processing_hook_result(
+    hook_result: QueryProcessingResponse | HookSkipped | HookSoftFailed,
+    message_text: str,
+) -> str:
+    """Turn the Query Processing hook outcome into the message text to use.
+
+    HookSkipped and HookSoftFailed leave ``message_text`` untouched. Otherwise
+    the hook's query is returned with surrounding whitespace removed. A null,
+    empty, or whitespace-only query counts as a rejection and raises OnyxError
+    with QUERY_REJECTED, using the hook's rejection message when it has one.
+    """
+    if isinstance(hook_result, (HookSkipped, HookSoftFailed)):
+        return message_text
+
+    rewritten_query = hook_result.query.strip() if hook_result.query else ""
+    if rewritten_query:
+        return rewritten_query
+
+    fallback_reason = "The hook extension for query processing did not return a valid query. No rejection reason was provided."
+    raise OnyxError(
+        OnyxErrorCode.QUERY_REJECTED,
+        hook_result.rejection_message or fallback_reason,
+    )
+
+
 def handle_stream_message_objects(
    new_msg_req: SendMessageRequest,
    user: User,
@@ -473,16 +505,24 @@ def handle_stream_message_objects(
                db_session=db_session,
            )
            yield CreateChatSessionID(chat_session_id=chat_session.id)
+            chat_session = get_chat_session_by_id(
+                chat_session_id=chat_session.id,
+                user_id=user_id,
+                db_session=db_session,
+                eager_load_persona=True,
+            )
        else:
            chat_session = get_chat_session_by_id(
                chat_session_id=new_msg_req.chat_session_id,
                user_id=user_id,
                db_session=db_session,
+                eager_load_persona=True,
            )

        persona = chat_session.persona

        message_text = new_msg_req.message
+
        user_identity = LLMUserIdentity(
            user_id=llm_user_identifier, session_id=str(chat_session.id)
        )
@@ -490,13 +530,13 @@ def handle_stream_message_objects(
        # Milestone tracking, most devs using the API don't need to understand this
        mt_cloud_telemetry(
            tenant_id=tenant_id,
-            distinct_id=user.email if not user.is_anonymous else tenant_id,
+            distinct_id=str(user.id) if not user.is_anonymous else tenant_id,
            event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
        )

        mt_cloud_telemetry(
            tenant_id=tenant_id,
-            distinct_id=user.email if not user.is_anonymous else tenant_id,
+            distinct_id=str(user.id) if not user.is_anonymous else tenant_id,
            event=MilestoneRecordType.USER_MESSAGE_SENT,
            properties={
                "origin": new_msg_req.origin.value,
@@ -574,6 +614,28 @@ def handle_stream_message_objects(
        if parent_message.message_type == MessageType.USER:
            user_message = parent_message
        else:
+            # New message — run the Query Processing hook before saving to DB.
+            # Skipped on regeneration: the message already exists and was accepted previously.
+            # Skip the hook for empty/whitespace-only messages — no meaningful query
+            # to process, and SendMessageRequest.message has no min_length guard.
+            if message_text.strip():
+                hook_result = execute_hook(
+                    db_session=db_session,
+                    hook_point=HookPoint.QUERY_PROCESSING,
+                    payload=QueryProcessingPayload(
+                        query=message_text,
+                        # Pass None for anonymous users or authenticated users without an email
+                        # (e.g. some SSO flows). QueryProcessingPayload.user_email is str | None,
+                        # so None is accepted and serialised as null in both cases.
+                        user_email=None if user.is_anonymous else user.email,
+                        chat_session_id=str(chat_session.id),
+                    ).model_dump(),
+                    response_type=QueryProcessingResponse,
+                )
+                message_text = _resolve_query_processing_hook_result(
+                    hook_result, message_text
+                )
+
            user_message = create_new_chat_message(
                chat_session_id=chat_session.id,
                parent_message=parent_message,
@@ -710,8 +772,8 @@ def handle_stream_message_objects(
            llm=llm,
            search_tool_config=SearchToolConfig(
                user_selected_filters=new_msg_req.internal_search_filters,
-                project_id=search_params.search_project_id,
-                persona_id=search_params.search_persona_id,
+                project_id_filter=search_params.project_id_filter,
+                persona_id_filter=search_params.persona_id_filter,
                bypass_acl=bypass_acl,
                slack_context=slack_context,
                enable_slack_search=_should_enable_slack_search(
@@ -913,6 +975,17 @@ def handle_stream_message_objects(
                state_container=state_container,
            )

+    except OnyxError as e:
+        if e.error_code is not OnyxErrorCode.QUERY_REJECTED:
+            log_onyx_error(e)
+        yield StreamingError(
+            error=e.detail,
+            error_code=e.error_code.code,
+            is_retryable=e.status_code >= 500,
+        )
+        db_session.rollback()
+        return
+
    except ValueError as e:
        logger.exception("Failed to process chat message.")

@@ -925,9 +998,28 @@ def handle_stream_message_objects(
        db_session.rollback()
        return

+    except EmptyLLMResponseError as e:
+        stack_trace = traceback.format_exc()
+
+        logger.warning(
+            "LLM returned an empty response "
+            f"(provider={e.provider}, model={e.model}, tool_choice={e.tool_choice})"
+        )
+
+        yield StreamingError(
+            error=e.client_error_msg,
+            stack_trace=stack_trace,
+            error_code=e.error_code,
+            is_retryable=e.is_retryable,
+            details={
+                "model": e.model,
+                "provider": e.provider,
+                "tool_choice": e.tool_choice.value,
+            },
+        )
+        db_session.rollback()
    except Exception as e:
        logger.exception(f"Failed to process chat message due to {e}")
-        error_msg = str(e)
        stack_trace = traceback.format_exc()

        if llm:
@@ -1046,10 +1138,46 @@ def llm_loop_completion_handle(
        )


-def remove_answer_citations(answer: str) -> str:
-    pattern = r"\s*\[\[\d+\]\]\(http[s]?://[^\s]+\)"
+_CITATION_LINK_START_PATTERN = re.compile(r"\s*\[\[\d+\]\]\(")

-    return re.sub(pattern, "", answer)
+
+def _find_markdown_link_end(text: str, destination_start: int) -> int | None:
+    """Return the index of the ")" that closes a markdown link destination.
+
+    Scanning starts at ``destination_start`` (just past the opening "(").
+    Nested parentheses are balanced, and a backslash causes the character
+    after it to be skipped. Returns None when no closing parenthesis is found.
+    """
+    nesting = 0
+    skip_next = False
+
+    for pos in range(destination_start, len(text)):
+        if skip_next:
+            # This character was escaped by the preceding backslash.
+            skip_next = False
+            continue
+
+        char = text[pos]
+        if char == "\\":
+            skip_next = True
+        elif char == "(":
+            nesting += 1
+        elif char == ")":
+            if nesting == 0:
+                return pos
+            nesting -= 1
+
+    return None
+
+
+def remove_answer_citations(answer: str) -> str:
+    """Strip ``[[n]](destination)`` citation links, plus leading whitespace, from ``answer``.
+
+    If a citation's destination is never closed, everything from that
+    citation onward is kept as-is and scanning stops.
+    """
+    kept: list[str] = []
+    pos = 0
+
+    while True:
+        citation = _CITATION_LINK_START_PATTERN.search(answer, pos)
+        if citation is None:
+            # No further citations: keep the remainder verbatim.
+            kept.append(answer[pos:])
+            break
+
+        closing_idx = _find_markdown_link_end(answer, citation.end())
+        if closing_idx is None:
+            # Unterminated link: keep the text before it and the link itself.
+            kept.append(answer[pos : citation.start()])
+            kept.append(answer[citation.start() :])
+            break
+
+        kept.append(answer[pos : citation.start()])
+        pos = closing_idx + 1
+
+    return "".join(kept)


@log_function_time()
@@ -1087,8 +1215,11 @@ def gather_stream(
        raise ValueError("Message ID is required")

    if answer is None:
-        # This should never be the case as these non-streamed flows do not have a stop-generation signal
-        raise RuntimeError("Answer was not generated")
+        if error_msg is not None:
+            answer = ""
+        else:
+            # This should never be the case as these non-streamed flows do not have a stop-generation signal
+            raise RuntimeError("Answer was not generated")

    return ChatBasicResponse(
        answer=answer,
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -278,14 +278,17 @@ USING_AWS_MANAGED_OPENSEARCH = (
 OPENSEARCH_PROFILING_DISABLED = (
    os.environ.get("OPENSEARCH_PROFILING_DISABLED", "").lower() == "true"
 )
-
+# Whether to disable match highlights for OpenSearch. Defaults to True for now
+# as we investigate query performance.
+OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED = (
+    os.environ.get("OPENSEARCH_MATCH_HIGHLIGHTS_DISABLED", "true").lower() == "true"
+)
 # When enabled, OpenSearch returns detailed score breakdowns for each hit.
 # Useful for debugging and tuning search relevance. Has ~10-30% performance overhead according to documentation.
 # Seems for Hybrid Search in practice, the impact is actually more like 1000x slower.
 OPENSEARCH_EXPLAIN_ENABLED = (
    os.environ.get("OPENSEARCH_EXPLAIN_ENABLED", "").lower() == "true"
 )
-
 # Analyzer used for full-text fields (title, content). Use OpenSearch built-in analyzer
 # names (e.g. "english", "standard", "german"). Affects stemming and tokenization;
 # existing indices need reindexing after a change.
@@ -318,8 +321,20 @@ VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
 OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE = int(
    os.environ.get("OPENSEARCH_MIGRATION_GET_VESPA_CHUNKS_PAGE_SIZE") or 500
 )
-OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = int(
-    os.environ.get("OPENSEARCH_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES") or 0
+# If set, will override the default number of shards and replicas for the index.
+# An unset or empty variable (e.g. `VAR=` coming from a compose/env file) means
+# "no override" and yields None rather than failing on int("") at import time.
+OPENSEARCH_INDEX_NUM_SHARDS: int | None = (
+    int(os.environ["OPENSEARCH_INDEX_NUM_SHARDS"])
+    if os.environ.get("OPENSEARCH_INDEX_NUM_SHARDS")
+    else None
+)
+OPENSEARCH_INDEX_NUM_REPLICAS: int | None = (
+    int(os.environ["OPENSEARCH_INDEX_NUM_REPLICAS"])
+    if os.environ.get("OPENSEARCH_INDEX_NUM_REPLICAS")
+    else None
+)
+ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH = (
+    os.environ.get("ONYX_SEARCH_UI_USES_OPENSEARCH_KEYWORD_SEARCH", "").lower()
+    == "true"
 )

 VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
@@ -1046,6 +1061,8 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")

 DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"

+HOOK_ENABLED = os.environ.get("HOOK_ENABLED", "").lower() == "true"
+
 INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"

 #####
--- a/backend/onyx/configs/chat_configs.py
+++ b/backend/onyx/configs/chat_configs.py
@@ -24,11 +24,11 @@ CONTEXT_CHUNKS_BELOW = int(os.environ.get("CONTEXT_CHUNKS_BELOW") or 1)
 LLM_SOCKET_READ_TIMEOUT = int(
    os.environ.get("LLM_SOCKET_READ_TIMEOUT") or "60"
 )  # 60 seconds
-# Weighting factor between Vector and Keyword Search, 1 for completely vector search
+# Weighting factor between vector and keyword search; 1 for completely vector
+# search, 0 for keyword. Enforces a valid range of [0, 1]. A supplied value from
+# the env outside of this range will be clipped to the respective end of the
+# range. Defaults to 0.5.
 HYBRID_ALPHA = max(0, min(1, float(os.environ.get("HYBRID_ALPHA") or 0.5)))
-HYBRID_ALPHA_KEYWORD = max(
-    0, min(1, float(os.environ.get("HYBRID_ALPHA_KEYWORD") or 0.4))
-)
 # Weighting factor between Title and Content of documents during search, 1 for completely
 # Title based. Default heavily favors Content because Title is also included at the top of
 # Content. This is to avoid cases where the Content is very relevant but it may not be clear
--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -177,6 +177,14 @@ USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH = 500

 CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60  # 5 minutes (in seconds)

+# How long a queued user-file-delete task is valid before workers discard it.
+# Mirrors the processing task expiry to prevent indefinite queue growth when
+# files are stuck in DELETING status and the beat keeps re-enqueuing them.
+CELERY_USER_FILE_DELETE_TASK_EXPIRES = 60  # 1 minute (in seconds)
+
+# Max queue depth before the delete beat stops enqueuing more delete tasks.
+USER_FILE_DELETE_MAX_QUEUE_DEPTH = 500
+
 CELERY_SANDBOX_FILE_SYNC_LOCK_TIMEOUT = 5 * 60  # 5 minutes (in seconds)

 DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"
@@ -469,6 +477,9 @@ class OnyxRedisLocks:
    USER_FILE_PROJECT_SYNC_QUEUED_PREFIX = "da_lock:user_file_project_sync_queued"
    USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
    USER_FILE_DELETE_LOCK_PREFIX = "da_lock:user_file_delete"
+    # Short-lived key set when a delete task is enqueued; cleared when the worker picks it up.
+    # Prevents the beat from re-enqueuing the same file while a delete task is already queued.
+    USER_FILE_DELETE_QUEUED_PREFIX = "da_lock:user_file_delete_queued"

    # Release notes
    RELEASE_NOTES_FETCH_LOCK = "da_lock:release_notes_fetch"
@@ -597,6 +608,9 @@ class OnyxCeleryTask:
    EXPORT_QUERY_HISTORY_TASK = "export_query_history_task"
    EXPORT_QUERY_HISTORY_CLEANUP_TASK = "export_query_history_cleanup_task"

+    # Hook execution log retention
+    HOOK_EXECUTION_LOG_CLEANUP_TASK = "hook_execution_log_cleanup_task"
+
    # Sandbox cleanup
    CLEANUP_IDLE_SANDBOXES = "cleanup_idle_sandboxes"
    CLEANUP_OLD_SNAPSHOTS = "cleanup_old_snapshots"
--- a/backend/onyx/connectors/canvas/init.py
+++ b/backend/onyx/connectors/canvas/init.py
--- a/backend/onyx/connectors/canvas/client.py
+++ b/backend/onyx/connectors/canvas/client.py
@@ -0,0 +1,192 @@
+from __future__ import annotations
+
+import logging
+import re
+from typing import Any
+from urllib.parse import urlparse
+
+from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
+    rl_requests,
+)
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import OnyxError
+
+logger = logging.getLogger(__name__)
+
+# Requests timeout in seconds.
+_CANVAS_CALL_TIMEOUT: int = 30
+_CANVAS_API_VERSION: str = "/api/v1"
+# Matches the "next" URL in a Canvas Link header, e.g.:
+#   <https://canvas.example.com/api/v1/courses?page=2>; rel="next"
+# Captures the URL inside the angle brackets.
+_NEXT_LINK_PATTERN: re.Pattern[str] = re.compile(r'<([^>]+)>;\s*rel="next"')
+
+
+_STATUS_TO_ERROR_CODE: dict[int, OnyxErrorCode] = {
+    401: OnyxErrorCode.CREDENTIAL_EXPIRED,
+    403: OnyxErrorCode.INSUFFICIENT_PERMISSIONS,
+    404: OnyxErrorCode.BAD_GATEWAY,
+    429: OnyxErrorCode.RATE_LIMITED,
+}
+
+
+def _error_code_for_status(status_code: int) -> OnyxErrorCode:
+    """Translate an HTTP error status into an OnyxErrorCode.
+
+    Intended for status codes >= 400. Statuses listed in
+    ``_STATUS_TO_ERROR_CODE`` use their mapped code; anything else
+    (other 4xx and all 5xx) is reported as BAD_GATEWAY, i.e. an
+    unexpected upstream error.
+    """
+    return _STATUS_TO_ERROR_CODE.get(status_code, OnyxErrorCode.BAD_GATEWAY)
+
+
+class CanvasApiClient:
+    """Thin HTTPS client for the Canvas REST API.
+
+    Sends bearer-authenticated GET requests through ``rl_requests``, turns
+    HTTP error responses into ``OnyxError`` and follows pagination via the
+    ``Link`` response header, refusing next-page URLs that point at another
+    host or do not use https.
+    """
+
+    def __init__(
+        self,
+        bearer_token: str,
+        canvas_base_url: str,
+    ) -> None:
+        """Validate the base URL and normalise it to end in ``/api/v1``.
+
+        Raises:
+            ValueError: if ``canvas_base_url`` has no host or is not https.
+        """
+        parsed_base = urlparse(canvas_base_url)
+        if not parsed_base.hostname:
+            raise ValueError("canvas_base_url must include a valid host")
+        if parsed_base.scheme != "https":
+            raise ValueError("canvas_base_url must use https")
+
+        self._bearer_token = bearer_token
+        # Strip any trailing slash and an already-present API suffix before
+        # appending it, so the suffix appears exactly once.
+        self.base_url = (
+            canvas_base_url.rstrip("/").removesuffix(_CANVAS_API_VERSION)
+            + _CANVAS_API_VERSION
+        )
+        # Hostname is already validated above; reuse parsed_base instead
+        # of re-parsing.  Used by _parse_next_link to validate pagination URLs.
+        self._expected_host: str = parsed_base.hostname
+
+    def get(
+        self,
+        endpoint: str = "",
+        params: dict[str, Any] | None = None,
+        full_url: str | None = None,
+    ) -> tuple[Any, str | None]:
+        """Make a GET request to the Canvas API.
+
+        Returns a tuple of (json_body, next_url).
+        next_url is parsed from the Link header and is None if there are no more pages.
+        If full_url is provided, it is used directly (for following pagination links)
+        and ``params`` is ignored.
+
+        Raises:
+            OnyxError: for any response with status >= 400 (carrying the most
+                specific message found in the body), for a response with
+                status < 300 whose body is not valid JSON, and for a
+                pagination link that fails host/scheme validation.
+
+        Security note: full_url must only be set to values returned by
+        ``_parse_next_link``, which validates the host against the configured
+        Canvas base URL.  Passing an arbitrary URL would leak the bearer token.
+        """
+        # full_url is used when following pagination (Canvas returns the
+        # next-page URL in the Link header).  For the first request we build
+        # the URL from the endpoint name instead.
+        url = full_url if full_url else self._build_url(endpoint)
+        headers = self._build_headers()
+
+        response = rl_requests.get(
+            url,
+            headers=headers,
+            params=params if not full_url else None,
+            timeout=_CANVAS_CALL_TIMEOUT,
+        )
+
+        try:
+            response_json = response.json()
+        except ValueError as e:
+            if response.status_code < 300:
+                raise OnyxError(
+                    OnyxErrorCode.BAD_GATEWAY,
+                    detail=f"Invalid JSON in Canvas response: {e}",
+                )
+            # Non-success response with an unparsable body: continue with an
+            # empty body so the status-code handling below still runs.
+            logger.warning(
+                "Failed to parse JSON from Canvas error response (status=%d): %s",
+                response.status_code,
+                e,
+            )
+            response_json = {}
+
+        if response.status_code >= 400:
+            # Try to extract the most specific error message from the
+            # Canvas response body.  Canvas uses three different shapes
+            # depending on the endpoint and error type:
+            default_error: str = response.reason or f"HTTP {response.status_code}"
+            error = default_error
+            if isinstance(response_json, dict):
+                # Shape 1: {"error": {"message": "Not authorized"}}
+                error_field = response_json.get("error")
+                if isinstance(error_field, dict):
+                    response_error = error_field.get("message", "")
+                    if response_error:
+                        error = response_error
+                # Shape 2: {"error": "Invalid access token"}
+                # An empty string is ignored so the fallbacks below still apply
+                # and the raised error never carries an empty detail.
+                elif isinstance(error_field, str) and error_field:
+                    error = error_field
+                # Shape 3: {"errors": [{"message": "..."}]}
+                # Used for validation errors.  Only use as fallback if
+                # we didn't already find a more specific message above.
+                if error == default_error:
+                    errors_list = response_json.get("errors")
+                    if isinstance(errors_list, list) and errors_list:
+                        first_error = errors_list[0]
+                        if isinstance(first_error, dict):
+                            msg = first_error.get("message", "")
+                            if msg:
+                                error = msg
+            raise OnyxError(
+                _error_code_for_status(response.status_code),
+                detail=error,
+                status_code_override=response.status_code,
+            )
+
+        next_url = self._parse_next_link(response.headers.get("Link", ""))
+        return response_json, next_url
+
+    def _parse_next_link(self, link_header: str) -> str | None:
+        """Extract the 'next' URL from a Canvas Link header.
+
+        Only returns URLs whose host matches the configured Canvas base URL
+        to prevent leaking the bearer token to arbitrary hosts.  Returns None
+        when the header contains no ``rel="next"`` entry.
+
+        Raises:
+            OnyxError: (BAD_GATEWAY) if the next link points at a different
+                host or does not use https.
+        """
+        expected_host = self._expected_host
+        # Only the first rel="next" match is considered: it is either
+        # returned or rejected.
+        for match in _NEXT_LINK_PATTERN.finditer(link_header):
+            url = match.group(1)
+            parsed_url = urlparse(url)
+            if parsed_url.hostname != expected_host:
+                raise OnyxError(
+                    OnyxErrorCode.BAD_GATEWAY,
+                    detail=(
+                        "Canvas pagination returned an unexpected host "
+                        f"({parsed_url.hostname}); expected {expected_host}"
+                    ),
+                )
+            if parsed_url.scheme != "https":
+                raise OnyxError(
+                    OnyxErrorCode.BAD_GATEWAY,
+                    detail=(
+                        "Canvas pagination link must use https, "
+                        f"got {parsed_url.scheme!r}"
+                    ),
+                )
+            return url
+        return None
+
+    def _build_headers(self) -> dict[str, str]:
+        """Return the Authorization header with the bearer token."""
+        return {"Authorization": f"Bearer {self._bearer_token}"}
+
+    def _build_url(self, endpoint: str) -> str:
+        """Build a full Canvas API URL from an endpoint path.
+
+        Used for the first request of a listing, where ``endpoint`` is
+        expected to be non-empty (e.g. ``"courses"``, ``"announcements"``).
+        An empty endpoint is still tolerated and yields the bare base URL.
+        Leading slashes are stripped to avoid a double slash in the result;
+        self.base_url is already normalized with no trailing slash.
+        """
+        final_url = self.base_url
+        clean_endpoint = endpoint.lstrip("/")
+        if clean_endpoint:
+            final_url += "/" + clean_endpoint
+        return final_url
--- a/backend/onyx/connectors/canvas/connector.py
+++ b/backend/onyx/connectors/canvas/connector.py
@@ -0,0 +1,74 @@
+from typing import Literal
+from typing import TypeAlias
+
+from pydantic import BaseModel
+
+from onyx.connectors.models import ConnectorCheckpoint
+
+
+class CanvasCourse(BaseModel):
+    """Pydantic model describing a Canvas course.
+
+    All five fields are required (none declares a default). Field names
+    presumably mirror the Canvas course JSON payload -- confirm against the
+    code that constructs this model.
+    """
+
+    id: int
+    name: str
+    course_code: str
+    # Stored as a plain string; no datetime parsing is done by this model.
+    created_at: str
+    workflow_state: str
+
+
+class CanvasPage(BaseModel):
+    """Pydantic model describing a single page inside a Canvas course.
+
+    Unlike the sibling models, the identifier field is named ``page_id``
+    rather than ``id``. ``body`` is the only optional field.
+    """
+
+    page_id: int
+    url: str
+    title: str
+    # Optional: defaults to None when no body is supplied.
+    body: str | None = None
+    # Timestamps are stored as plain strings; this model does not parse them.
+    created_at: str
+    updated_at: str
+    course_id: int
+
+
+class CanvasAssignment(BaseModel):
+    """Pydantic model describing a Canvas assignment.
+
+    ``description`` and ``due_at`` are optional and default to None; every
+    other field is required.
+    """
+
+    id: int
+    name: str
+    # Optional: defaults to None when no description is supplied.
+    description: str | None = None
+    html_url: str
+    course_id: int
+    # Timestamps are stored as plain strings; this model does not parse them.
+    created_at: str
+    updated_at: str
+    # Optional: defaults to None when no due date is supplied.
+    due_at: str | None = None
+
+
+class CanvasAnnouncement(BaseModel):
+    """Pydantic model describing a Canvas announcement.
+
+    ``message`` and ``posted_at`` are optional and default to None; every
+    other field is required.
+    """
+
+    id: int
+    title: str
+    # Optional: defaults to None when no message is supplied.
+    message: str | None = None
+    html_url: str
+    # Optional; stored as a plain string when present.
+    posted_at: str | None = None
+    course_id: int
+
+
+CanvasStage: TypeAlias = Literal["pages", "assignments", "announcements"]
+
+
+class CanvasConnectorCheckpoint(ConnectorCheckpoint):
+    """Resumable position of a Canvas indexing run.
+
+    The position is a three-level cursor: which course, which item type
+    within that course, and which page of results within that item type.
+
+    Attributes:
+        course_ids: Materialized list of course IDs to process.
+        current_course_index: Position in ``course_ids`` of the course
+            currently being processed.
+        stage: Item type being processed for the current course; one of
+            the ``CanvasStage`` literals.
+        next_url: Pagination cursor within the current stage. ``None``
+            means start from the first page; a URL means resume there.
+
+    Invariant:
+        Whenever ``current_course_index`` moves forward, ``stage`` goes
+        back to ``"pages"`` and ``next_url`` goes back to ``None``.
+        ``advance_course`` performs all three updates together.
+    """
+
+    course_ids: list[int] = []
+    current_course_index: int = 0
+    stage: CanvasStage = "pages"
+    next_url: str | None = None
+
+    def advance_course(self) -> None:
+        """Step to the following course, clearing the per-course cursor."""
+        # Clear the within-course state first, then bump the course index;
+        # the three assignments are independent of one another.
+        self.next_url = None
+        self.stage = "pages"
+        self.current_course_index += 1
--- a/backend/onyx/connectors/confluence/onyx_confluence.py
+++ b/backend/onyx/connectors/confluence/onyx_confluence.py
@@ -123,7 +123,7 @@ class OnyxConfluence:

        self.shared_base_kwargs: dict[str, str | int | bool] = {
            "api_version": "cloud" if is_cloud else "latest",
-            "backoff_and_retry": True,
+            "backoff_and_retry": False,
            "cloud": is_cloud,
        }
        if timeout:
@@ -456,7 +456,7 @@ class OnyxConfluence:
                        return attr(*args, **kwargs)

                except HTTPError as e:
-                    delay_until = _handle_http_error(e, attempt)
+                    delay_until = _handle_http_error(e, attempt, MAX_RETRIES)
                    logger.warning(
                        f"HTTPError in confluence call. Retrying in {delay_until} seconds..."
                    )
--- a/backend/onyx/connectors/confluence/utils.py
+++ b/backend/onyx/connectors/confluence/utils.py
@@ -363,7 +363,7 @@ def handle_confluence_rate_limit(confluence_call: F) -> F:
                # and applying our own retries in a more specific set of circumstances
                return confluence_call(*args, **kwargs)
            except requests.HTTPError as e:
-                delay_until = _handle_http_error(e, attempt)
+                delay_until = _handle_http_error(e, attempt, MAX_RETRIES)
                logger.warning(
                    f"HTTPError in confluence call. Retrying in {delay_until} seconds..."
                )
@@ -384,7 +384,7 @@ def handle_confluence_rate_limit(confluence_call: F) -> F:
    return cast(F, wrapped_call)


-def _handle_http_error(e: requests.HTTPError, attempt: int) -> int:
+def _handle_http_error(e: requests.HTTPError, attempt: int, max_retries: int) -> int:
    MIN_DELAY = 2
    MAX_DELAY = 60
    STARTING_DELAY = 5
@@ -408,6 +408,17 @@ def _handle_http_error(e: requests.HTTPError, attempt: int) -> int:

        raise e

+    if e.response.status_code >= 500:
+        if attempt >= max_retries - 1:
+            raise e
+
+        delay = min(STARTING_DELAY * (BACKOFF**attempt), MAX_DELAY)
+        logger.warning(
+            f"Server error {e.response.status_code}. "
+            f"Retrying in {delay} seconds (attempt {attempt + 1})..."
+        )
+        return math.ceil(time.monotonic() + delay)
+
    if (
        e.response.status_code != 429
        and RATE_LIMIT_MESSAGE_LOWERCASE not in e.response.text.lower()
--- a/backend/onyx/connectors/google_utils/google_utils.py
+++ b/backend/onyx/connectors/google_utils/google_utils.py
@@ -157,9 +157,7 @@ def _execute_single_retrieval(
            logger.error(f"Error executing request: {e}")
            raise e
        elif _is_rate_limit_error(e):
-            results = _execute_with_retry(
-                lambda: retrieval_function(**request_kwargs).execute()
-            )
+            results = _execute_with_retry(retrieval_function(**request_kwargs))
        elif e.resp.status == 404 or e.resp.status == 403:
            if continue_on_404_or_403:
                logger.debug(f"Error executing request: {e}")
--- a/backend/onyx/connectors/jira/connector.py
+++ b/backend/onyx/connectors/jira/connector.py
@@ -10,6 +10,7 @@ from datetime import timedelta
 from datetime import timezone
 from typing import Any

+import requests
 from jira import JIRA
 from jira.exceptions import JIRAError
 from jira.resources import Issue
@@ -239,29 +240,53 @@ def enhanced_search_ids(
    )


-def bulk_fetch_issues(
-    jira_client: JIRA, issue_ids: list[str], fields: str | None = None
-) -> list[Issue]:
-    # TODO: move away from this jira library if they continue to not support
-    # the endpoints we need. Using private fields is not ideal, but
-    # is likely fine for now since we pin the library version
+def _bulk_fetch_request(
+    jira_client: JIRA, issue_ids: list[str], fields: str | None
+) -> list[dict[str, Any]]:
+    """Raw POST to the bulkfetch endpoint. Returns the list of raw issue dicts."""
    bulk_fetch_path = jira_client._get_url("issue/bulkfetch")
-
    # Prepare the payload according to Jira API v3 specification
    payload: dict[str, Any] = {"issueIdsOrKeys": issue_ids}
-
    # Only restrict fields if specified, might want to explicitly do this in the future
    # to avoid reading unnecessary data
    payload["fields"] = fields.split(",") if fields else ["*all"]

+    resp = jira_client._session.post(bulk_fetch_path, json=payload)
+    return resp.json()["issues"]
+
+
+def bulk_fetch_issues(
+    jira_client: JIRA, issue_ids: list[str], fields: str | None = None
+) -> list[Issue]:
+    # TODO(evan): move away from this jira library if they continue to not support
+    # the endpoints we need. Using private fields is not ideal, but
+    # is likely fine for now since we pin the library version
+
    try:
-        response = jira_client._session.post(bulk_fetch_path, json=payload).json()
+        raw_issues = _bulk_fetch_request(jira_client, issue_ids, fields)
+    except requests.exceptions.JSONDecodeError:
+        if len(issue_ids) <= 1:
+            logger.exception(
+                f"Jira bulk-fetch response for issue(s) {issue_ids} could not "
+                f"be decoded as JSON (response too large or truncated)."
+            )
+            raise
+
+        mid = len(issue_ids) // 2
+        logger.warning(
+            f"Jira bulk-fetch JSON decode failed for batch of {len(issue_ids)} issues. "
+            f"Splitting into sub-batches of {mid} and {len(issue_ids) - mid}."
+        )
+        left = bulk_fetch_issues(jira_client, issue_ids[:mid], fields)
+        right = bulk_fetch_issues(jira_client, issue_ids[mid:], fields)
+        return left + right
    except Exception as e:
        logger.error(f"Error fetching issues: {e}")
-        raise e
+        raise
+
    return [
        Issue(jira_client._options, jira_client._session, raw=issue)
-        for issue in response["issues"]
+        for issue in raw_issues
    ]


--- a/backend/onyx/connectors/notion/connector.py
+++ b/backend/onyx/connectors/notion/connector.py
@@ -53,7 +53,7 @@ class NotionPage(BaseModel):
    id: str
    created_time: str
    last_edited_time: str
-    archived: bool
+    in_trash: bool
    properties: dict[str, Any]
    url: str

@@ -63,6 +63,13 @@ class NotionPage(BaseModel):
    )


+class NotionDataSource(BaseModel):
+    """Represents a Notion Data Source within a database."""
+
+    # Identifier of the data source (required).
+    id: str
+    # Display name; falls back to an empty string when none is supplied.
+    name: str = ""
+
+
 class NotionBlock(BaseModel):
    """Represents a Notion Block object"""

@@ -107,7 +114,7 @@ class NotionConnector(LoadConnector, PollConnector):
        self.batch_size = batch_size
        self.headers = {
            "Content-Type": "application/json",
-            "Notion-Version": "2022-06-28",
+            "Notion-Version": "2026-03-11",
        }
        self.indexed_pages: set[str] = set()
        self.root_page_id = root_page_id
@@ -127,6 +134,9 @@ class NotionConnector(LoadConnector, PollConnector):
        # Maps child page IDs to their containing page ID (discovered in _read_blocks).
        # Used to resolve block_id parent types to the actual containing page.
        self._child_page_parent_map: dict[str, str] = {}
+        # Maps data_source_id -> database_id (populated in _read_pages_from_database).
+        # Used to resolve data_source_id parent types back to the database.
+        self._data_source_to_database_map: dict[str, str] = {}

    @classmethod
    @override
@@ -227,7 +237,11 @@ class NotionConnector(LoadConnector, PollConnector):

    @retry(tries=3, delay=1, backoff=2)
    def _fetch_database_as_page(self, database_id: str) -> NotionPage:
-        """Attempt to fetch a database as a page."""
+        """Attempt to fetch a database as a page.
+
+        Note: As of API 2025-09-03, database objects no longer include
+        `properties` (schema moved to individual data sources).
+        """
        logger.debug(f"Fetching database for ID '{database_id}' as a page")
        database_url = f"https://api.notion.com/v1/databases/{database_id}"
        res = rl_requests.get(
@@ -246,18 +260,52 @@ class NotionConnector(LoadConnector, PollConnector):
            database_name[0].get("text", {}).get("content") if database_name else None
        )

+        db_data.setdefault("properties", {})
+
        return NotionPage(**db_data, database_name=database_name)

    @retry(tries=3, delay=1, backoff=2)
-    def _fetch_database(
-        self, database_id: str, cursor: str | None = None
+    def _fetch_data_sources_for_database(
+        self, database_id: str
+    ) -> list[NotionDataSource]:
+        """List the data sources that belong to one Notion database.
+
+        Issues a GET for the database object and reads its ``data_sources``
+        array. Entries without an ``id`` are dropped; a missing ``name``
+        becomes an empty string.
+
+        Args:
+            database_id: ID of the database to inspect.
+
+        Returns:
+            One ``NotionDataSource`` per usable entry. An empty list when the
+            response status is 403 or 404 (logged as an error), or when the
+            payload has no ``data_sources`` key.
+
+        Raises:
+            Exception: Whatever ``raise_for_status`` raised for any failure
+                other than 403/404, after it has been logged.
+        """
+        logger.debug(f"Fetching data sources for database '{database_id}'")
+        database_url = f"https://api.notion.com/v1/databases/{database_id}"
+        response = rl_requests.get(
+            database_url,
+            headers=self.headers,
+            timeout=_NOTION_CALL_TIMEOUT,
+        )
+        try:
+            response.raise_for_status()
+        except Exception as e:
+            inaccessible = response.status_code in (403, 404)
+            if not inaccessible:
+                logger.exception(f"Error fetching database - {response.json()}")
+                raise e
+            # Treated as "nothing to index" rather than as a failure.
+            logger.error(
+                f"Unable to access database with ID '{database_id}'. "
+                f"This is likely due to the database not being shared "
+                f"with the Onyx integration. Exact exception:\n{e}"
+            )
+            return []
+
+        payload = response.json()
+        found: list[NotionDataSource] = []
+        for entry in payload.get("data_sources", []):
+            if not entry.get("id"):
+                # Skip entries that carry no usable identifier.
+                continue
+            found.append(NotionDataSource(id=entry["id"], name=entry.get("name", "")))
+        return found
+
+    @retry(tries=3, delay=1, backoff=2)
+    def _fetch_data_source(
+        self, data_source_id: str, cursor: str | None = None
    ) -> dict[str, Any]:
-        """Fetch a database from it's ID via the Notion API."""
-        logger.debug(f"Fetching database for ID '{database_id}'")
-        block_url = f"https://api.notion.com/v1/databases/{database_id}/query"
+        """Query a data source via POST /v1/data_sources/{id}/query."""
+        logger.debug(f"Querying data source '{data_source_id}'")
+        url = f"https://api.notion.com/v1/data_sources/{data_source_id}/query"
        body = None if not cursor else {"start_cursor": cursor}
        res = rl_requests.post(
-            block_url,
+            url,
            headers=self.headers,
            json=body,
            timeout=_NOTION_CALL_TIMEOUT,
@@ -265,25 +313,14 @@ class NotionConnector(LoadConnector, PollConnector):
        try:
            res.raise_for_status()
        except Exception as e:
-            json_data = res.json()
-            code = json_data.get("code")
-            # Sep 3 2025 backend changed the error message for this case
-            # TODO: it is also now possible for there to be multiple data sources per database; at present we
-            # just don't handle that. We will need to upgrade the API to the current version + query the
-            # new data sources endpoint to handle that case correctly.
-            if code == "object_not_found" or (
-                code == "validation_error"
-                and "does not contain any data sources" in json_data.get("message", "")
-            ):
-                # this happens when a database is not shared with the integration
-                # in this case, we should just ignore the database
+            if res.status_code in (403, 404):
                logger.error(
-                    f"Unable to access database with ID '{database_id}'. "
-                    f"This is likely due to the database not being shared "
+                    f"Unable to access data source with ID '{data_source_id}'. "
+                    f"This is likely due to it not being shared "
                    f"with the Onyx integration. Exact exception:\n{e}"
                )
                return {"results": [], "next_cursor": None}
-            logger.exception(f"Error fetching database - {res.json()}")
+            logger.exception(f"Error querying data source - {res.json()}")
            raise e
        return res.json()

@@ -348,8 +385,9 @@ class NotionConnector(LoadConnector, PollConnector):
            # Fallback to workspace if we don't know the parent
            return self.workspace_id
        elif parent_type == "data_source_id":
-            # Newer Notion API may use data_source_id for databases
-            return parent.get("database_id") or parent.get("data_source_id")
+            ds_id = parent.get("data_source_id")
+            if ds_id:
+                return self._data_source_to_database_map.get(ds_id, self.workspace_id)
        elif parent_type in ["page_id", "database_id"]:
            return parent.get(parent_type)

@@ -497,18 +535,32 @@ class NotionConnector(LoadConnector, PollConnector):
        if db_node:
            hierarchy_nodes.append(db_node)

-        cursor = None
-        while True:
-            data = self._fetch_database(database_id, cursor)
+        # Discover all data sources under this database, then query each one.
+        # Even legacy single-source databases have one entry in the array.
+        data_sources = self._fetch_data_sources_for_database(database_id)
+        if not data_sources:
+            logger.warning(
+                f"Database '{database_id}' returned zero data sources — "
+                f"no pages will be indexed from this database."
+            )
+        for ds in data_sources:
+            self._data_source_to_database_map[ds.id] = database_id
+            cursor = None
+            while True:
+                data = self._fetch_data_source(ds.id, cursor)

-            for result in data["results"]:
-                obj_id = result["id"]
-                obj_type = result["object"]
-                text = self._properties_to_str(result.get("properties", {}))
-                if text:
-                    result_blocks.append(NotionBlock(id=obj_id, text=text, prefix="\n"))
+                for result in data["results"]:
+                    obj_id = result["id"]
+                    obj_type = result["object"]
+                    text = self._properties_to_str(result.get("properties", {}))
+                    if text:
+                        result_blocks.append(
+                            NotionBlock(id=obj_id, text=text, prefix="\n")
+                        )
+
+                    if not self.recursive_index_enabled:
+                        continue

-                if self.recursive_index_enabled:
                    if obj_type == "page":
                        logger.debug(
                            f"Found page with ID '{obj_id}' in database '{database_id}'"
@@ -518,7 +570,6 @@ class NotionConnector(LoadConnector, PollConnector):
                        logger.debug(
                            f"Found database with ID '{obj_id}' in database '{database_id}'"
                        )
-                        # Get nested database name from properties if available
                        nested_db_title = result.get("title", [])
                        nested_db_name = None
                        if nested_db_title and len(nested_db_title) > 0:
@@ -533,10 +584,10 @@ class NotionConnector(LoadConnector, PollConnector):
                        result_pages.extend(nested_output.child_page_ids)
                        hierarchy_nodes.extend(nested_output.hierarchy_nodes)

-            if data["next_cursor"] is None:
-                break
+                if data["next_cursor"] is None:
+                    break

-            cursor = data["next_cursor"]
+                cursor = data["next_cursor"]

        return BlockReadOutput(
            blocks=result_blocks,
@@ -807,36 +858,55 @@ class NotionConnector(LoadConnector, PollConnector):
    def _yield_database_hierarchy_nodes(
        self,
    ) -> Generator[HierarchyNode | Document, None, None]:
-        """Search for all databases and yield hierarchy nodes for each.
+        """Search for all data sources and yield hierarchy nodes for their parent databases.

        This must be called BEFORE page indexing so that database hierarchy nodes
        exist when pages inside databases reference them as parents.
+
+        With the new API, search returns data source objects instead of databases.
+        Multiple data sources can share the same parent database, so we use
+        database_id as the hierarchy node key and deduplicate via
+        _maybe_yield_hierarchy_node.
        """
        query_dict: dict[str, Any] = {
-            "filter": {"property": "object", "value": "database"},
+            "filter": {"property": "object", "value": "data_source"},
            "page_size": _NOTION_PAGE_SIZE,
        }
        pages_seen = 0
        while pages_seen < _MAX_PAGES:
            db_res = self._search_notion(query_dict)
-            for db in db_res.results:
-                db_id = db["id"]
-                # Extract title from the title array
-                title_arr = db.get("title", [])
-                db_name = None
-                if title_arr:
-                    db_name = " ".join(
-                        t.get("plain_text", "") for t in title_arr
-                    ).strip()
-                if not db_name:
+            for ds in db_res.results:
+                # Extract the parent database_id from the data source's parent
+                ds_parent = ds.get("parent", {})
+                db_id = ds_parent.get("database_id")
+                if not db_id:
+                    continue
+
+                # Populate the mapping so _get_parent_raw_id can resolve later
+                ds_id = ds.get("id")
+                if not ds_id:
+                    continue
+                self._data_source_to_database_map[ds_id] = db_id
+
+                # Fetch the database to get its actual name and parent
+                try:
+                    db_page = self._fetch_database_as_page(db_id)
+                    db_name = db_page.database_name or f"Database {db_id}"
+                    parent_raw_id = self._get_parent_raw_id(db_page.parent)
+                    db_url = (
+                        db_page.url or f"https://notion.so/{db_id.replace('-', '')}"
+                    )
+                except requests.exceptions.RequestException as e:
+                    logger.warning(
+                        f"Could not fetch database '{db_id}', "
+                        f"defaulting to workspace root. Error: {e}"
+                    )
                    db_name = f"Database {db_id}"
+                    parent_raw_id = self.workspace_id
+                    db_url = f"https://notion.so/{db_id.replace('-', '')}"

-                # Get parent using existing helper
-                parent_raw_id = self._get_parent_raw_id(db.get("parent"))
-
-                # Notion URLs omit dashes from UUIDs
-                db_url = db.get("url") or f"https://notion.so/{db_id.replace('-', '')}"
-
+                # _maybe_yield_hierarchy_node deduplicates by raw_node_id,
+                # so multiple data sources under one database produce one node.
                node = self._maybe_yield_hierarchy_node(
                    raw_node_id=db_id,
                    raw_parent_id=parent_raw_id or self.workspace_id,
--- a/backend/onyx/connectors/sharepoint/connector.py
+++ b/backend/onyx/connectors/sharepoint/connector.py
@@ -1,5 +1,6 @@
 import base64
 import copy
+import fnmatch
 import html
 import io
 import os
@@ -84,6 +85,44 @@ SHARED_DOCUMENTS_MAP_REVERSE = {v: k for k, v in SHARED_DOCUMENTS_MAP.items()}

 ASPX_EXTENSION = ".aspx"

+
+def _is_site_excluded(site_url: str, excluded_site_patterns: list[str]) -> bool:
+    """Report whether ``site_url`` matches at least one exclusion glob.
+
+    Each pattern is tried twice: once against the URL exactly as given, and
+    once with trailing slashes removed from both the URL and the pattern, so
+    a stray trailing ``/`` on either side does not defeat a match.
+
+    Args:
+        site_url: URL of the site being considered.
+        excluded_site_patterns: ``fnmatch``-style glob patterns.
+
+    Returns:
+        True if any pattern matches, False otherwise (including when the
+        pattern list is empty).
+    """
+    trimmed_url = site_url.rstrip("/")
+    return any(
+        fnmatch.fnmatch(site_url, glob)
+        or fnmatch.fnmatch(trimmed_url, glob.rstrip("/"))
+        for glob in excluded_site_patterns
+    )
+
+
+def _is_path_excluded(item_path: str, excluded_path_patterns: list[str]) -> bool:
+    """Report whether a drive item path matches at least one exclusion glob.
+
+    ``item_path`` is the path relative to the drive root, for example
+    ``"Engineering/API/report.docx"``. Every pattern is tested against both
+    the whole path and the final path segment, so a pattern such as
+    ``"*.tmp"`` applies to files at any depth.
+
+    Args:
+        item_path: Relative path of the item within its drive.
+        excluded_path_patterns: ``fnmatch``-style glob patterns.
+
+    Returns:
+        True if any pattern matches the path or its last segment.
+    """
+    # rsplit leaves a slash-free path as a single element, so [-1] is the
+    # whole string in that case.
+    basename = item_path.rsplit("/", 1)[-1]
+    candidates = (item_path, basename)
+    return any(
+        fnmatch.fnmatch(candidate, glob)
+        for glob in excluded_path_patterns
+        for candidate in candidates
+    )
+
+
+def _build_item_relative_path(parent_reference_path: str | None, item_name: str) -> str:
+    """Derive a drive-relative item path from ``parentReference.path`` and a name.
+
+    The folder portion is whatever follows the first ``"root:/"`` marker in
+    ``parent_reference_path``, percent-decoded. When the path is missing,
+    lacks the marker, or has nothing after it, the item is treated as living
+    at the drive root and only ``item_name`` is returned.
+
+    Example:
+        ``"/drives/abc/root:/Eng/API"`` with name ``"report.docx"`` gives
+        ``"Eng/API/report.docx"``.
+
+    Args:
+        parent_reference_path: The item's ``parentReference.path``, or None.
+        item_name: The item's own name.
+
+    Returns:
+        ``"<folder>/<item_name>"`` or just ``item_name``.
+    """
+    marker = "root:/"
+    if not parent_reference_path or marker not in parent_reference_path:
+        return item_name
+
+    # Everything after the first marker is the percent-encoded folder path.
+    folder = unquote(parent_reference_path.partition(marker)[2])
+    return f"{folder}/{item_name}" if folder else item_name
+
+
 DEFAULT_AUTHORITY_HOST = "https://login.microsoftonline.com"
 DEFAULT_GRAPH_API_HOST = "https://graph.microsoft.com"
 DEFAULT_SHAREPOINT_DOMAIN_SUFFIX = "sharepoint.com"
@@ -478,6 +517,7 @@ def _convert_driveitem_to_document_with_permissions(
    include_permissions: bool = False,
    parent_hierarchy_raw_node_id: str | None = None,
    access_token: str | None = None,
+    treat_sharing_link_as_public: bool = False,
 ) -> Document | ConnectorFailure | None:

    if not driveitem.name or not driveitem.id:
@@ -610,6 +650,7 @@ def _convert_driveitem_to_document_with_permissions(
            drive_item=sdk_item,
            drive_name=drive_name,
            add_prefix=True,
+            treat_sharing_link_as_public=treat_sharing_link_as_public,
        )
    else:
        external_access = ExternalAccess.empty()
@@ -644,6 +685,7 @@ def _convert_sitepage_to_document(
    graph_client: GraphClient,
    include_permissions: bool = False,
    parent_hierarchy_raw_node_id: str | None = None,
+    treat_sharing_link_as_public: bool = False,
 ) -> Document:
    """Convert a SharePoint site page to a Document object."""
    # Extract text content from the site page
@@ -773,6 +815,7 @@ def _convert_sitepage_to_document(
            graph_client=graph_client,
            site_page=site_page,
            add_prefix=True,
+            treat_sharing_link_as_public=treat_sharing_link_as_public,
        )
    else:
        external_access = ExternalAccess.empty()
@@ -803,6 +846,7 @@ def _convert_driveitem_to_slim_document(
    ctx: ClientContext,
    graph_client: GraphClient,
    parent_hierarchy_raw_node_id: str | None = None,
+    treat_sharing_link_as_public: bool = False,
 ) -> SlimDocument:
    if driveitem.id is None:
        raise ValueError("DriveItem ID is required")
@@ -813,6 +857,7 @@ def _convert_driveitem_to_slim_document(
        graph_client=graph_client,
        drive_item=sdk_item,
        drive_name=drive_name,
+        treat_sharing_link_as_public=treat_sharing_link_as_public,
    )

    return SlimDocument(
@@ -827,6 +872,7 @@ def _convert_sitepage_to_slim_document(
    ctx: ClientContext | None,
    graph_client: GraphClient,
    parent_hierarchy_raw_node_id: str | None = None,
+    treat_sharing_link_as_public: bool = False,
 ) -> SlimDocument:
    """Convert a SharePoint site page to a SlimDocument object."""
    if site_page.get("id") is None:
@@ -836,6 +882,7 @@ def _convert_sitepage_to_slim_document(
        ctx=ctx,
        graph_client=graph_client,
        site_page=site_page,
+        treat_sharing_link_as_public=treat_sharing_link_as_public,
    )
    id = site_page.get("id")
    if id is None:
@@ -855,14 +902,20 @@ class SharepointConnector(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        sites: list[str] = [],
+        excluded_sites: list[str] = [],
+        excluded_paths: list[str] = [],
        include_site_pages: bool = True,
        include_site_documents: bool = True,
+        treat_sharing_link_as_public: bool = False,
        authority_host: str = DEFAULT_AUTHORITY_HOST,
        graph_api_host: str = DEFAULT_GRAPH_API_HOST,
        sharepoint_domain_suffix: str = DEFAULT_SHAREPOINT_DOMAIN_SUFFIX,
    ) -> None:
        self.batch_size = batch_size
        self.sites = list(sites)
+        self.excluded_sites = [s for p in excluded_sites if (s := p.strip())]
+        self.excluded_paths = [s for p in excluded_paths if (s := p.strip())]
+        self.treat_sharing_link_as_public = treat_sharing_link_as_public
        self.site_descriptors: list[SiteDescriptor] = self._extract_site_and_drive_info(
            sites
        )
@@ -1233,6 +1286,29 @@ class SharepointConnector(
                break
            sites = sites._get_next().execute_query()

+    def _is_driveitem_excluded(self, driveitem: DriveItemData) -> bool:
+        """Decide whether ``driveitem`` is hidden by ``self.excluded_paths``.
+
+        With no exclusion patterns configured the answer is always False and
+        the item's path is never computed. Otherwise the item's drive-relative
+        path is built from its parent reference path and name, then matched
+        against the patterns.
+        """
+        patterns = self.excluded_paths
+        if patterns:
+            # NOTE(review): driveitem.name is passed through unchecked --
+            # confirm it can never be None at the call sites.
+            item_path = _build_item_relative_path(
+                driveitem.parent_reference_path, driveitem.name
+            )
+            return _is_path_excluded(item_path, patterns)
+        return False
+
+    def _filter_excluded_sites(
+        self, site_descriptors: list[SiteDescriptor]
+    ) -> list[SiteDescriptor]:
+        """Drop every site whose URL matches an ``excluded_sites`` glob.
+
+        When no exclusion patterns are configured the input list object is
+        returned as-is. Otherwise a new list is built in the original order,
+        and each dropped site is logged at info level.
+        """
+        if not self.excluded_sites:
+            return site_descriptors
+
+        kept = []
+        for descriptor in site_descriptors:
+            if not _is_site_excluded(descriptor.url, self.excluded_sites):
+                kept.append(descriptor)
+            else:
+                logger.info(f"Excluding site by denylist: {descriptor.url}")
+        return kept
+
    def fetch_sites(self) -> list[SiteDescriptor]:
        sites = self.graph_client.sites.get_all_sites().execute_query()

@@ -1249,7 +1325,7 @@ class SharepointConnector(
            for site in self._handle_paginated_sites(sites)
            if "-my.sharepoint" not in site.web_url
        ]
-        return site_descriptors
+        return self._filter_excluded_sites(site_descriptors)

    def _fetch_site_pages(
        self,
@@ -1690,7 +1766,9 @@ class SharepointConnector(
        checkpoint.seen_document_ids.clear()

    def _fetch_slim_documents_from_sharepoint(self) -> GenerateSlimDocumentOutput:
-        site_descriptors = self.site_descriptors or self.fetch_sites()
+        site_descriptors = self._filter_excluded_sites(
+            self.site_descriptors or self.fetch_sites()
+        )

        # Create a temporary checkpoint for hierarchy node tracking
        temp_checkpoint = SharepointConnectorCheckpoint(has_more=True)
@@ -1710,6 +1788,10 @@ class SharepointConnector(
                for driveitem, drive_name, drive_web_url in self._fetch_driveitems(
                    site_descriptor=site_descriptor
                ):
+                    if self._is_driveitem_excluded(driveitem):
+                        logger.debug(f"Excluding by path denylist: {driveitem.web_url}")
+                        continue
+
                    if drive_web_url:
                        doc_batch.extend(
                            self._yield_drive_hierarchy_node(
@@ -1747,6 +1829,7 @@ class SharepointConnector(
                                ctx,
                                self.graph_client,
                                parent_hierarchy_raw_node_id=parent_hierarchy_url,
+                                treat_sharing_link_as_public=self.treat_sharing_link_as_public,
                            )
                        )
                    except Exception as e:
@@ -1770,6 +1853,7 @@ class SharepointConnector(
                            ctx,
                            self.graph_client,
                            parent_hierarchy_raw_node_id=site_descriptor.url,
+                            treat_sharing_link_as_public=self.treat_sharing_link_as_public,
                        )
                    )
                    if len(doc_batch) >= SLIM_BATCH_SIZE:
@@ -2043,7 +2127,9 @@ class SharepointConnector(
            and not checkpoint.process_site_pages
        ):
            logger.info("Initializing SharePoint sites for processing")
-            site_descs = self.site_descriptors or self.fetch_sites()
+            site_descs = self._filter_excluded_sites(
+                self.site_descriptors or self.fetch_sites()
+            )
            checkpoint.cached_site_descriptors = deque(site_descs)

            if not checkpoint.cached_site_descriptors:
@@ -2264,6 +2350,10 @@ class SharepointConnector(
            for driveitem in driveitems:
                item_count += 1

+                if self._is_driveitem_excluded(driveitem):
+                    logger.debug(f"Excluding by path denylist: {driveitem.web_url}")
+                    continue
+
                if driveitem.id and driveitem.id in checkpoint.seen_document_ids:
                    logger.debug(
                        f"Skipping duplicate document {driveitem.id} ({driveitem.name})"
@@ -2318,6 +2408,7 @@ class SharepointConnector(
                        parent_hierarchy_raw_node_id=parent_hierarchy_url,
                        graph_api_base=self.graph_api_base,
                        access_token=access_token,
+                        treat_sharing_link_as_public=self.treat_sharing_link_as_public,
                    )

                    if isinstance(doc_or_failure, Document):
@@ -2398,6 +2489,7 @@ class SharepointConnector(
                        include_permissions=include_permissions,
                        # Site pages have the site as their parent
                        parent_hierarchy_raw_node_id=site_descriptor.url,
+                        treat_sharing_link_as_public=self.treat_sharing_link_as_public,
                    )
                )
            logger.info(
--- a/backend/onyx/connectors/sharepoint/connector_utils.py
+++ b/backend/onyx/connectors/sharepoint/connector_utils.py
@@ -17,6 +17,7 @@ def get_sharepoint_external_access(
    drive_name: str | None = None,
    site_page: dict[str, Any] | None = None,
    add_prefix: bool = False,
+    treat_sharing_link_as_public: bool = False,
 ) -> ExternalAccess:
    if drive_item and drive_item.id is None:
        raise ValueError("DriveItem ID is required")
@@ -34,7 +35,13 @@ def get_sharepoint_external_access(
    )

    external_access = get_external_access_func(
-        ctx, graph_client, drive_name, drive_item, site_page, add_prefix
+        ctx,
+        graph_client,
+        drive_name,
+        drive_item,
+        site_page,
+        add_prefix,
+        treat_sharing_link_as_public,
    )

    return external_access
--- a/backend/onyx/connectors/web/connector.py
+++ b/backend/onyx/connectors/web/connector.py
@@ -88,8 +88,9 @@ WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20
 IFRAME_TEXT_LENGTH_THRESHOLD = 700
 # Message indicating JavaScript is disabled, which often appears when scraping fails
 JAVASCRIPT_DISABLED_MESSAGE = "You have JavaScript disabled in your browser"
-# Grace period after page navigation to allow bot-detection challenges to complete
-BOT_DETECTION_GRACE_PERIOD_MS = 5000
+# Grace period after page navigation to allow bot-detection challenges
+# and SPA content rendering to complete
+PAGE_RENDER_TIMEOUT_MS = 5000

 # Define common headers that mimic a real browser
 DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
@@ -547,7 +548,15 @@ class WebConnector(LoadConnector):
            )
            # Give the page a moment to start rendering after navigation commits.
            # Allows CloudFlare and other bot-detection challenges to complete.
-            page.wait_for_timeout(BOT_DETECTION_GRACE_PERIOD_MS)
+            page.wait_for_timeout(PAGE_RENDER_TIMEOUT_MS)
+
+            # Wait for network activity to settle so SPAs that fetch content
+            # asynchronously after the initial JS bundle have time to render.
+            try:
+                # A bit of extra time to account for long-polling, websockets, etc.
+                page.wait_for_load_state("networkidle", timeout=PAGE_RENDER_TIMEOUT_MS)
+            except TimeoutError:
+                pass

            last_modified = (
                page_response.header_value("Last-Modified") if page_response else None
@@ -576,7 +585,7 @@ class WebConnector(LoadConnector):
                    # (e.g., CloudFlare protection keeps making requests)
                    try:
                        page.wait_for_load_state(
-                            "networkidle", timeout=BOT_DETECTION_GRACE_PERIOD_MS
+                            "networkidle", timeout=PAGE_RENDER_TIMEOUT_MS
                        )
                    except TimeoutError:
                        # If networkidle times out, just give it a moment for content to render
--- a/backend/onyx/context/search/models.py
+++ b/backend/onyx/context/search/models.py
@@ -2,7 +2,6 @@ from collections.abc import Sequence
 from datetime import datetime
 from enum import Enum
 from typing import Any
-from uuid import UUID

 from pydantic import BaseModel
 from pydantic import Field
@@ -70,9 +69,13 @@ class BaseFilters(BaseModel):


 class UserFileFilters(BaseModel):
-    user_file_ids: list[UUID] | None = None
-    project_id: int | None = None
-    persona_id: int | None = None
+    # Scopes search to user files tagged with a given project/persona in Vespa.
+    # These are NOT simply the IDs of the current project or persona — they are
+    # only set when the persona's/project's user files overflowed the LLM
+    # context window and must be searched via vector DB instead of being loaded
+    # directly into the prompt.
+    project_id_filter: int | None = None
+    persona_id_filter: int | None = None


 class AssistantKnowledgeFilters(BaseModel):
@@ -398,3 +401,16 @@ class SavedSearchDocWithContent(SavedSearchDoc):
    section in addition to the match_highlights."""

    content: str
+
+
+class PersonaSearchInfo(BaseModel):
+    """Snapshot of persona data needed by the search pipeline.
+
+    Extracted from the ORM Persona before the DB session is released so that
+    SearchTool and search_pipeline never lazy-load relationships post-commit.
+    """
+
+    # Names of the persona's document sets.
+    document_set_names: list[str]
+    # The persona's search_start_date (may be None).
+    search_start_date: datetime | None
+    # IDs of the documents attached to the persona.
+    attached_document_ids: list[str]
+    # IDs of the hierarchy nodes attached to the persona.
+    hierarchy_node_ids: list[int]
--- a/backend/onyx/context/search/pipeline.py
+++ b/backend/onyx/context/search/pipeline.py
@@ -1,6 +1,5 @@
 from collections import defaultdict
 from datetime import datetime
-from uuid import UUID

 from sqlalchemy.orm import Session

@@ -10,12 +9,12 @@ from onyx.context.search.models import ChunkSearchRequest
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunk
 from onyx.context.search.models import InferenceSection
+from onyx.context.search.models import PersonaSearchInfo
 from onyx.context.search.preprocessing.access_filters import (
    build_access_filters_for_user,
 )
 from onyx.context.search.retrieval.search_runner import search_chunks
 from onyx.context.search.utils import inference_section_from_chunks
-from onyx.db.models import Persona
 from onyx.db.models import User
 from onyx.document_index.interfaces import DocumentIndex
 from onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo
@@ -39,9 +38,8 @@ logger = setup_logger()
 def _build_index_filters(
    user_provided_filters: BaseFilters | None,
    user: User,  # Used for ACLs, anonymous users only see public docs
-    project_id: int | None,
-    persona_id: int | None,
-    user_file_ids: list[UUID] | None,
+    project_id_filter: int | None,
+    persona_id_filter: int | None,
    persona_document_sets: list[str] | None,
    persona_time_cutoff: datetime | None,
    db_session: Session | None = None,
@@ -97,16 +95,6 @@ def _build_index_filters(
    if not source_filter and detected_source_filter:
        source_filter = detected_source_filter

-    # CRITICAL FIX: If user_file_ids are present, we must ensure "user_file"
-    # source type is included in the filter, otherwise user files will be excluded!
-    if user_file_ids and source_filter:
-        from onyx.configs.constants import DocumentSource
-
-        # Add user_file to the source filter if not already present
-        if DocumentSource.USER_FILE not in source_filter:
-            source_filter = list(source_filter) + [DocumentSource.USER_FILE]
-            logger.debug("Added USER_FILE to source_filter for user knowledge search")
-
    if bypass_acl:
        user_acl_filters = None
    elif acl_filters is not None:
@@ -117,9 +105,8 @@ def _build_index_filters(
        user_acl_filters = build_access_filters_for_user(user, db_session)

    final_filters = IndexFilters(
-        user_file_ids=user_file_ids,
-        project_id=project_id,
-        persona_id=persona_id,
+        project_id_filter=project_id_filter,
+        persona_id_filter=persona_id_filter,
        source_type=source_filter,
        document_set=document_set_filter,
        time_cutoff=time_filter,
@@ -260,51 +247,41 @@ def search_pipeline(
    document_index: DocumentIndex,
    # Used for ACLs and federated search, anonymous users only see public docs
    user: User,
-    # Used for default filters and settings
-    persona: Persona | None,
+    # Pre-extracted persona search configuration (None when no persona)
+    persona_search_info: PersonaSearchInfo | None,
    db_session: Session | None = None,
    auto_detect_filters: bool = False,
    llm: LLM | None = None,
-    # If a project ID is provided, it will be exclusively scoped to that project
-    project_id: int | None = None,
-    # If a persona_id is provided, search scopes to files attached to this persona
-    persona_id: int | None = None,
+    # Vespa metadata filters for overflowing user files.  NOT the raw IDs
+    # of the current project/persona — only set when user files couldn't fit
+    # in the LLM context and need to be searched via vector DB.
+    project_id_filter: int | None = None,
+    persona_id_filter: int | None = None,
    # Pre-fetched data — when provided, avoids DB queries (no session needed)
    acl_filters: list[str] | None = None,
    embedding_model: EmbeddingModel | None = None,
    prefetched_federated_retrieval_infos: list[FederatedRetrievalInfo] | None = None,
 ) -> list[InferenceChunk]:
-    user_uploaded_persona_files: list[UUID] | None = (
-        [user_file.id for user_file in persona.user_files] if persona else None
-    )
-
    persona_document_sets: list[str] | None = (
-        [persona_document_set.name for persona_document_set in persona.document_sets]
-        if persona
-        else None
+        persona_search_info.document_set_names if persona_search_info else None
    )
    persona_time_cutoff: datetime | None = (
-        persona.search_start_date if persona else None
+        persona_search_info.search_start_date if persona_search_info else None
    )
-
-    # Extract assistant knowledge filters from persona
    attached_document_ids: list[str] | None = (
-        [doc.id for doc in persona.attached_documents]
-        if persona and persona.attached_documents
+        persona_search_info.attached_document_ids or None
+        if persona_search_info
        else None
    )
    hierarchy_node_ids: list[int] | None = (
-        [node.id for node in persona.hierarchy_nodes]
-        if persona and persona.hierarchy_nodes
-        else None
+        persona_search_info.hierarchy_node_ids or None if persona_search_info else None
    )

    filters = _build_index_filters(
        user_provided_filters=chunk_search_request.user_selected_filters,
        user=user,
-        project_id=project_id,
-        persona_id=persona_id,
-        user_file_ids=user_uploaded_persona_files,
+        project_id_filter=project_id_filter,
+        persona_id_filter=persona_id_filter,
        persona_document_sets=persona_document_sets,
        persona_time_cutoff=persona_time_cutoff,
        db_session=db_session,
--- a/backend/onyx/context/search/retrieval/search_runner.py
+++ b/backend/onyx/context/search/retrieval/search_runner.py
@@ -14,6 +14,10 @@ from onyx.context.search.utils import get_query_embedding
 from onyx.context.search.utils import inference_section_from_chunks
 from onyx.document_index.interfaces import DocumentIndex
 from onyx.document_index.interfaces import VespaChunkRequest
+from onyx.document_index.interfaces_new import DocumentIndex as NewDocumentIndex
+from onyx.document_index.opensearch.opensearch_document_index import (
+    OpenSearchOldDocumentIndex,
+)
 from onyx.federated_connectors.federated_retrieval import FederatedRetrievalInfo
 from onyx.federated_connectors.federated_retrieval import (
    get_federated_retrieval_functions,
@@ -49,7 +53,7 @@ def combine_retrieval_results(
    return sorted_chunks


-def _embed_and_search(
+def _embed_and_hybrid_search(
    query_request: ChunkIndexRequest,
    document_index: DocumentIndex,
    db_session: Session | None = None,
@@ -81,6 +85,17 @@ def _embed_and_search(
    return top_chunks


+def _keyword_search(
+    query_request: ChunkIndexRequest,
+    document_index: NewDocumentIndex,
+) -> list[InferenceChunk]:
+    """Run keyword-only retrieval for ``query_request`` against ``document_index``.
+
+    No query embedding is computed here; the raw query string and filters are
+    passed straight to ``keyword_retrieval``. Falls back to NUM_RETURNED_HITS
+    when ``query_request.limit`` is falsy (None or 0).
+    """
+    return document_index.keyword_retrieval(
+        query=query_request.query,
+        filters=query_request.filters,
+        num_to_retrieve=query_request.limit or NUM_RETURNED_HITS,
+    )
+
+
 def search_chunks(
    query_request: ChunkIndexRequest,
    user_id: UUID | None,
@@ -110,7 +125,6 @@ def search_chunks(
            user_id=user_id,
            source_types=list(source_filters) if source_filters else None,
            document_set_names=query_request.filters.document_set,
-            user_file_ids=query_request.filters.user_file_ids,
        )

    federated_sources = set(
@@ -129,21 +143,38 @@ def search_chunks(
    )

    if normal_search_enabled:
-        run_queries.append(
-            (
-                _embed_and_search,
-                (query_request, document_index, db_session, embedding_model),
+        if (
+            query_request.hybrid_alpha is not None
+            and query_request.hybrid_alpha == 0.0
+            and isinstance(document_index, OpenSearchOldDocumentIndex)
+        ):
+            # If hybrid alpha is explicitly set to keyword only, do pure keyword
+            # search without generating an embedding. This is currently only
+            # supported with OpenSearchDocumentIndex.
+            opensearch_new_document_index: NewDocumentIndex = document_index._real_index
+            run_queries.append(
+                (
+                    lambda: _keyword_search(
+                        query_request, opensearch_new_document_index
+                    ),
+                    (),
+                )
+            )
+        else:
+            run_queries.append(
+                (
+                    _embed_and_hybrid_search,
+                    (query_request, document_index, db_session, embedding_model),
+                )
            )
-        )

    parallel_search_results = run_functions_tuples_in_parallel(run_queries)
    top_chunks = combine_retrieval_results(parallel_search_results)

    if not top_chunks:
        logger.debug(
-            f"Hybrid search returned no results for query: {query_request.query}with filters: {query_request.filters}"
+            f"Search returned no results for query: {query_request.query} with filters: {query_request.filters}."
        )
-        return []

    return top_chunks

--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -16,6 +16,7 @@ from sqlalchemy import Row
 from sqlalchemy import select
 from sqlalchemy import update
 from sqlalchemy.exc import MultipleResultsFound
+from sqlalchemy.orm import joinedload
 from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session

@@ -28,6 +29,7 @@ from onyx.db.models import ChatMessage
 from onyx.db.models import ChatMessage__SearchDoc
 from onyx.db.models import ChatSession
 from onyx.db.models import ChatSessionSharedStatus
+from onyx.db.models import Persona
 from onyx.db.models import SearchDoc as DBSearchDoc
 from onyx.db.models import ToolCall
 from onyx.db.models import User
@@ -53,9 +55,22 @@ def get_chat_session_by_id(
    db_session: Session,
    include_deleted: bool = False,
    is_shared: bool = False,
+    eager_load_persona: bool = False,
 ) -> ChatSession:
    stmt = select(ChatSession).where(ChatSession.id == chat_session_id)

+    if eager_load_persona:
+        stmt = stmt.options(
+            joinedload(ChatSession.persona).options(
+                selectinload(Persona.tools),
+                selectinload(Persona.user_files),
+                selectinload(Persona.document_sets),
+                selectinload(Persona.attached_documents),
+                selectinload(Persona.hierarchy_nodes),
+            ),
+            joinedload(ChatSession.project),
+        )
+
    if is_shared:
        stmt = stmt.where(ChatSession.shared_status == ChatSessionSharedStatus.PUBLIC)
    else:
--- a/backend/onyx/db/connector_credential_pair.py
+++ b/backend/onyx/db/connector_credential_pair.py
@@ -511,7 +511,7 @@ def add_credential_to_connector(
    user: User,
    connector_id: int,
    credential_id: int,
-    cc_pair_name: str | None,
+    cc_pair_name: str,
    access_type: AccessType,
    groups: list[int] | None,
    auto_sync_options: dict | None = None,
@@ -750,3 +750,31 @@ def resync_cc_pair(
    )

    db_session.commit()
+
+
+# ── Metrics query helpers ──────────────────────────────────────────────
+
+
+def get_connector_health_for_metrics(
+    db_session: Session,
+) -> list:  # Returns list of Row tuples
+    """Return connector health data for Prometheus metrics.
+
+    Each row is (cc_pair_id, status, in_repeated_error_state,
+    last_successful_index_time, name, source).
+
+    One row is produced per connector-credential pair; ``source`` comes from
+    the Connector the pair is joined to. No filtering is applied.
+    """
+    return (
+        db_session.query(
+            ConnectorCredentialPair.id,
+            ConnectorCredentialPair.status,
+            ConnectorCredentialPair.in_repeated_error_state,
+            ConnectorCredentialPair.last_successful_index_time,
+            ConnectorCredentialPair.name,
+            Connector.source,
+        )
+        .join(
+            Connector,
+            ConnectorCredentialPair.connector_id == Connector.id,
+        )
+        .all()
+    )
--- a/backend/onyx/db/enums.py
+++ b/backend/onyx/db/enums.py
@@ -1,4 +1,31 @@
+from __future__ import annotations
+
 from enum import Enum as PyEnum
+from typing import ClassVar
+
+
+class AccountType(str, PyEnum):
+    """
+    What kind of account this is — determines whether the user
+    enters the group-based permission system.
+
+    STANDARD + SERVICE_ACCOUNT → participate in group system
+    BOT, EXT_PERM_USER, ANONYMOUS → fixed behavior
+    """
+
+    STANDARD = "standard"
+    BOT = "bot"
+    EXT_PERM_USER = "ext_perm_user"
+    SERVICE_ACCOUNT = "service_account"
+    ANONYMOUS = "anonymous"
+
+
+class GrantSource(str, PyEnum):
+    """How a permission grant was created."""
+
+    USER = "user"
+    SCIM = "scim"
+    SYSTEM = "system"


 class IndexingStatus(str, PyEnum):
@@ -304,3 +331,64 @@ class LLMModelFlowType(str, PyEnum):
    CHAT = "chat"
    VISION = "vision"
    CONTEXTUAL_RAG = "contextual_rag"
+
+
+class HookPoint(str, PyEnum):
+    """Named points at which a Hook can be registered (stored on Hook.hook_point)."""
+
+    DOCUMENT_INGESTION = "document_ingestion"
+    QUERY_PROCESSING = "query_processing"
+
+
+class HookFailStrategy(str, PyEnum):
+    HARD = "hard"  # exception propagates, pipeline aborts
+    SOFT = "soft"  # log error, return original input, pipeline continues
+
+
+class Permission(str, PyEnum):
+    """
+    Permission tokens for group-based authorization.
+    19 tokens total. full_admin_panel_access is an override —
+    if present, any permission check passes.
+    """
+
+    # Basic (auto-granted to every new group)
+    BASIC_ACCESS = "basic"
+
+    # Read tokens — implied only, never granted directly
+    READ_CONNECTORS = "read:connectors"
+    READ_DOCUMENT_SETS = "read:document_sets"
+    READ_AGENTS = "read:agents"
+    READ_USERS = "read:users"
+
+    # Add / Manage pairs
+    ADD_AGENTS = "add:agents"
+    MANAGE_AGENTS = "manage:agents"
+    MANAGE_DOCUMENT_SETS = "manage:document_sets"
+    ADD_CONNECTORS = "add:connectors"
+    MANAGE_CONNECTORS = "manage:connectors"
+    MANAGE_LLMS = "manage:llms"
+
+    # Toggle tokens
+    READ_AGENT_ANALYTICS = "read:agent_analytics"
+    MANAGE_ACTIONS = "manage:actions"
+    READ_QUERY_HISTORY = "read:query_history"
+    MANAGE_USER_GROUPS = "manage:user_groups"
+    CREATE_USER_API_KEYS = "create:user_api_keys"
+    CREATE_SERVICE_ACCOUNT_API_KEYS = "create:service_account_api_keys"
+    CREATE_SLACK_DISCORD_BOTS = "create:slack_discord_bots"
+
+    # Override — any permission check passes
+    FULL_ADMIN_PANEL_ACCESS = "admin"
+
+    # Permissions that are implied by other grants and must never be stored
+    # directly in the permission_grant table.
+    IMPLIED: ClassVar[frozenset[Permission]]
+
+
+# Assigned after the class is created: inside the Enum body IMPLIED is only
+# annotated (no value), so it does not become an enum member, and the set can
+# reference the finished Permission members here.
+Permission.IMPLIED = frozenset(
+    {
+        Permission.READ_CONNECTORS,
+        Permission.READ_DOCUMENT_SETS,
+        Permission.READ_AGENTS,
+        Permission.READ_USERS,
+    }
+)
--- a/backend/onyx/db/hook.py
+++ b/backend/onyx/db/hook.py
@@ -0,0 +1,235 @@
+import datetime
+from uuid import UUID
+
+from sqlalchemy import delete
+from sqlalchemy import select
+from sqlalchemy.engine import CursorResult
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.orm import selectinload
+from sqlalchemy.orm import Session
+
+from onyx.db.constants import UNSET
+from onyx.db.constants import UnsetType
+from onyx.db.enums import HookFailStrategy
+from onyx.db.enums import HookPoint
+from onyx.db.models import Hook
+from onyx.db.models import HookExecutionLog
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import OnyxError
+
+
+# ── Hook CRUD ────────────────────────────────────────────────────────────
+
+
+def get_hook_by_id(
+    *,
+    db_session: Session,
+    hook_id: int,
+    include_deleted: bool = False,
+    include_creator: bool = False,
+) -> Hook | None:
+    """Fetch a single hook by its primary key.
+
+    Hooks flagged as deleted are filtered out unless ``include_deleted`` is
+    True. When ``include_creator`` is True, the ``Hook.creator`` relationship
+    is eagerly loaded with ``selectinload``.
+
+    Returns the matching Hook, or None if no row satisfies the filters.
+    """
+    stmt = select(Hook).where(Hook.id == hook_id)
+    if not include_deleted:
+        stmt = stmt.where(Hook.deleted.is_(False))
+    if include_creator:
+        stmt = stmt.options(selectinload(Hook.creator))
+    return db_session.scalar(stmt)
+
+
+def get_non_deleted_hook_by_hook_point(
+    *,
+    db_session: Session,
+    hook_point: HookPoint,
+    include_creator: bool = False,
+) -> Hook | None:
+    """Fetch the non-deleted hook registered for ``hook_point``, if any.
+
+    When ``include_creator`` is True, the ``Hook.creator`` relationship is
+    eagerly loaded with ``selectinload``.
+
+    Returns the first matching Hook, or None if there is none.
+    """
+    stmt = (
+        select(Hook).where(Hook.hook_point == hook_point).where(Hook.deleted.is_(False))
+    )
+    if include_creator:
+        stmt = stmt.options(selectinload(Hook.creator))
+    return db_session.scalar(stmt)
+
+
+def get_hooks(
+    *,
+    db_session: Session,
+    include_deleted: bool = False,
+    include_creator: bool = False,
+) -> list[Hook]:
+    """List hooks, ordered by hook point and then newest ``created_at`` first.
+
+    Hooks flagged as deleted are excluded unless ``include_deleted`` is True.
+    When ``include_creator`` is True, the ``Hook.creator`` relationship is
+    eagerly loaded with ``selectinload``.
+    """
+    stmt = select(Hook)
+    if not include_deleted:
+        stmt = stmt.where(Hook.deleted.is_(False))
+    if include_creator:
+        stmt = stmt.options(selectinload(Hook.creator))
+    stmt = stmt.order_by(Hook.hook_point, Hook.created_at.desc())
+    return list(db_session.scalars(stmt).all())
+
+
+def create_hook__no_commit(
+    *,
+    db_session: Session,
+    name: str,
+    hook_point: HookPoint,
+    endpoint_url: str | None = None,
+    api_key: str | None = None,
+    fail_strategy: HookFailStrategy,
+    timeout_seconds: float,
+    is_active: bool = False,
+    is_reachable: bool | None = None,
+    creator_id: UUID | None = None,
+) -> Hook:
+    """Create a new hook for the given hook point.
+
+    At most one non-deleted hook per hook point is allowed. Raises
+    OnyxError(CONFLICT) if a hook already exists, including under concurrent
+    duplicate creates where the partial unique index fires an IntegrityError.
+
+    Returns the newly added Hook. The outer transaction is left for the
+    caller to commit.
+    """
+    # Fast path: report the conflict (including the existing hook's id) before
+    # attempting the insert. The IntegrityError handling below covers the case
+    # where another transaction creates the hook after this check.
+    existing = get_non_deleted_hook_by_hook_point(
+        db_session=db_session, hook_point=hook_point
+    )
+    if existing:
+        raise OnyxError(
+            OnyxErrorCode.CONFLICT,
+            f"A hook for '{hook_point.value}' already exists (id={existing.id}).",
+        )
+
+    hook = Hook(
+        name=name,
+        hook_point=hook_point,
+        endpoint_url=endpoint_url,
+        api_key=api_key,
+        fail_strategy=fail_strategy,
+        timeout_seconds=timeout_seconds,
+        is_active=is_active,
+        is_reachable=is_reachable,
+        creator_id=creator_id,
+    )
+    # Use a savepoint so that a failed insert only rolls back this operation,
+    # not the entire outer transaction.
+    savepoint = db_session.begin_nested()
+    try:
+        db_session.add(hook)
+        # NOTE(review): relies on the savepoint commit flushing the pending
+        # INSERT so a unique-index violation is raised inside this try — confirm.
+        savepoint.commit()
+    except IntegrityError as exc:
+        savepoint.rollback()
+        # Only a violation of the one-hook-per-point index maps to CONFLICT.
+        if "ix_hook_one_non_deleted_per_point" in str(exc.orig):
+            raise OnyxError(
+                OnyxErrorCode.CONFLICT,
+                f"A hook for '{hook_point.value}' already exists.",
+            )
+        raise  # re-raise unrelated integrity errors (FK violations, etc.)
+    return hook
+
+
+def update_hook__no_commit(
+    *,
+    db_session: Session,
+    hook_id: int,
+    name: str | None = None,
+    endpoint_url: str | None | UnsetType = UNSET,
+    api_key: str | None | UnsetType = UNSET,
+    fail_strategy: HookFailStrategy | None = None,
+    timeout_seconds: float | None = None,
+    is_active: bool | None = None,
+    is_reachable: bool | None = None,
+    include_creator: bool = False,
+) -> Hook:
+    """Update hook fields.
+
+    Sentinel conventions:
+    - endpoint_url, api_key: pass UNSET to leave unchanged; pass None to clear.
+    - name, fail_strategy, timeout_seconds, is_active, is_reachable: pass None to leave unchanged.
+
+    Raises OnyxError(NOT_FOUND) if no non-deleted hook has this id. Changes are
+    flushed to the session but not committed. Returns the updated Hook; its
+    ``creator`` relationship is eagerly loaded when ``include_creator`` is True.
+    """
+    # Deleted hooks are not returned by this lookup (include_deleted defaults
+    # to False), so they are reported as NOT_FOUND below.
+    hook = get_hook_by_id(
+        db_session=db_session, hook_id=hook_id, include_creator=include_creator
+    )
+    if hook is None:
+        raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Hook with id {hook_id} not found.")
+
+    if name is not None:
+        hook.name = name
+    # UNSET (not None) means "leave unchanged" for the two clearable fields.
+    if not isinstance(endpoint_url, UnsetType):
+        hook.endpoint_url = endpoint_url
+    if not isinstance(api_key, UnsetType):
+        hook.api_key = api_key  # type: ignore[assignment]  # EncryptedString coerces str → SensitiveValue at the ORM level
+    if fail_strategy is not None:
+        hook.fail_strategy = fail_strategy
+    if timeout_seconds is not None:
+        hook.timeout_seconds = timeout_seconds
+    if is_active is not None:
+        hook.is_active = is_active
+    if is_reachable is not None:
+        hook.is_reachable = is_reachable
+
+    db_session.flush()
+    return hook
+
+
+def delete_hook__no_commit(
+    *,
+    db_session: Session,
+    hook_id: int,
+) -> None:
+    """Soft-delete a hook: mark it deleted and inactive, then flush.
+
+    The row is kept; only ``deleted`` and ``is_active`` are changed. Raises
+    OnyxError(NOT_FOUND) if no non-deleted hook has this id (so deleting an
+    already-deleted hook also raises). Does not commit.
+    """
+    hook = get_hook_by_id(db_session=db_session, hook_id=hook_id)
+    if hook is None:
+        raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Hook with id {hook_id} not found.")
+
+    hook.deleted = True
+    hook.is_active = False
+    db_session.flush()
+
+
+# ── HookExecutionLog CRUD ────────────────────────────────────────────────
+
+
+def create_hook_execution_log__no_commit(
+    *,
+    db_session: Session,
+    hook_id: int,
+    is_success: bool,
+    error_message: str | None = None,
+    status_code: int | None = None,
+    duration_ms: int | None = None,
+) -> HookExecutionLog:
+    """Add a HookExecutionLog row for ``hook_id`` and flush it to the session.
+
+    Does not commit. Returns the newly added log object.
+    """
+    log = HookExecutionLog(
+        hook_id=hook_id,
+        is_success=is_success,
+        error_message=error_message,
+        status_code=status_code,
+        duration_ms=duration_ms,
+    )
+    db_session.add(log)
+    db_session.flush()
+    return log
+
+
+def get_hook_execution_logs(
+    *,
+    db_session: Session,
+    hook_id: int,
+    limit: int,
+) -> list[HookExecutionLog]:
+    """Return up to ``limit`` execution logs for ``hook_id``, newest first."""
+    stmt = (
+        select(HookExecutionLog)
+        .where(HookExecutionLog.hook_id == hook_id)
+        .order_by(HookExecutionLog.created_at.desc())
+        .limit(limit)
+    )
+    return list(db_session.scalars(stmt).all())
+
+
+def cleanup_old_execution_logs__no_commit(
+    *,
+    db_session: Session,
+    max_age_days: int,
+) -> int:
+    """Delete execution logs older than max_age_days. Returns the number of rows deleted."""
+    # Timezone-aware UTC cutoff; rows with created_at strictly before it are removed.
+    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
+        days=max_age_days
+    )
+    # Single bulk DELETE across all hooks. synchronize_session=False tells
+    # SQLAlchemy not to reconcile objects already loaded in the session.
+    result: CursorResult = db_session.execute(  # type: ignore[assignment]
+        delete(HookExecutionLog)
+        .where(HookExecutionLog.created_at < cutoff)
+        .execution_options(synchronize_session=False)
+    )
+    return result.rowcount
--- a/backend/onyx/db/index_attempt.py
+++ b/backend/onyx/db/index_attempt.py
@@ -2,6 +2,8 @@ from collections.abc import Sequence
 from datetime import datetime
 from datetime import timedelta
 from datetime import timezone
+from typing import NamedTuple
+from typing import TYPE_CHECKING
 from typing import TypeVarTuple

 from sqlalchemy import and_
@@ -28,6 +30,9 @@ from onyx.utils.logger import setup_logger
 from onyx.utils.telemetry import optional_telemetry
 from onyx.utils.telemetry import RecordType

+if TYPE_CHECKING:
+    from onyx.configs.constants import DocumentSource
+
 # from sqlalchemy.sql.selectable import Select

 # Comment out unused imports that cause mypy errors
@@ -583,6 +588,67 @@ def get_latest_index_attempt_for_cc_pair_id(
    return db_session.execute(stmt).scalar_one_or_none()


+def get_latest_successful_index_attempt_for_cc_pair_id(
+    db_session: Session,
+    connector_credential_pair_id: int,
+    secondary_index: bool = False,
+) -> IndexAttempt | None:
+    """Returns the most recent successful index attempt for the given cc pair,
+    filtered to the current search settings (or to the future ones when
+    ``secondary_index`` is True).
+    "Successful" covers both SUCCESS and COMPLETED_WITH_ERRORS.
+    Uses MAX(id) semantics to match get_latest_index_attempts_by_status.
+    Returns None when no such attempt exists."""
+    status = IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
+    stmt = (
+        select(IndexAttempt)
+        .where(
+            IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,
+            IndexAttempt.status.in_(
+                [IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
+            ),
+        )
+        .join(SearchSettings)
+        .where(SearchSettings.status == status)
+        # Highest id first + LIMIT 1 == the attempt with MAX(id).
+        .order_by(desc(IndexAttempt.id))
+        .limit(1)
+    )
+    return db_session.execute(stmt).scalar_one_or_none()
+
+
+def get_latest_successful_index_attempts_parallel(
+    secondary_index: bool = False,
+) -> Sequence[IndexAttempt]:
+    """Batch version: returns the latest successful index attempt per cc pair.
+    Covers both SUCCESS and COMPLETED_WITH_ERRORS (matching is_successful()).
+
+    Only attempts whose search settings are PRESENT are considered (FUTURE
+    when ``secondary_index`` is True). Opens its own session via
+    get_session_with_current_tenant() instead of taking one as an argument."""
+    model_status = (
+        IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
+    )
+    with get_session_with_current_tenant() as db_session:
+        # Per cc pair: the highest IndexAttempt.id among successful attempts
+        # for the selected search settings.
+        latest_ids = (
+            select(
+                IndexAttempt.connector_credential_pair_id,
+                func.max(IndexAttempt.id).label("max_id"),
+            )
+            .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
+            .where(
+                SearchSettings.status == model_status,
+                IndexAttempt.status.in_(
+                    [IndexingStatus.SUCCESS, IndexingStatus.COMPLETED_WITH_ERRORS]
+                ),
+            )
+            .group_by(IndexAttempt.connector_credential_pair_id)
+            .subquery()
+        )
+
+        # Join back to IndexAttempt to load the full rows for those ids.
+        stmt = select(IndexAttempt).join(
+            latest_ids,
+            (
+                IndexAttempt.connector_credential_pair_id
+                == latest_ids.c.connector_credential_pair_id
+            )
+            & (IndexAttempt.id == latest_ids.c.max_id),
+        )
+        return db_session.execute(stmt).scalars().all()
+
+
 def count_index_attempts_for_cc_pair(
    db_session: Session,
    cc_pair_id: int,
@@ -911,3 +977,106 @@ def get_index_attempt_errors_for_cc_pair(
        stmt = stmt.offset(page * page_size).limit(page_size)

    return list(db_session.scalars(stmt).all())
+
+
+# ── Metrics query helpers ──────────────────────────────────────────────
+
+
+class ActiveIndexAttemptMetric(NamedTuple):
+    """Row returned by get_active_index_attempts_for_metrics."""
+
+    # Field order must match the column order selected by
+    # get_active_index_attempts_for_metrics, which builds instances
+    # positionally from each result row.
+
+    # Non-terminal IndexAttempt.status shared by the attempts in this group.
+    status: IndexingStatus
+    # Connector.source of the connector behind the cc pair.
+    source: "DocumentSource"
+    # ConnectorCredentialPair.id
+    cc_pair_id: int
+    # ConnectorCredentialPair.name
+    cc_pair_name: str | None
+    # Number of attempts in this (status, source, cc pair) group.
+    attempt_count: int
+
+
+def get_active_index_attempts_for_metrics(
+    db_session: Session,
+) -> list[ActiveIndexAttemptMetric]:
+    """Return non-terminal index attempts grouped by status, source, and connector.
+
+    Each row is (status, source, cc_pair_id, cc_pair_name, attempt_count).
+
+    "Non-terminal" means every IndexingStatus member for which is_terminal()
+    is falsy. Groups with zero attempts produce no row.
+    """
+    # Imported locally rather than at module level.
+    # NOTE(review): presumably to avoid a circular import -- confirm.
+    from onyx.db.models import Connector
+
+    # Derive the exclusion list from the enum so newly added statuses are
+    # classified by is_terminal() instead of a hard-coded list here.
+    terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
+    rows = (
+        db_session.query(
+            IndexAttempt.status,
+            Connector.source,
+            ConnectorCredentialPair.id,
+            ConnectorCredentialPair.name,
+            func.count(),
+        )
+        # IndexAttempt -> cc pair -> connector, to reach Connector.source.
+        .join(
+            ConnectorCredentialPair,
+            IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
+        )
+        .join(
+            Connector,
+            ConnectorCredentialPair.connector_id == Connector.id,
+        )
+        .filter(IndexAttempt.status.notin_(terminal_statuses))
+        # Group by every selected non-aggregate column.
+        .group_by(
+            IndexAttempt.status,
+            Connector.source,
+            ConnectorCredentialPair.id,
+            ConnectorCredentialPair.name,
+        )
+        .all()
+    )
+    # Positional unpacking: column order above mirrors the NamedTuple fields.
+    return [ActiveIndexAttemptMetric(*row) for row in rows]
+
+
+def get_failed_attempt_counts_by_cc_pair(
+    db_session: Session,
+    since: datetime | None = None,
+) -> dict[int, int]:
+    """Return {cc_pair_id: failed_attempt_count} for cc pairs with failures.
+
+    Counts index attempts with status FAILED whose ``time_created`` is at or
+    after ``since``. When ``since`` is None it defaults to 90 days before the
+    current UTC time, to avoid unbounded historical aggregation.
+
+    Cc pairs with no failed attempt in the window have no key in the result
+    (they are not reported as 0), so callers should use ``.get(id, 0)``.
+    """
+    if since is None:
+        since = datetime.now(timezone.utc) - timedelta(days=90)
+
+    rows = (
+        db_session.query(
+            IndexAttempt.connector_credential_pair_id,
+            func.count(),
+        )
+        .filter(IndexAttempt.status == IndexingStatus.FAILED)
+        # Inclusive lower bound: attempts created exactly at ``since`` count.
+        .filter(IndexAttempt.time_created >= since)
+        .group_by(IndexAttempt.connector_credential_pair_id)
+        .all()
+    )
+    return {cc_id: count for cc_id, count in rows}
+
+
+def get_docs_indexed_by_cc_pair(
+    db_session: Session,
+    since: datetime | None = None,
+) -> dict[int, int]:
+    """Return {cc_pair_id: total_new_docs_indexed} across successful attempts.
+
+    Only counts attempts with status SUCCESS to avoid inflating counts with
+    partial results from failed attempts. COMPLETED_WITH_ERRORS attempts are
+    therefore excluded as well.
+
+    Only attempts whose ``time_created`` is at or after ``since`` are
+    included; when ``since`` is None it defaults to 90 days before the current
+    UTC time. A NULL ``new_docs_indexed`` contributes 0 to the sum. Cc pairs
+    with no SUCCESS attempt in the window have no key in the result.
+    """
+    if since is None:
+        since = datetime.now(timezone.utc) - timedelta(days=90)
+
+    query = (
+        db_session.query(
+            IndexAttempt.connector_credential_pair_id,
+            # COALESCE so a NULL per-attempt count is summed as 0.
+            func.sum(func.coalesce(IndexAttempt.new_docs_indexed, 0)),
+        )
+        .filter(IndexAttempt.status == IndexingStatus.SUCCESS)
+        # Inclusive lower bound: attempts created exactly at ``since`` count.
+        .filter(IndexAttempt.time_created >= since)
+        .group_by(IndexAttempt.connector_credential_pair_id)
+    )
+    rows = query.all()
+    # Coerce the SUM result to a plain int; ``or 0`` guards a None total.
+    return {cc_id: int(total or 0) for cc_id, total in rows}
--- a/backend/onyx/db/models.py
+++ b/backend/onyx/db/models.py
@@ -48,6 +48,7 @@ from sqlalchemy.types import LargeBinary
 from sqlalchemy.types import TypeDecorator
 from sqlalchemy import PrimaryKeyConstraint

+from onyx.db.enums import AccountType
 from onyx.auth.schemas import UserRole
 from onyx.configs.constants import (
    ANONYMOUS_USER_UUID,
@@ -64,6 +65,8 @@ from onyx.db.enums import (
    BuildSessionStatus,
    EmbeddingPrecision,
    HierarchyNodeType,
+    HookFailStrategy,
+    HookPoint,
    IndexingMode,
    OpenSearchDocumentMigrationStatus,
    OpenSearchTenantMigrationStatus,
@@ -76,6 +79,8 @@ from onyx.db.enums import (
    MCPAuthenticationPerformer,
    MCPTransport,
    MCPServerStatus,
+    Permission,
+    GrantSource,
    LLMModelFlowType,
    ThemePreference,
    DefaultAppMode,
@@ -300,6 +305,9 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
    role: Mapped[UserRole] = mapped_column(
        Enum(UserRole, native_enum=False, default=UserRole.BASIC)
    )
+    account_type: Mapped[AccountType | None] = mapped_column(
+        Enum(AccountType, native_enum=False), nullable=True
+    )

    """
    Preferences probably should be in a separate table at some point, but for now
@@ -2643,6 +2651,15 @@ class ChatMessage(Base):
        nullable=True,
    )

+    # For multi-model turns: the user message points to which assistant response
+    # was selected as the preferred one to continue the conversation with.
+    preferred_response_id: Mapped[int | None] = mapped_column(
+        ForeignKey("chat_message.id", ondelete="SET NULL"), nullable=True
+    )
+
+    # The display name of the model that generated this assistant message
+    model_display_name: Mapped[str | None] = mapped_column(String, nullable=True)
+
    # What does this message contain
    reasoning_tokens: Mapped[str | None] = mapped_column(Text, nullable=True)
    message: Mapped[str] = mapped_column(Text)
@@ -2710,6 +2727,12 @@ class ChatMessage(Base):
        remote_side="ChatMessage.id",
    )

+    preferred_response: Mapped["ChatMessage | None"] = relationship(
+        "ChatMessage",
+        foreign_keys=[preferred_response_id],
+        remote_side="ChatMessage.id",
+    )
+
    # Chat messages only need to know their immediate tool call children
    # If there are nested tool calls, they are stored in the tool_call_children relationship.
    tool_calls: Mapped[list["ToolCall"] | None] = relationship(
@@ -3112,8 +3135,6 @@ class VoiceProvider(Base):
    is_default_stt: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    is_default_tts: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

-    deleted: Mapped[bool] = mapped_column(Boolean, default=False)
-
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
@@ -3465,9 +3486,9 @@ class Persona(Base):
    builtin_persona: Mapped[bool] = mapped_column(Boolean, default=False)

    # Featured personas are highlighted in the UI
-    featured: Mapped[bool] = mapped_column(Boolean, default=False)
-    # controls whether the persona is available to be selected by users
-    is_visible: Mapped[bool] = mapped_column(Boolean, default=True)
+    is_featured: Mapped[bool] = mapped_column(Boolean, default=False)
+    # controls whether the persona is listed in user-facing agent lists
+    is_listed: Mapped[bool] = mapped_column(Boolean, default=True)
    # controls the ordering of personas in the UI
    # higher priority personas are displayed first, ties are resolved by the ID,
    # where lower value IDs (e.g. created earlier) are displayed first
@@ -3969,6 +3990,8 @@ class SamlAccount(Base):
 class User__UserGroup(Base):
    __tablename__ = "user__user_group"

+    __table_args__ = (Index("ix_user__user_group_user_id", "user_id"),)
+
    is_curator: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    user_group_id: Mapped[int] = mapped_column(
@@ -3979,6 +4002,48 @@ class User__UserGroup(Base):
    )


+class PermissionGrant(Base):
+    """A single Permission granted to a UserGroup.
+
+    At most one row may exist per (group_id, permission) pair. Values that
+    are members of Permission.IMPLIED are rejected on assignment (see
+    _validate_permission).
+    """
+
+    __tablename__ = "permission_grant"
+
+    # NOTE(review): this constraint is unconditional, so a row with
+    # is_deleted=True still occupies its (group_id, permission) slot. If
+    # is_deleted is a soft-delete flag, re-granting the same permission must
+    # revive the existing row rather than insert a new one -- confirm this is
+    # the intended behavior.
+    __table_args__ = (
+        UniqueConstraint(
+            "group_id", "permission", name="uq_permission_grant_group_permission"
+        ),
+    )
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    # Owning group; grants are removed with the group (ON DELETE CASCADE).
+    group_id: Mapped[int] = mapped_column(
+        ForeignKey("user_group.id", ondelete="CASCADE"), nullable=False
+    )
+    # Stored as a non-native enum.
+    permission: Mapped[Permission] = mapped_column(
+        Enum(Permission, native_enum=False), nullable=False
+    )
+    grant_source: Mapped[GrantSource] = mapped_column(
+        Enum(GrantSource, native_enum=False), nullable=False
+    )
+    # User who created the grant; nulled (not cascaded) if that user is deleted.
+    granted_by: Mapped[UUID | None] = mapped_column(
+        ForeignKey("user.id", ondelete="SET NULL"), nullable=True
+    )
+    # Set by the database at insert time.
+    granted_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    # Defaults to false both in the ORM and at the database level.
+    is_deleted: Mapped[bool] = mapped_column(
+        Boolean, nullable=False, default=False, server_default=text("false")
+    )
+
+    group: Mapped["UserGroup"] = relationship(
+        "UserGroup", back_populates="permission_grants"
+    )
+
+    @validates("permission")
+    def _validate_permission(self, _key: str, value: Permission) -> Permission:
+        """Reject permissions listed in Permission.IMPLIED.
+
+        Raises:
+            ValueError: if ``value`` is in Permission.IMPLIED.
+        """
+        if value in Permission.IMPLIED:
+            raise ValueError(
+                f"{value!r} is an implied permission and cannot be granted directly"
+            )
+        return value
+
+
 class UserGroup__ConnectorCredentialPair(Base):
    __tablename__ = "user_group__connector_credential_pair"

@@ -4073,6 +4138,8 @@ class UserGroup(Base):
    is_up_for_deletion: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False
    )
+    # whether this is a default group (e.g. "Basic", "Admins") that cannot be deleted
+    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    # Last time a user updated this user group
    time_last_modified_by_user: Mapped[datetime.datetime] = mapped_column(
@@ -4116,6 +4183,9 @@ class UserGroup(Base):
    accessible_mcp_servers: Mapped[list["MCPServer"]] = relationship(
        "MCPServer", secondary="mcp_server__user_group", back_populates="user_groups"
    )
+    permission_grants: Mapped[list["PermissionGrant"]] = relationship(
+        "PermissionGrant", back_populates="group", cascade="all, delete-orphan"
+    )


 """Tables related to Token Rate Limiting
@@ -5178,3 +5248,90 @@ class CacheStore(Base):
    expires_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
+
+
+class Hook(Base):
+    """Pairs a HookPoint with a customer-provided API endpoint.
+
+    At most one non-deleted Hook per HookPoint is allowed, enforced by a
+    partial unique index on (hook_point) where deleted=false.
+    """
+
+    __tablename__ = "hook"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    name: Mapped[str] = mapped_column(String, nullable=False)
+    hook_point: Mapped[HookPoint] = mapped_column(
+        Enum(HookPoint, native_enum=False), nullable=False
+    )
+    # Nullable: a Hook row can exist without an endpoint configured.
+    endpoint_url: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Persisted through the EncryptedString column type; optional.
+    api_key: Mapped[SensitiveValue[str] | None] = mapped_column(
+        EncryptedString(), nullable=True
+    )
+    is_reachable: Mapped[bool | None] = mapped_column(
+        Boolean, nullable=True, default=None
+    )  # null = never validated, true = last check passed, false = last check failed
+    # New hooks default to HookFailStrategy.HARD.
+    fail_strategy: Mapped[HookFailStrategy] = mapped_column(
+        Enum(HookFailStrategy, native_enum=False),
+        nullable=False,
+        default=HookFailStrategy.HARD,
+    )
+    timeout_seconds: Mapped[float] = mapped_column(Float, nullable=False, default=30.0)
+    # New hooks start inactive.
+    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    # Soft-delete flag; the partial unique index below only covers rows where
+    # this is false.
+    deleted: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    # Nulled (not cascaded) if the creating user is deleted.
+    creator_id: Mapped[UUID | None] = mapped_column(
+        PGUUID(as_uuid=True),
+        ForeignKey("user.id", ondelete="SET NULL"),
+        nullable=True,
+    )
+    created_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    # Initialized by the database on insert and refreshed on every UPDATE.
+    updated_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True),
+        server_default=func.now(),
+        onupdate=func.now(),
+        nullable=False,
+    )
+
+    creator: Mapped["User | None"] = relationship("User", foreign_keys=[creator_id])
+    # Logs are owned by the hook: removed with it and when orphaned.
+    execution_logs: Mapped[list["HookExecutionLog"]] = relationship(
+        "HookExecutionLog", back_populates="hook", cascade="all, delete-orphan"
+    )
+
+    # Declared after the columns because the index predicate references the
+    # ``deleted`` column object defined above.
+    __table_args__ = (
+        Index(
+            "ix_hook_one_non_deleted_per_point",
+            "hook_point",
+            unique=True,
+            postgresql_where=(deleted == False),  # noqa: E712
+        ),
+    )
+
+
+class HookExecutionLog(Base):
+    """Records hook executions for health monitoring and debugging.
+
+    Currently only failures are logged; the is_success column exists so
+    success logging can be added later without a schema change.
+    Retention: rows older than 30 days are deleted by a nightly Celery task.
+    """
+
+    __tablename__ = "hook_execution_log"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    # Indexed FK to the owning hook; rows are removed with it (ON DELETE CASCADE).
+    hook_id: Mapped[int] = mapped_column(
+        Integer,
+        ForeignKey("hook.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    # Defaults to False, consistent with failure-only logging described above.
+    is_success: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # NOTE(review): presumably the HTTP status returned by the endpoint; null
+    # when no response was received -- confirm against the writer.
+    status_code: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    # Duration in milliseconds, per the column name.
+    duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    # Indexed; NOTE(review): presumably to support the age-based retention
+    # sweep described in the class docstring -- confirm.
+    created_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
+    )
+
+    hook: Mapped["Hook"] = relationship("Hook", back_populates="execution_logs")
--- a/backend/onyx/db/persona.py
+++ b/backend/onyx/db/persona.py
@@ -50,8 +50,18 @@ from onyx.utils.variable_functionality import fetch_versioned_implementation
 logger = setup_logger()


-def get_default_behavior_persona(db_session: Session) -> Persona | None:
+def get_default_behavior_persona(
+    db_session: Session,
+    eager_load_for_tools: bool = False,
+) -> Persona | None:
+    """Fetch the persona whose id is DEFAULT_PERSONA_ID.
+
+    Args:
+        db_session: Session used to run the query.
+        eager_load_for_tools: When True, load the persona's tools,
+            document_sets, attached_documents and hierarchy_nodes up front
+            via selectinload instead of leaving them to load on access.
+
+    Returns:
+        The matching Persona, or None if no such row exists.
+    """
    stmt = select(Persona).where(Persona.id == DEFAULT_PERSONA_ID)
+    if eager_load_for_tools:
+        stmt = stmt.options(
+            selectinload(Persona.tools),
+            selectinload(Persona.document_sets),
+            selectinload(Persona.attached_documents),
+            selectinload(Persona.hierarchy_nodes),
+        )
    return db_session.scalars(stmt).first()


@@ -126,7 +136,7 @@ def _add_user_filters(
    else:
        # Group the public persona conditions
        public_condition = (Persona.is_public == True) & (  # noqa: E712
-            Persona.is_visible == True  # noqa: E712
+            Persona.is_listed == True  # noqa: E712
        )

        where_clause |= public_condition
@@ -260,7 +270,7 @@ def create_update_persona(

    try:
        # Featured persona validation
-        if create_persona_request.featured:
+        if create_persona_request.is_featured:
            # Curators can edit featured personas, but not make them
            # TODO this will be reworked soon with RBAC permissions feature
            if user.role == UserRole.CURATOR or user.role == UserRole.GLOBAL_CURATOR:
@@ -300,7 +310,7 @@ def create_update_persona(
            remove_image=create_persona_request.remove_image,
            search_start_date=create_persona_request.search_start_date,
            label_ids=create_persona_request.label_ids,
-            featured=create_persona_request.featured,
+            is_featured=create_persona_request.is_featured,
            user_file_ids=converted_user_file_ids,
            commit=False,
            hierarchy_node_ids=create_persona_request.hierarchy_node_ids,
@@ -910,11 +920,11 @@ def upsert_persona(
    uploaded_image_id: str | None = None,
    icon_name: str | None = None,
    display_priority: int | None = None,
-    is_visible: bool = True,
+    is_listed: bool = True,
    remove_image: bool | None = None,
    search_start_date: datetime | None = None,
    builtin_persona: bool = False,
-    featured: bool | None = None,
+    is_featured: bool | None = None,
    label_ids: list[int] | None = None,
    user_file_ids: list[UUID] | None = None,
    hierarchy_node_ids: list[int] | None = None,
@@ -1037,13 +1047,13 @@ def upsert_persona(
        if remove_image or uploaded_image_id:
            existing_persona.uploaded_image_id = uploaded_image_id
        existing_persona.icon_name = icon_name
-        existing_persona.is_visible = is_visible
+        existing_persona.is_listed = is_listed
        existing_persona.search_start_date = search_start_date
        if label_ids is not None:
            existing_persona.labels.clear()
            existing_persona.labels = labels or []
-        existing_persona.featured = (
-            featured if featured is not None else existing_persona.featured
+        existing_persona.is_featured = (
+            is_featured if is_featured is not None else existing_persona.is_featured
        )
        # Update embedded prompt fields if provided
        if system_prompt is not None:
@@ -1109,9 +1119,9 @@ def upsert_persona(
            uploaded_image_id=uploaded_image_id,
            icon_name=icon_name,
            display_priority=display_priority,
-            is_visible=is_visible,
+            is_listed=is_listed,
            search_start_date=search_start_date,
-            featured=(featured if featured is not None else False),
+            is_featured=(is_featured if is_featured is not None else False),
            user_files=user_files or [],
            labels=labels or [],
            hierarchy_nodes=hierarchy_nodes or [],
@@ -1158,7 +1168,7 @@ def delete_old_default_personas(

 def update_persona_featured(
    persona_id: int,
-    featured: bool,
+    is_featured: bool,
    db_session: Session,
    user: User,
 ) -> None:
@@ -1166,13 +1176,13 @@ def update_persona_featured(
        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
    )

-    persona.featured = featured
+    persona.is_featured = is_featured
    db_session.commit()


 def update_persona_visibility(
    persona_id: int,
-    is_visible: bool,
+    is_listed: bool,
    db_session: Session,
    user: User,
 ) -> None:
@@ -1180,7 +1190,7 @@ def update_persona_visibility(
        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
    )

-    persona.is_visible = is_visible
+    persona.is_listed = is_listed
    db_session.commit()


--- a/backend/onyx/db/projects.py
+++ b/backend/onyx/db/projects.py
@@ -12,6 +12,7 @@ from sqlalchemy.orm import Session
 from starlette.background import BackgroundTasks

 from onyx.configs.app_configs import DISABLE_VECTOR_DB
+from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
 from onyx.configs.constants import FileOrigin
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
@@ -144,6 +145,7 @@ def upload_files_to_user_files_with_indexing(
                kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
                queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
                priority=OnyxCeleryPriority.HIGH,
+                expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
            )
            logger.info(
                f"Triggered indexing for user_file_id={user_file.id} with task_id={task.id}"
--- a/backend/onyx/db/slack_channel_config.py
+++ b/backend/onyx/db/slack_channel_config.py
@@ -75,7 +75,7 @@ def create_slack_channel_persona(
        llm_model_version_override=None,
        starter_messages=None,
        is_public=True,
-        featured=False,
+        is_featured=False,
        db_session=db_session,
        commit=False,
    )
--- a/backend/onyx/db/swap_index.py
+++ b/backend/onyx/db/swap_index.py
@@ -2,6 +2,7 @@ import time

 from sqlalchemy.orm import Session

+from onyx.configs.app_configs import DISABLE_VECTOR_DB
 from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
 from onyx.configs.constants import KV_REINDEX_KEY
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
@@ -149,6 +150,9 @@ def check_and_perform_index_swap(db_session: Session) -> SearchSettings | None:
    Returns None if search settings did not change, or the old search settings if they
    did change.
    """
+    if DISABLE_VECTOR_DB:
+        return None
+
    # Default CC-pair created for Ingestion API unused here
    all_cc_pairs = get_connector_credential_pairs(db_session)
    cc_pair_count = max(len(all_cc_pairs) - 1, 0)
--- a/backend/onyx/db/voice.py
+++ b/backend/onyx/db/voice.py
@@ -17,39 +17,30 @@ MAX_VOICE_PLAYBACK_SPEED = 2.0
 def fetch_voice_providers(db_session: Session) -> list[VoiceProvider]:
    """Fetch all voice providers."""
    return list(
-        db_session.scalars(
-            select(VoiceProvider)
-            .where(VoiceProvider.deleted.is_(False))
-            .order_by(VoiceProvider.name)
-        ).all()
+        db_session.scalars(select(VoiceProvider).order_by(VoiceProvider.name)).all()
    )


 def fetch_voice_provider_by_id(
-    db_session: Session, provider_id: int, include_deleted: bool = False
+    db_session: Session, provider_id: int
 ) -> VoiceProvider | None:
    """Fetch a voice provider by ID."""
-    stmt = select(VoiceProvider).where(VoiceProvider.id == provider_id)
-    if not include_deleted:
-        stmt = stmt.where(VoiceProvider.deleted.is_(False))
-    return db_session.scalar(stmt)
+    return db_session.scalar(
+        select(VoiceProvider).where(VoiceProvider.id == provider_id)
+    )


 def fetch_default_stt_provider(db_session: Session) -> VoiceProvider | None:
    """Fetch the default STT provider."""
    return db_session.scalar(
-        select(VoiceProvider)
-        .where(VoiceProvider.is_default_stt.is_(True))
-        .where(VoiceProvider.deleted.is_(False))
+        select(VoiceProvider).where(VoiceProvider.is_default_stt.is_(True))
    )


 def fetch_default_tts_provider(db_session: Session) -> VoiceProvider | None:
    """Fetch the default TTS provider."""
    return db_session.scalar(
-        select(VoiceProvider)
-        .where(VoiceProvider.is_default_tts.is_(True))
-        .where(VoiceProvider.deleted.is_(False))
+        select(VoiceProvider).where(VoiceProvider.is_default_tts.is_(True))
    )


@@ -58,9 +49,7 @@ def fetch_voice_provider_by_type(
 ) -> VoiceProvider | None:
    """Fetch a voice provider by type."""
    return db_session.scalar(
-        select(VoiceProvider)
-        .where(VoiceProvider.provider_type == provider_type)
-        .where(VoiceProvider.deleted.is_(False))
+        select(VoiceProvider).where(VoiceProvider.provider_type == provider_type)
    )


@@ -119,10 +108,10 @@ def upsert_voice_provider(


 def delete_voice_provider(db_session: Session, provider_id: int) -> None:
-    """Soft-delete a voice provider by ID."""
+    """Delete a voice provider by ID."""
    provider = fetch_voice_provider_by_id(db_session, provider_id)
    if provider:
-        provider.deleted = True
+        db_session.delete(provider)
        db_session.flush()


--- a/backend/onyx/document_index/FILTER_SEMANTICS.md
+++ b/backend/onyx/document_index/FILTER_SEMANTICS.md
@@ -10,8 +10,8 @@ How `IndexFilters` fields combine into the final query filter. Applies to both V
 | **Tenant** | `tenant_id` | AND (multi-tenant only) |
 | **ACL** | `access_control_list` | OR within, AND with rest |
 | **Narrowing** | `source_type`, `tags`, `time_cutoff` | Each OR within, AND with rest |
-| **Knowledge scope** | `document_set`, `user_file_ids`, `attached_document_ids`, `hierarchy_node_ids` | OR within group, AND with rest |
-| **Additive scope** | `project_id`, `persona_id` | OR'd into knowledge scope **only when** a knowledge scope filter already exists |
+| **Knowledge scope** | `document_set`, `attached_document_ids`, `hierarchy_node_ids`, `persona_id_filter` | OR within group, AND with rest |
+| **Additive scope** | `project_id_filter` | OR'd into knowledge scope **only when** a knowledge scope filter already exists |

 ## How filters combine

@@ -31,12 +31,22 @@ AND time >= cutoff                      -- if set

 The knowledge scope filter controls **what knowledge an assistant can access**.

+### Primary vs additive triggers
+
+- **`persona_id_filter`** is a **primary** trigger. A persona with user files IS explicit
+  knowledge, so `persona_id_filter` alone can start a knowledge scope. Note: this is
+  NOT the raw ID of the persona being used — it is only set when the persona's
+  user files overflowed the LLM context window.
+- **`project_id_filter`** is **additive**. It widens an existing scope to include project
+  files but never restricts on its own — a chat inside a project should still search
+  team knowledge when no other knowledge is attached.
+
 ### No explicit knowledge attached

-When `document_set`, `user_file_ids`, `attached_document_ids`, and `hierarchy_node_ids` are all empty/None:
+When `document_set`, `attached_document_ids`, `hierarchy_node_ids`, and `persona_id_filter` are all empty/None:

 - **No knowledge scope filter is applied.** The assistant can see everything (subject to ACL).
- `project_id` and `persona_id` are ignored — they never restrict on their own.
+- `project_id_filter` is ignored — it never restricts on its own.

 ### One explicit knowledge type

@@ -44,39 +54,40 @@ When `document_set`, `user_file_ids`, `attached_document_ids`, and `hierarchy_no
 -- Only document sets
 AND (document_sets contains "Engineering" OR document_sets contains "Legal")

-- Only user files
-AND (document_id = "uuid-1" OR document_id = "uuid-2")
+-- Only persona user files (overflowed context)
+AND (personas contains 42)
 ```

 ### Multiple explicit knowledge types (OR'd)

 ```
-- Document sets + user files
-AND (
-    document_sets contains "Engineering"
-    OR document_id = "uuid-1"
-)
-```
-
-### Explicit knowledge + overflowing user files
-
-When an explicit knowledge restriction is in effect **and** `project_id` or `persona_id` is set (user files overflowed the LLM context window), the additive scopes widen the filter:
-
-```
-- Document sets + persona user files overflowed
+-- Document sets + persona user files
 AND (
    document_sets contains "Engineering"
    OR personas contains 42
 )
+```

-- User files + project files overflowed
+### Explicit knowledge + overflowing project files
+
+When an explicit knowledge restriction is in effect **and** `project_id_filter` is set (project files overflowed the LLM context window), `project_id_filter` widens the filter:
+
+```
+-- Document sets + project files overflowed
 AND (
-    document_id = "uuid-1"
+    document_sets contains "Engineering"
+    OR user_project contains 7
+)
+
+-- Persona user files + project files (won't happen in practice;
+-- custom personas ignore project files per the precedence rule)
+AND (
+    personas contains 42
    OR user_project contains 7
 )
 ```

-### Only project_id or persona_id (no explicit knowledge)
+### Only project_id_filter (no explicit knowledge)

 No knowledge scope filter. The assistant searches everything.

@@ -91,11 +102,10 @@ AND (acl contains ...)
 | Filter field | Vespa field | Vespa type | Purpose |
 |---|---|---|---|
 | `document_set` | `document_sets` | `weightedset<string>` | Connector doc sets attached to assistant |
-| `user_file_ids` | `document_id` | `string` | User files uploaded to assistant |
 | `attached_document_ids` | `document_id` | `string` | Documents explicitly attached (OpenSearch only) |
 | `hierarchy_node_ids` | `ancestor_hierarchy_node_ids` | `array<int>` | Folder/space nodes (OpenSearch only) |
-| `project_id` | `user_project` | `array<int>` | Project tag for overflowing user files |
-| `persona_id` | `personas` | `array<int>` | Persona tag for overflowing user files |
+| `persona_id_filter` | `personas` | `array<int>` | Persona tag for overflowing user files (**primary** trigger) |
+| `project_id_filter` | `user_project` | `array<int>` | Project tag for overflowing project files (**additive** only) |
 | `access_control_list` | `access_control_list` | `weightedset<string>` | ACL entries for the requesting user |
 | `source_type` | `source_type` | `string` | Connector source type (e.g. `web`, `jira`) |
 | `tags` | `metadata_list` | `array<string>` | Document metadata tags |
--- a/backend/onyx/document_index/interfaces_new.py
+++ b/backend/onyx/document_index/interfaces_new.py
@@ -381,6 +381,47 @@ class HybridCapable(abc.ABC):
        """
        raise NotImplementedError

+    @abc.abstractmethod
+    def keyword_retrieval(
+        self,
+        query: str,
+        filters: IndexFilters,
+        num_to_retrieve: int,
+    ) -> list[InferenceChunk]:
+        """Runs keyword-only search and returns a list of inference chunks.
+
+        Abstract: subclasses must override this method. The base body only
+        raises NotImplementedError.
+
+        Args:
+            query: User query.
+            filters: Filters for things like permissions, source type, time,
+                etc.
+            num_to_retrieve: Number of highest matching chunks to return.
+
+        Returns:
+            Score-ranked (highest first) list of highest matching chunks.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def semantic_retrieval(
+        self,
+        query_embedding: Embedding,
+        filters: IndexFilters,
+        num_to_retrieve: int,
+    ) -> list[InferenceChunk]:
+        """Runs semantic-only search and returns a list of inference chunks.
+
+        Abstract: subclasses must override this method. The base body only
+        raises NotImplementedError.
+
+        Args:
+            query_embedding: Vector representation of the query. Must be of the
+                correct dimensionality for the primary index.
+            filters: Filters for things like permissions, source type, time,
+                etc.
+            num_to_retrieve: Number of highest matching chunks to return.
+
+        Returns:
+            Score-ranked (highest first) list of highest matching chunks.
+        """
+        raise NotImplementedError
+

 class RandomCapable(abc.ABC):
    """
--- a/backend/onyx/document_index/opensearch/client.py
+++ b/backend/onyx/document_index/opensearch/client.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import time
 from contextlib import AbstractContextManager
@@ -17,9 +18,13 @@ from onyx.configs.app_configs import OPENSEARCH_ADMIN_USERNAME
 from onyx.configs.app_configs import OPENSEARCH_HOST
 from onyx.configs.app_configs import OPENSEARCH_REST_API_PORT
 from onyx.document_index.interfaces_new import TenantState
+from onyx.document_index.opensearch.constants import OpenSearchSearchType
 from onyx.document_index.opensearch.schema import DocumentChunk
+from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
 from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
 from onyx.document_index.opensearch.search import DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW
+from onyx.server.metrics.opensearch_search import observe_opensearch_search
+from onyx.server.metrics.opensearch_search import track_opensearch_search_in_progress
 from onyx.utils.logger import setup_logger
 from onyx.utils.timing import log_function_time

@@ -56,8 +61,8 @@ class SearchHit(BaseModel, Generic[SchemaDocumentModel]):
    # Maps schema property name to a list of highlighted snippets with match
    # terms wrapped in tags (e.g. "something <hi>keyword</hi> other thing").
    match_highlights: dict[str, list[str]] = {}
-    # Score explanation from OpenSearch when "explain": true is set in the query.
-    # Contains detailed breakdown of how the score was calculated.
+    # Score explanation from OpenSearch when "explain": true is set in the
+    # query. Contains detailed breakdown of how the score was calculated.
    explanation: dict[str, Any] | None = None


@@ -254,7 +259,6 @@ class OpenSearchClient(AbstractContextManager):
        """
        return self._client.ping()

-    @log_function_time(print_only=True, debug_only=True)
    def close(self) -> None:
        """Closes the client.

@@ -302,6 +306,7 @@ class OpenSearchIndexClient(OpenSearchClient):
        verify_certs: bool = False,
        ssl_show_warn: bool = False,
        timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
+        emit_metrics: bool = True,
    ):
        super().__init__(
            host=host,
@@ -313,6 +318,7 @@ class OpenSearchIndexClient(OpenSearchClient):
            timeout=timeout,
        )
        self._index_name = index_name
+        self._emit_metrics = emit_metrics
        logger.debug(
            f"OpenSearch client created successfully for index {self._index_name}."
        )
@@ -832,10 +838,17 @@ class OpenSearchIndexClient(OpenSearchClient):

    @log_function_time(print_only=True, debug_only=True)
    def search(
-        self, body: dict[str, Any], search_pipeline_id: str | None
-    ) -> list[SearchHit[DocumentChunk]]:
+        self,
+        body: dict[str, Any],
+        search_pipeline_id: str | None,
+        search_type: OpenSearchSearchType = OpenSearchSearchType.UNKNOWN,
+    ) -> list[SearchHit[DocumentChunkWithoutVectors]]:
        """Searches the index.

+        NOTE: Does not return vector fields. In order to take advantage of
+        performance benefits, the search body should exclude the schema's vector
+        fields.
+
        TODO(andrei): Ideally we could check that every field in the body is
        present in the index, to avoid a class of runtime bugs that could easily
        be caught during development. Or change the function signature to accept
@@ -846,6 +859,8 @@ class OpenSearchIndexClient(OpenSearchClient):
                documentation for more information on search request bodies.
            search_pipeline_id: The ID of the search pipeline to use. If None,
                the default search pipeline will be used.
+            search_type: Label for Prometheus metrics. Does not affect search
+                behavior.

        Raises:
            Exception: There was an error searching the index.
@@ -858,21 +873,27 @@ class OpenSearchIndexClient(OpenSearchClient):
        )
        result: dict[str, Any]
        params = {"phase_took": "true"}
-        if search_pipeline_id:
-            result = self._client.search(
-                index=self._index_name,
-                search_pipeline=search_pipeline_id,
-                body=body,
-                params=params,
-            )
-        else:
-            result = self._client.search(
-                index=self._index_name, body=body, params=params
-            )
+        ctx = self._get_emit_metrics_context_manager(search_type)
+        t0 = time.perf_counter()
+        with ctx:
+            if search_pipeline_id:
+                result = self._client.search(
+                    index=self._index_name,
+                    search_pipeline=search_pipeline_id,
+                    body=body,
+                    params=params,
+                )
+            else:
+                result = self._client.search(
+                    index=self._index_name, body=body, params=params
+                )
+        client_duration_s = time.perf_counter() - t0

        hits, time_took, timed_out, phase_took, profile = (
            self._get_hits_and_profile_from_search_result(result)
        )
+        if self._emit_metrics:
+            observe_opensearch_search(search_type, client_duration_s, time_took)
        self._log_search_result_perf(
            time_took=time_took,
            timed_out=timed_out,
@@ -883,7 +904,7 @@ class OpenSearchIndexClient(OpenSearchClient):
            raise_on_timeout=True,
        )

-        search_hits: list[SearchHit[DocumentChunk]] = []
+        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []
        for hit in hits:
            document_chunk_source: dict[str, Any] | None = hit.get("_source")
            if not document_chunk_source:
@@ -893,8 +914,10 @@ class OpenSearchIndexClient(OpenSearchClient):
            document_chunk_score = hit.get("_score", None)
            match_highlights: dict[str, list[str]] = hit.get("highlight", {})
            explanation: dict[str, Any] | None = hit.get("_explanation", None)
-            search_hit = SearchHit[DocumentChunk](
-                document_chunk=DocumentChunk.model_validate(document_chunk_source),
+            search_hit = SearchHit[DocumentChunkWithoutVectors](
+                document_chunk=DocumentChunkWithoutVectors.model_validate(
+                    document_chunk_source
+                ),
                score=document_chunk_score,
                match_highlights=match_highlights,
                explanation=explanation,
@@ -906,7 +929,11 @@ class OpenSearchIndexClient(OpenSearchClient):
        return search_hits

    @log_function_time(print_only=True, debug_only=True)
-    def search_for_document_ids(self, body: dict[str, Any]) -> list[str]:
+    def search_for_document_ids(
+        self,
+        body: dict[str, Any],
+        search_type: OpenSearchSearchType = OpenSearchSearchType.DOCUMENT_IDS,
+    ) -> list[str]:
        """Searches the index and returns only document chunk IDs.

        In order to take advantage of the performance benefits of only returning
@@ -923,6 +950,8 @@ class OpenSearchIndexClient(OpenSearchClient):
                documentation for more information on search request bodies.
                TODO(andrei): Make this a more deep interface; callers shouldn't
                need to know to set _source: False for example.
+            search_type: Label for Prometheus metrics. Does not affect search
+                behavior.

        Raises:
            Exception: There was an error searching the index.
@@ -940,13 +969,19 @@ class OpenSearchIndexClient(OpenSearchClient):
            )

        params = {"phase_took": "true"}
-        result: dict[str, Any] = self._client.search(
-            index=self._index_name, body=body, params=params
-        )
+        ctx = self._get_emit_metrics_context_manager(search_type)
+        t0 = time.perf_counter()
+        with ctx:
+            result: dict[str, Any] = self._client.search(
+                index=self._index_name, body=body, params=params
+            )
+        client_duration_s = time.perf_counter() - t0

        hits, time_took, timed_out, phase_took, profile = (
            self._get_hits_and_profile_from_search_result(result)
        )
+        if self._emit_metrics:
+            observe_opensearch_search(search_type, client_duration_s, time_took)
        self._log_search_result_perf(
            time_took=time_took,
            timed_out=timed_out,
@@ -1055,7 +1090,7 @@ class OpenSearchIndexClient(OpenSearchClient):
                f"Body: {get_new_body_without_vectors(body)}\n"
                f"Search pipeline ID: {search_pipeline_id}\n"
                f"Phase took: {phase_took}\n"
-                f"Profile: {profile}\n"
+                f"Profile: {json.dumps(profile, indent=2)}\n"
            )
        if timed_out:
            error_str = f"OpenSearch client error: Search timed out for index {self._index_name}."
@@ -1063,6 +1098,20 @@ class OpenSearchIndexClient(OpenSearchClient):
            if raise_on_timeout:
                raise RuntimeError(error_str)

+    def _get_emit_metrics_context_manager(
+        self, search_type: OpenSearchSearchType
+    ) -> AbstractContextManager[None]:
+        """
+        Returns a context manager that tracks in-flight OpenSearch searches via
+        a Gauge if emit_metrics is True, otherwise returns a null context
+        manager.
+        """
+        return (
+            track_opensearch_search_in_progress(search_type)
+            if self._emit_metrics
+            else nullcontext()
+        )
+

 def wait_for_opensearch_with_timeout(
    wait_interval_s: int = 5,
--- a/backend/onyx/document_index/opensearch/constants.py
+++ b/backend/onyx/document_index/opensearch/constants.py
@@ -1,12 +1,23 @@
 # Default value for the maximum number of tokens a chunk can hold, if none is
 # specified when creating an index.
-from onyx.configs.app_configs import (
-    OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES,
-)
+import os
+from enum import Enum


 DEFAULT_MAX_CHUNK_SIZE = 512

+
+# By default OpenSearch will only return a maximum of this many results in a
+# given search. This value is configurable in the index settings.
+DEFAULT_OPENSEARCH_MAX_RESULT_WINDOW = 10_000
+
+
+# For documents which do not have a value for LAST_UPDATED_FIELD_NAME, we assume
+# that the document was last updated this many days ago for the purpose of time
+# cutoff filtering during retrieval.
+ASSUMED_DOCUMENT_AGE_DAYS = 90
+
+
 # Size of the dynamic list used to consider elements during kNN graph creation.
 # Higher values improve search quality but increase indexing time. Values
 # typically range between 100 - 512.
@@ -26,10 +37,10 @@ M = 32  # Set relatively high for better accuracy.
 # we have a much higher chance of all 10 of the final desired docs showing up
 # and getting scored. In worse situations, the final 10 docs don't even show up
 # as the final 10 (worse than just a miss at the reranking step).
-DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = (
-    OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES
-    if OPENSEARCH_OVERRIDE_DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES > 0
-    else 750
+# Defaults to 100 for now. Initially this defaulted to 750 but we were seeing
+# poor search performance.
+DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES = int(
+    os.environ.get("DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES", 100)
 )

 # Number of vectors to examine to decide the top k neighbors for the HNSW
@@ -39,23 +50,55 @@ DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES = (
 # larger than k, you can provide the size parameter to limit the final number of
 # results to k." from
 # https://docs.opensearch.org/latest/query-dsl/specialized/k-nn/index/#ef_search
-EF_SEARCH = DEFAULT_NUM_HYBRID_SEARCH_CANDIDATES
+EF_SEARCH = DEFAULT_NUM_HYBRID_SUBQUERY_CANDIDATES

-# Since the titles are included in the contents, the embedding matches are
-# heavily downweighted as they act as a boost rather than an independent scoring
-# component.
-SEARCH_TITLE_VECTOR_WEIGHT = 0.1
-SEARCH_CONTENT_VECTOR_WEIGHT = 0.45
-# Single keyword weight for both title and content (merged from former title
-# keyword + content keyword).
-SEARCH_KEYWORD_WEIGHT = 0.45

-# NOTE: It is critical that the order of these weights matches the order of the
-# sub-queries in the hybrid search.
-HYBRID_SEARCH_NORMALIZATION_WEIGHTS = [
-    SEARCH_TITLE_VECTOR_WEIGHT,
-    SEARCH_CONTENT_VECTOR_WEIGHT,
-    SEARCH_KEYWORD_WEIGHT,
-]
+class OpenSearchSearchType(str, Enum):
+    """Search type label used for Prometheus metrics."""

-assert sum(HYBRID_SEARCH_NORMALIZATION_WEIGHTS) == 1.0
+    HYBRID = "hybrid"
+    KEYWORD = "keyword"
+    SEMANTIC = "semantic"
+    RANDOM = "random"
+    ID_RETRIEVAL = "id_retrieval"
+    DOCUMENT_IDS = "document_ids"
+    UNKNOWN = "unknown"
+
+
+class HybridSearchSubqueryConfiguration(Enum):
+    TITLE_VECTOR_CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD = 1
+    # Current default.
+    CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD = 2
+
+
+# Will raise and block application start if HYBRID_SEARCH_SUBQUERY_CONFIGURATION
+# is set but not a valid value. If not set, defaults to
+# CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD.
+HYBRID_SEARCH_SUBQUERY_CONFIGURATION: HybridSearchSubqueryConfiguration = (
+    HybridSearchSubqueryConfiguration(
+        int(os.environ["HYBRID_SEARCH_SUBQUERY_CONFIGURATION"])
+    )
+    if os.environ.get("HYBRID_SEARCH_SUBQUERY_CONFIGURATION", None) is not None
+    else HybridSearchSubqueryConfiguration.CONTENT_VECTOR_TITLE_CONTENT_COMBINED_KEYWORD
+)
+
+
+class HybridSearchNormalizationPipeline(Enum):
+    # Current default.
+    MIN_MAX = 1
+    # NOTE: Using z-score normalization is better for hybrid search from a
+    # theoretical standpoint. Empirically on a small dataset of up to 10K docs,
+    # it's not very different. Likely more impactful at scale.
+    # https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/
+    ZSCORE = 2
+
+
+# Will raise and block application start if HYBRID_SEARCH_NORMALIZATION_PIPELINE
+# is set but not a valid value. If not set, defaults to MIN_MAX.
+HYBRID_SEARCH_NORMALIZATION_PIPELINE: HybridSearchNormalizationPipeline = (
+    HybridSearchNormalizationPipeline(
+        int(os.environ["HYBRID_SEARCH_NORMALIZATION_PIPELINE"])
+    )
+    if os.environ.get("HYBRID_SEARCH_NORMALIZATION_PIPELINE", None) is not None
+    else HybridSearchNormalizationPipeline.MIN_MAX
+)
--- a/backend/onyx/document_index/opensearch/opensearch_document_index.py
+++ b/backend/onyx/document_index/opensearch/opensearch_document_index.py
@@ -43,10 +43,12 @@ from onyx.document_index.opensearch.client import OpenSearchClient
 from onyx.document_index.opensearch.client import OpenSearchIndexClient
 from onyx.document_index.opensearch.client import SearchHit
 from onyx.document_index.opensearch.cluster_settings import OPENSEARCH_CLUSTER_SETTINGS
+from onyx.document_index.opensearch.constants import OpenSearchSearchType
 from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
 from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
 from onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME
 from onyx.document_index.opensearch.schema import DocumentChunk
+from onyx.document_index.opensearch.schema import DocumentChunkWithoutVectors
 from onyx.document_index.opensearch.schema import DocumentSchema
 from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
 from onyx.document_index.opensearch.schema import GLOBAL_BOOST_FIELD_NAME
@@ -55,16 +57,13 @@ from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
 from onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME
 from onyx.document_index.opensearch.search import DocumentQuery
 from onyx.document_index.opensearch.search import (
-    MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
+    get_min_max_normalization_pipeline_name_and_config,
 )
 from onyx.document_index.opensearch.search import (
-    MIN_MAX_NORMALIZATION_PIPELINE_NAME,
+    get_normalization_pipeline_name_and_config,
 )
 from onyx.document_index.opensearch.search import (
-    ZSCORE_NORMALIZATION_PIPELINE_CONFIG,
-)
-from onyx.document_index.opensearch.search import (
-    ZSCORE_NORMALIZATION_PIPELINE_NAME,
+    get_zscore_normalization_pipeline_name_and_config,
 )
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import Document
@@ -103,18 +102,24 @@ def set_cluster_state(client: OpenSearchClient) -> None:
            "is not the first time running Onyx against this instance of OpenSearch, these "
            "settings have likely already been set. Not taking any further action..."
        )
-    client.create_search_pipeline(
-        pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
-        pipeline_body=MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
+    min_max_normalization_pipeline_name, min_max_normalization_pipeline_config = (
+        get_min_max_normalization_pipeline_name_and_config()
+    )
+    zscore_normalization_pipeline_name, zscore_normalization_pipeline_config = (
+        get_zscore_normalization_pipeline_name_and_config()
    )
    client.create_search_pipeline(
-        pipeline_id=ZSCORE_NORMALIZATION_PIPELINE_NAME,
-        pipeline_body=ZSCORE_NORMALIZATION_PIPELINE_CONFIG,
+        pipeline_id=min_max_normalization_pipeline_name,
+        pipeline_body=min_max_normalization_pipeline_config,
+    )
+    client.create_search_pipeline(
+        pipeline_id=zscore_normalization_pipeline_name,
+        pipeline_body=zscore_normalization_pipeline_config,
    )


 def _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
-    chunk: DocumentChunk,
+    chunk: DocumentChunkWithoutVectors,
    score: float | None,
    highlights: dict[str, list[str]],
 ) -> InferenceChunkUncleaned:
@@ -877,7 +882,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
        )
        results: list[InferenceChunk] = []
        for chunk_request in chunk_requests:
-            search_hits: list[SearchHit[DocumentChunk]] = []
+            search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = []
            query_body = DocumentQuery.get_from_document_id_query(
                document_id=chunk_request.document_id,
                tenant_state=self._tenant_state,
@@ -896,6 +901,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
            search_hits = self._client.search(
                body=query_body,
                search_pipeline_id=None,
+                search_type=OpenSearchSearchType.ID_RETRIEVAL,
            )
            inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
                _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
@@ -919,6 +925,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
        filters: IndexFilters,
        num_to_retrieve: int,
    ) -> list[InferenceChunk]:
+        # TODO(andrei): There is some duplicated logic in this function with
+        # others in this file.
        logger.debug(
            f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}."
        )
@@ -940,17 +948,99 @@ class OpenSearchDocumentIndex(DocumentIndex):
            index_filters=filters,
            include_hidden=False,
        )
-        # NOTE: Using z-score normalization here because it's better for hybrid
-        # search from a theoretical standpoint. Empirically on a small dataset
-        # of up to 10K docs, it's not very different. Likely more impactful at
-        # scale.
-        # https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/
-        search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
+        normalization_pipeline_name, _ = get_normalization_pipeline_name_and_config()
+        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
            body=query_body,
-            search_pipeline_id=ZSCORE_NORMALIZATION_PIPELINE_NAME,
+            search_pipeline_id=normalization_pipeline_name,
+            search_type=OpenSearchSearchType.HYBRID,
+        )
+
+        # Good place for a breakpoint to inspect the search hits if you have
+        # "explain" enabled.
+        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
+            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
+                search_hit.document_chunk, search_hit.score, search_hit.match_highlights
+            )
+            for search_hit in search_hits
+        ]
+        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
+            inference_chunks_uncleaned
+        )
+
+        return inference_chunks
+
+    def keyword_retrieval(
+        self,
+        query: str,
+        filters: IndexFilters,
+        num_to_retrieve: int,
+    ) -> list[InferenceChunk]:
+        # TODO(andrei): There is some duplicated logic in this function with
+        # others in this file.
+        logger.debug(
+            f"[OpenSearchDocumentIndex] Keyword retrieving {num_to_retrieve} chunks for index {self._index_name}."
+        )
+        query_body = DocumentQuery.get_keyword_search_query(
+            query_text=query,
+            num_hits=num_to_retrieve,
+            tenant_state=self._tenant_state,
+            # NOTE: Index filters includes metadata tags which were filtered
+            # for invalid unicode at indexing time. In theory it would be
+            # ideal to do filtering here as well, in practice we never did
+            # that in the Vespa codepath and have not seen issues in
+            # production, so we deliberately conform to the existing logic
+            # in order to not unknowingly introduce a possible bug.
+            index_filters=filters,
+            include_hidden=False,
+        )
+        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
+            body=query_body,
+            search_pipeline_id=None,
+            search_type=OpenSearchSearchType.KEYWORD,
+        )
+
+        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
+            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
+                search_hit.document_chunk, search_hit.score, search_hit.match_highlights
+            )
+            for search_hit in search_hits
+        ]
+        inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
+            inference_chunks_uncleaned
+        )
+
+        return inference_chunks
+
+    def semantic_retrieval(
+        self,
+        query_embedding: Embedding,
+        filters: IndexFilters,
+        num_to_retrieve: int,
+    ) -> list[InferenceChunk]:
+        # TODO(andrei): There is some duplicated logic in this function with
+        # others in this file.
+        logger.debug(
+            f"[OpenSearchDocumentIndex] Semantic retrieving {num_to_retrieve} chunks for index {self._index_name}."
+        )
+        query_body = DocumentQuery.get_semantic_search_query(
+            query_embedding=query_embedding,
+            num_hits=num_to_retrieve,
+            tenant_state=self._tenant_state,
+            # NOTE: Index filters includes metadata tags which were filtered
+            # for invalid unicode at indexing time. In theory it would be
+            # ideal to do filtering here as well, in practice we never did
+            # that in the Vespa codepath and have not seen issues in
+            # production, so we deliberately conform to the existing logic
+            # in order to not unknowingly introduce a possible bug.
+            index_filters=filters,
+            include_hidden=False,
+        )
+        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
+            body=query_body,
+            search_pipeline_id=None,
+            search_type=OpenSearchSearchType.SEMANTIC,
        )

-        # Good place for a breakpoint to inspect the search hits if you have "explain" enabled.
        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
                search_hit.document_chunk, search_hit.score, search_hit.match_highlights
@@ -977,9 +1067,10 @@ class OpenSearchDocumentIndex(DocumentIndex):
            index_filters=filters,
            num_to_retrieve=num_to_retrieve,
        )
-        search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
+        search_hits: list[SearchHit[DocumentChunkWithoutVectors]] = self._client.search(
            body=query_body,
            search_pipeline_id=None,
+            search_type=OpenSearchSearchType.RANDOM,
        )
        inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
            _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
--- a/backend/onyx/document_index/opensearch/schema.py
+++ b/backend/onyx/document_index/opensearch/schema.py
@@ -11,6 +11,8 @@ from pydantic import model_serializer
 from pydantic import model_validator
 from pydantic import SerializerFunctionWrapHandler

+from onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_REPLICAS
+from onyx.configs.app_configs import OPENSEARCH_INDEX_NUM_SHARDS
 from onyx.configs.app_configs import OPENSEARCH_TEXT_ANALYZER
 from onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH
 from onyx.document_index.interfaces_new import TenantState
@@ -100,9 +102,9 @@ def set_or_convert_timezone_to_utc(value: datetime) -> datetime:
    return value


-class DocumentChunk(BaseModel):
+class DocumentChunkWithoutVectors(BaseModel):
    """
-    Represents a chunk of a document in the OpenSearch index.
+    Represents a chunk of a document in the OpenSearch index without vectors.

    The names of these fields are based on the OpenSearch schema. Changes to the
    schema require changes here. See get_document_schema.
@@ -124,9 +126,7 @@ class DocumentChunk(BaseModel):

    # Either both should be None or both should be non-None.
    title: str | None = None
-    title_vector: list[float] | None = None
    content: str
-    content_vector: list[float]

    source_type: str
    # A list of key-value pairs separated by INDEX_SEPARATOR. See
@@ -176,19 +176,9 @@ class DocumentChunk(BaseModel):
    def __str__(self) -> str:
        return (
            f"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, "
-            f"content length={len(self.content)}, content vector length={len(self.content_vector)}, "
-            f"tenant_id={self.tenant_id.tenant_id})"
+            f"content length={len(self.content)}, tenant_id={self.tenant_id.tenant_id})."
        )

-    @model_validator(mode="after")
-    def check_title_and_title_vector_are_consistent(self) -> Self:
-        # title and title_vector should both either be None or not.
-        if self.title is not None and self.title_vector is None:
-            raise ValueError("Bug: Title vector must not be None if title is not None.")
-        if self.title_vector is not None and self.title is None:
-            raise ValueError("Bug: Title must not be None if title vector is not None.")
-        return self
-
    @model_serializer(mode="wrap")
    def serialize_model(
        self, handler: SerializerFunctionWrapHandler
@@ -305,6 +295,35 @@ class DocumentChunk(BaseModel):
            return TenantState(tenant_id=value, multitenant=MULTI_TENANT)


+class DocumentChunk(DocumentChunkWithoutVectors):
+    """Represents a chunk of a document in the OpenSearch index.
+
+    The names of these fields are based on the OpenSearch schema. Changes to the
+    schema require changes here. See get_document_schema.
+    """
+
+    model_config = {"frozen": True}
+
+    title_vector: list[float] | None = None
+    content_vector: list[float]
+
+    def __str__(self) -> str:
+        return (
+            f"DocumentChunk(document_id={self.document_id}, chunk_index={self.chunk_index}, "
+            f"content length={len(self.content)}, content vector length={len(self.content_vector)}, "
+            f"tenant_id={self.tenant_id.tenant_id})"
+        )
+
+    @model_validator(mode="after")
+    def check_title_and_title_vector_are_consistent(self) -> Self:
+        # title and title_vector should both either be None or not.
+        if self.title is not None and self.title_vector is None:
+            raise ValueError("Bug: Title vector must not be None if title is not None.")
+        if self.title_vector is not None and self.title is None:
+            raise ValueError("Bug: Title must not be None if title vector is not None.")
+        return self
+
+
 class DocumentSchema:
    """
    Represents the schema and indexing strategies of the OpenSearch index.
@@ -516,78 +535,35 @@ class DocumentSchema:

        return schema

-    @staticmethod
-    def get_index_settings() -> dict[str, Any]:
-        """
-        Standard settings for reasonable local index and search performance.
-        """
-        return {
-            "index": {
-                "number_of_shards": 1,
-                "number_of_replicas": 1,
-                # Required for vector search.
-                "knn": True,
-                "knn.algo_param.ef_search": EF_SEARCH,
-            }
-        }
-
-    @staticmethod
-    def get_index_settings_for_aws_managed_opensearch_st_dev() -> dict[str, Any]:
-        """
-        Settings for AWS-managed OpenSearch.
-
-        Our AWS-managed OpenSearch cluster has 3 data nodes in 3 availability
-        zones.
-          - We use 3 shards to distribute load across all data nodes.
-          - We use 2 replicas to ensure each shard has a copy in each
-            availability zone. This is a hard requirement from AWS. The number
-            of data copies, including the primary (not a replica) copy, must be
-            divisible by the number of AZs.
-        """
-        return {
-            "index": {
-                "number_of_shards": 3,
-                "number_of_replicas": 2,
-                # Required for vector search.
-                "knn": True,
-                "knn.algo_param.ef_search": EF_SEARCH,
-            }
-        }
-
-    @staticmethod
-    def get_index_settings_for_aws_managed_opensearch_mt_cloud() -> dict[str, Any]:
-        """
-        Settings for AWS-managed OpenSearch in multi-tenant cloud.
-
-        324 shards very roughly targets a storage load of ~30Gb per shard, which
-        according to AWS OpenSearch documentation is within a good target range.
-
-        As documented above we need 2 replicas for a total of 3 copies of the
-        data because the cluster is configured with 3-AZ awareness.
-        """
-        return {
-            "index": {
-                "number_of_shards": 324,
-                "number_of_replicas": 2,
-                # Required for vector search.
-                "knn": True,
-                "knn.algo_param.ef_search": EF_SEARCH,
-            }
-        }
-
    @staticmethod
    def get_index_settings_based_on_environment() -> dict[str, Any]:
        """
        Returns the index settings based on the environment.
        """
        if USING_AWS_MANAGED_OPENSEARCH:
+            # NOTE: The number of data copies, including the primary (not a
+            # replica) copy, must be divisible by the number of AZs.
            if MULTI_TENANT:
-                return (
-                    DocumentSchema.get_index_settings_for_aws_managed_opensearch_mt_cloud()
-                )
+                number_of_shards = 324
+                number_of_replicas = 2
            else:
-                return (
-                    DocumentSchema.get_index_settings_for_aws_managed_opensearch_st_dev()
-                )
+                number_of_shards = 3
+                number_of_replicas = 2
        else:
-            return DocumentSchema.get_index_settings()
+            number_of_shards = 1
+            number_of_replicas = 1
+
+        if OPENSEARCH_INDEX_NUM_SHARDS is not None:
+            number_of_shards = OPENSEARCH_INDEX_NUM_SHARDS
+        if OPENSEARCH_INDEX_NUM_REPLICAS is not None:
+            number_of_replicas = OPENSEARCH_INDEX_NUM_REPLICAS
+
+        return {
+            "index": {
+                "number_of_shards": number_of_shards,
+                "number_of_replicas": number_of_replicas,
+                # Required for vector search.
+                "knn": True,
+                "knn.algo_param.ef_search": EF_SEARCH,
+            }
+        }
--- a/backend/onyx/document_index/opensearch/search.py
+++ b/backend/onyx/document_index/opensearch/search.py
--- a/Show More
+++ b/Show More