Lookup by name

.
Test
2026-02-27 20:55:45 +00:00 · 2026-02-27 12:40:18 -08:00 · 2026-02-27 12:35:38 -08:00 · 2026-02-27 10:29:10 -08:00 · 2026-02-27 10:14:53 -08:00 · 2026-02-27 04:22:28 +00:00
216 changed files with 10120 additions and 2178 deletions
--- a/.github/actions/build-backend-image/action.yml
+++ b/.github/actions/build-backend-image/action.yml
@@ -0,0 +1,73 @@
+name: "Build Backend Image"  # Composite action: builds ./backend/Dockerfile and pushes it to the runs-on ECR cache registry.
+description: "Builds and pushes the backend Docker image with cache reuse"
+inputs:
+  runs-on-ecr-cache:  # Registry that receives both the image tag and the registry build caches.
+    description: "ECR cache registry from runs-on/action"
+    required: true
+  ref-name:  # Sanitised into the cache suffix only when pr-number is empty.
+    description: "Git ref name used for cache suffix fallback"
+    required: true
+  pr-number:  # Takes precedence over ref-name as the cache suffix when non-empty.
+    description: "Optional PR number for cache suffix"
+    required: false
+    default: ""
+  github-sha:  # Used for the most specific cache tag (backend-cache-<sha>).
+    description: "Commit SHA used for cache keys"
+    required: true
+  run-id:  # Becomes the image tag suffix: nightly-llm-it-backend-<run-id>.
+    description: "GitHub run ID used in output image tag"
+    required: true
+  docker-username:
+    description: "Docker Hub username"
+    required: true
+  docker-token:
+    description: "Docker Hub token"
+    required: true
+  docker-no-cache:  # Compared against the exact string 'true'; any other value leaves caching enabled.
+    description: "Set to 'true' to disable docker build cache"
+    required: false
+    default: "false"
+runs:
+  using: "composite"
+  steps:
+    - name: Format branch name for cache
+      id: format-branch
+      shell: bash
+      env:
+        PR_NUMBER: ${{ inputs.pr-number }}
+        REF_NAME: ${{ inputs.ref-name }}
+      run: |
+        # Prefer the PR number; otherwise fall back to the ref name with every
+        # character outside [A-Za-z0-9._-] replaced by "-".
+        suffix="${PR_NUMBER}"
+        if [ -z "${suffix}" ]; then
+          suffix="${REF_NAME//[^A-Za-z0-9._-]/-}"
+        fi
+        printf 'cache-suffix=%s\n' "${suffix}" >> "$GITHUB_OUTPUT"
+
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
+
+    - name: Login to Docker Hub  # Authenticates with the docker-username / docker-token inputs before the build.
+      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
+      with:
+        username: ${{ inputs.docker-username }}
+        password: ${{ inputs.docker-token }}
+
+    - name: Build and push Backend Docker image  # Pushes <runs-on-ecr-cache>:nightly-llm-it-backend-<run-id>.
+      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+      with:  # cache-from lists commit, branch/PR, shared and public-image sources; cache-to writes the first three with mode=max.
+        context: ./backend
+        file: ./backend/Dockerfile
+        push: true
+        tags: ${{ inputs.runs-on-ecr-cache }}:nightly-llm-it-backend-${{ inputs.run-id }}
+        cache-from: |
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ inputs.github-sha }}
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache
+          type=registry,ref=onyxdotapp/onyx-backend:latest
+        cache-to: |
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ inputs.github-sha }},mode=max
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:backend-cache,mode=max
+        no-cache: ${{ inputs.docker-no-cache == 'true' }}  # True only when the input is exactly the string 'true'.
--- a/.github/actions/build-integration-image/action.yml
+++ b/.github/actions/build-integration-image/action.yml
@@ -0,0 +1,75 @@
+name: "Build Integration Image"  # Composite action: runs `docker buildx bake --push` for the "integration" target.
+description: "Builds and pushes the integration test image with docker bake"
+inputs:
+  runs-on-ecr-cache:  # Registry used in every cache-from / cache-to reference of the bake command.
+    description: "ECR cache registry from runs-on/action"
+    required: true
+  ref-name:  # Sanitised into the cache suffix only when pr-number is empty.
+    description: "Git ref name used for cache suffix fallback"
+    required: true
+  pr-number:  # Takes precedence over ref-name as the cache suffix when non-empty.
+    description: "Optional PR number for cache suffix"
+    required: false
+    default: ""
+  github-sha:  # Used for the most specific cache tags (*-cache-<sha>).
+    description: "Commit SHA used for cache keys"
+    required: true
+  run-id:  # Exported to the bake command as TAG=nightly-llm-it-<run-id>.
+    description: "GitHub run ID used in output image tag"
+    required: true
+  docker-username:
+    description: "Docker Hub username"
+    required: true
+  docker-token:
+    description: "Docker Hub token"
+    required: true
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
+
+    - name: Login to Docker Hub  # Authenticates with the docker-username / docker-token inputs before the build.
+      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
+      with:
+        username: ${{ inputs.docker-username }}
+        password: ${{ inputs.docker-token }}
+
+    - name: Format branch name for cache
+      id: format-branch
+      shell: bash
+      env:
+        PR_NUMBER: ${{ inputs.pr-number }}
+        REF_NAME: ${{ inputs.ref-name }}
+      run: |
+        # Prefer the PR number; otherwise fall back to the ref name with every
+        # character outside [A-Za-z0-9._-] replaced by "-".
+        suffix="${PR_NUMBER}"
+        if [ -z "${suffix}" ]; then
+          suffix="${REF_NAME//[^A-Za-z0-9._-]/-}"
+        fi
+        printf 'cache-suffix=%s\n' "${suffix}" >> "$GITHUB_OUTPUT"
+
+    - name: Build and push integration test image with Docker Bake  # Overrides cache-from/cache-to for the backend and integration targets.
+      shell: bash
+      env:  # Inputs are passed through the environment instead of being interpolated into the script.
+        RUNS_ON_ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}
+        TAG: nightly-llm-it-${{ inputs.run-id }}  # NOTE(review): bake file not visible here; confirm it builds the pushed image name from these variables.
+        CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
+        HEAD_SHA: ${{ inputs.github-sha }}
+      run: |
+        docker buildx bake --push \
+          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
+          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
+          --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
+          --set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
+          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
+          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
+          --set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
+          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
+          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
+          --set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
+          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
+          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
+          --set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
+          integration  # the only bake target requested
--- a/.github/actions/build-model-server-image/action.yml
+++ b/.github/actions/build-model-server-image/action.yml
@@ -0,0 +1,68 @@
+name: "Build Model Server Image"  # Composite action: builds ./backend/Dockerfile.model_server and pushes it to the runs-on ECR cache registry.
+description: "Builds and pushes the model server Docker image with cache reuse"
+inputs:
+  runs-on-ecr-cache:  # Registry that receives both the image tag and the registry build caches.
+    description: "ECR cache registry from runs-on/action"
+    required: true
+  ref-name:  # Sanitised into the cache suffix only when pr-number is empty.
+    description: "Git ref name used for cache suffix fallback"
+    required: true
+  pr-number:  # Takes precedence over ref-name as the cache suffix when non-empty.
+    description: "Optional PR number for cache suffix"
+    required: false
+    default: ""
+  github-sha:  # Used for the most specific cache tag (model-server-cache-<sha>).
+    description: "Commit SHA used for cache keys"
+    required: true
+  run-id:  # Becomes the image tag suffix: nightly-llm-it-model-server-<run-id>.
+    description: "GitHub run ID used in output image tag"
+    required: true
+  docker-username:
+    description: "Docker Hub username"
+    required: true
+  docker-token:
+    description: "Docker Hub token"
+    required: true
+runs:
+  using: "composite"
+  steps:
+    - name: Format branch name for cache
+      id: format-branch
+      shell: bash
+      env:
+        PR_NUMBER: ${{ inputs.pr-number }}
+        REF_NAME: ${{ inputs.ref-name }}
+      run: |
+        # Prefer the PR number; otherwise fall back to the ref name with every
+        # character outside [A-Za-z0-9._-] replaced by "-".
+        suffix="${PR_NUMBER}"
+        if [ -z "${suffix}" ]; then
+          suffix="${REF_NAME//[^A-Za-z0-9._-]/-}"
+        fi
+        printf 'cache-suffix=%s\n' "${suffix}" >> "$GITHUB_OUTPUT"
+
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
+
+    - name: Login to Docker Hub  # Authenticates with the docker-username / docker-token inputs before the build.
+      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
+      with:
+        username: ${{ inputs.docker-username }}
+        password: ${{ inputs.docker-token }}
+
+    - name: Build and push Model Server Docker image  # Pushes <runs-on-ecr-cache>:nightly-llm-it-model-server-<run-id>.
+      uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
+      with:  # cache-from lists commit, branch/PR, shared and public-image sources; cache-to writes the first three with mode=max.
+        context: ./backend
+        file: ./backend/Dockerfile.model_server
+        push: true
+        tags: ${{ inputs.runs-on-ecr-cache }}:nightly-llm-it-model-server-${{ inputs.run-id }}
+        cache-from: |
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ inputs.github-sha }}
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache
+          type=registry,ref=onyxdotapp/onyx-model-server:latest
+        cache-to: |
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ inputs.github-sha }},mode=max
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
+          type=registry,ref=${{ inputs.runs-on-ecr-cache }}:model-server-cache,mode=max
--- a/.github/actions/run-nightly-provider-chat-test/action.yml
+++ b/.github/actions/run-nightly-provider-chat-test/action.yml
@@ -0,0 +1,130 @@
+name: "Run Nightly Provider Chat Test"  # Composite action: writes the compose .env, starts the stack, then runs one provider's test in a container.
+description: "Starts required compose services and runs nightly provider integration test"
+inputs:
+  provider:
+    description: "Provider slug for NIGHTLY_LLM_PROVIDER"
+    required: true
+  models:
+    description: "Comma-separated model list for NIGHTLY_LLM_MODELS"
+    required: true
+  provider-api-key:  # Optional: defaults to an empty string when the provider has no API key.
+    description: "API key for NIGHTLY_LLM_API_KEY"
+    required: false
+    default: ""
+  strict:  # Passed through as a string, not a YAML boolean.
+    description: "String true/false for NIGHTLY_LLM_STRICT"
+    required: true
+  api-base:
+    description: "Optional NIGHTLY_LLM_API_BASE"
+    required: false
+    default: ""
+  api-version:
+    description: "Optional NIGHTLY_LLM_API_VERSION"
+    required: false
+    default: ""
+  deployment-name:
+    description: "Optional NIGHTLY_LLM_DEPLOYMENT_NAME"
+    required: false
+    default: ""
+  custom-config-json:
+    description: "Optional NIGHTLY_LLM_CUSTOM_CONFIG_JSON"
+    required: false
+    default: ""
+  runs-on-ecr-cache:  # Registry prefix of the backend, model-server and test-runner images.
+    description: "ECR cache registry from runs-on/action"
+    required: true
+  run-id:  # Must match the run-id the images were tagged with (nightly-llm-it-*-<run-id>).
+    description: "GitHub run ID used in image tags"
+    required: true
+  docker-username:
+    description: "Docker Hub username"
+    required: true
+  docker-token:
+    description: "Docker Hub token"
+    required: true
+runs:
+  using: "composite"
+  steps:
+    - name: Login to Docker Hub  # Authenticates with the docker-username / docker-token inputs.
+      uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
+      with:
+        username: ${{ inputs.docker-username }}
+        password: ${{ inputs.docker-token }}
+
+    - name: Create .env file for Docker Compose  # The unquoted heredoc delimiter lets the shell expand ${ECR_CACHE} and ${RUN_ID}.
+      shell: bash
+      env:
+        ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}
+        RUN_ID: ${{ inputs.run-id }}
+      run: |
+        cat <<EOF2 > deployment/docker_compose/.env
+        COMPOSE_PROFILES=s3-filestore
+        ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
+        LICENSE_ENFORCEMENT_ENABLED=false
+        AUTH_TYPE=basic
+        POSTGRES_POOL_PRE_PING=true
+        POSTGRES_USE_NULL_POOL=true
+        REQUIRE_EMAIL_VERIFICATION=false
+        DISABLE_TELEMETRY=true
+        INTEGRATION_TESTS_MODE=true
+        AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
+        AWS_REGION_NAME=us-west-2
+        ONYX_BACKEND_IMAGE=${ECR_CACHE}:nightly-llm-it-backend-${RUN_ID}
+        ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:nightly-llm-it-model-server-${RUN_ID}
+        EOF2
+
+    - name: Start Docker containers  # Starts only the six listed services; --wait blocks until they are running/healthy.
+      shell: bash
+      run: |
+        cd deployment/docker_compose
+        docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait \
+          relational_db \
+          index \
+          cache \
+          minio \
+          api_server \
+          inference_model_server
+
+    - name: Run nightly provider integration test  # Runs the test-runner image on the compose network, retried once on failure.
+      uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
+      env:  # Inputs reach the command as environment variables and are forwarded into the container with -e.
+        MODELS: ${{ inputs.models }}  # Forwarded to the container as NIGHTLY_LLM_MODELS.
+        NIGHTLY_LLM_PROVIDER: ${{ inputs.provider }}
+        NIGHTLY_LLM_API_KEY: ${{ inputs.provider-api-key }}
+        NIGHTLY_LLM_API_BASE: ${{ inputs.api-base }}
+        NIGHTLY_LLM_API_VERSION: ${{ inputs.api-version }}
+        NIGHTLY_LLM_DEPLOYMENT_NAME: ${{ inputs.deployment-name }}
+        NIGHTLY_LLM_CUSTOM_CONFIG_JSON: ${{ inputs.custom-config-json }}
+        NIGHTLY_LLM_STRICT: ${{ inputs.strict }}
+        RUNS_ON_ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}
+        RUN_ID: ${{ inputs.run-id }}
+      with:
+        timeout_minutes: 20  # Applies to each attempt.
+        max_attempts: 2  # One retry after the first failure.
+        retry_wait_seconds: 10
+        command: |
+          docker run --rm --network onyx_default \
+            --name test-runner \
+            -e POSTGRES_HOST=relational_db \
+            -e POSTGRES_USER=postgres \
+            -e POSTGRES_PASSWORD=password \
+            -e POSTGRES_DB=postgres \
+            -e DB_READONLY_USER=db_readonly_user \
+            -e DB_READONLY_PASSWORD=password \
+            -e POSTGRES_POOL_PRE_PING=true \
+            -e POSTGRES_USE_NULL_POOL=true \
+            -e VESPA_HOST=index \
+            -e REDIS_HOST=cache \
+            -e API_SERVER_HOST=api_server \
+            -e TEST_WEB_HOSTNAME=test-runner \
+            -e AWS_REGION_NAME=us-west-2 \
+            -e NIGHTLY_LLM_PROVIDER="${NIGHTLY_LLM_PROVIDER}" \
+            -e NIGHTLY_LLM_MODELS="${MODELS}" \
+            -e NIGHTLY_LLM_API_KEY="${NIGHTLY_LLM_API_KEY}" \
+            -e NIGHTLY_LLM_API_BASE="${NIGHTLY_LLM_API_BASE}" \
+            -e NIGHTLY_LLM_API_VERSION="${NIGHTLY_LLM_API_VERSION}" \
+            -e NIGHTLY_LLM_DEPLOYMENT_NAME="${NIGHTLY_LLM_DEPLOYMENT_NAME}" \
+            -e NIGHTLY_LLM_CUSTOM_CONFIG_JSON="${NIGHTLY_LLM_CUSTOM_CONFIG_JSON}" \
+            -e NIGHTLY_LLM_STRICT="${NIGHTLY_LLM_STRICT}" \
+            ${RUNS_ON_ECR_CACHE}:nightly-llm-it-${RUN_ID} \
+            /app/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py
--- a/.github/workflows/nightly-llm-provider-chat.yml
+++ b/.github/workflows/nightly-llm-provider-chat.yml
@@ -0,0 +1,56 @@
+name: Nightly LLM Provider Chat Tests  # Scheduled/manual entry point that delegates to the reusable provider chat workflow.
+concurrency:  # One run per workflow + ref; a newer run cancels the one in progress.
+  group: Nightly-LLM-Provider-Chat-${{ github.workflow }}-${{ github.ref_name }}
+  cancel-in-progress: true
+
+on:
+  schedule:
+    # Runs daily at 10:30 UTC (2:30 AM PST / 3:30 AM PDT)
+    - cron: "30 10 * * *"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  provider-chat-test:  # Model lists come from repository variables, credentials from repository secrets.
+    uses: ./.github/workflows/reusable-nightly-llm-provider-chat.yml
+    with:
+      openai_models: ${{ vars.NIGHTLY_LLM_OPENAI_MODELS }}
+      anthropic_models: ${{ vars.NIGHTLY_LLM_ANTHROPIC_MODELS }}
+      bedrock_models: ${{ vars.NIGHTLY_LLM_BEDROCK_MODELS }}
+      vertex_ai_models: ${{ vars.NIGHTLY_LLM_VERTEX_AI_MODELS }}
+      azure_models: ${{ vars.NIGHTLY_LLM_AZURE_MODELS }}
+      azure_api_base: ${{ vars.NIGHTLY_LLM_AZURE_API_BASE }}
+      ollama_models: ${{ vars.NIGHTLY_LLM_OLLAMA_MODELS }}
+      openrouter_models: ${{ vars.NIGHTLY_LLM_OPENROUTER_MODELS }}
+      strict: true  # Boolean input of the reusable workflow.
+    secrets:  # Lower-case names are the provider secrets; the upper-case pair is the Docker Hub login.
+      openai_api_key: ${{ secrets.OPENAI_API_KEY }}
+      anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+      bedrock_api_key: ${{ secrets.BEDROCK_API_KEY }}
+      vertex_ai_custom_config_json: ${{ secrets.NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON }}
+      azure_api_key: ${{ secrets.AZURE_API_KEY }}
+      ollama_api_key: ${{ secrets.OLLAMA_API_KEY }}
+      openrouter_api_key: ${{ secrets.OPENROUTER_API_KEY }}
+      DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+      DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+
+  notify-slack-on-failure:  # Sends a Slack alert when a scheduled run of provider-chat-test fails.
+    needs: [provider-chat-test]
+    if: failure() && github.event_name == 'schedule'  # Manual (workflow_dispatch) runs never notify.
+    runs-on: ubuntu-slim
+    timeout-minutes: 5
+    steps:
+      - name: Checkout  # Needed because the slack-notify action is referenced by local path.
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Send Slack notification
+        uses: ./.github/actions/slack-notify
+        with:
+          webhook-url: ${{ secrets.SLACK_WEBHOOK }}
+          failed-jobs: provider-chat-test
+          title: "🚨 Scheduled LLM Provider Chat Tests failed!"
+          ref-name: ${{ github.ref_name }}
--- a/.github/workflows/post-merge-beta-cherry-pick.yml
+++ b/.github/workflows/post-merge-beta-cherry-pick.yml
@@ -11,6 +11,11 @@ permissions:

 jobs:
  cherry-pick-to-latest-release:
+    outputs:  # Job-level outputs read by the Slack notification job via needs.<job>.outputs.
+      should_cherrypick: ${{ steps.gate.outputs.should_cherrypick }}
+      pr_number: ${{ steps.gate.outputs.pr_number }}
+      cherry_pick_reason: ${{ steps.run_cherry_pick.outputs.reason }}  # Only written when the cherry-pick command fails.
+      cherry_pick_details: ${{ steps.run_cherry_pick.outputs.details }}  # Last 40 lines of the command output, on failure only.
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
@@ -75,10 +80,93 @@ jobs:
          git config user.email "github-actions[bot]@users.noreply.github.com"

      - name: Create cherry-pick PR to latest release
+        id: run_cherry_pick
        if: steps.gate.outputs.should_cherrypick == 'true'
+        continue-on-error: true
        env:
          GH_TOKEN: ${{ github.token }}
          GITHUB_TOKEN: ${{ github.token }}
          CHERRY_PICK_ASSIGNEE: ${{ steps.gate.outputs.merged_by }}
        run: |
-          uv run --no-sync --with onyx-devtools ods cherry-pick "${GITHUB_SHA}" --yes --no-verify
+          set -o pipefail
+          output_file="$(mktemp)"
+
+          # Steps run under `bash -e`, so with pipefail a failing pipeline would
+          # abort the script before any failure output is written. Suspend errexit
+          # around the pipeline and read the command's own status from PIPESTATUS.
+          set +e
+          uv run --no-sync --with onyx-devtools ods cherry-pick "${GITHUB_SHA}" --yes --no-verify 2>&1 | tee "$output_file"
+          exit_code="${PIPESTATUS[0]}"
+          set -e
+
+          if [ "${exit_code}" -eq 0 ]; then
+            echo "status=success" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          echo "status=failure" >> "$GITHUB_OUTPUT"
+
+          reason="command-failed"
+          if grep -qiE "merge conflict during cherry-pick|CONFLICT|could not apply|cherry-pick in progress with staged changes" "$output_file"; then
+            reason="merge-conflict"
+          fi
+          echo "reason=${reason}" >> "$GITHUB_OUTPUT"
+
+          # Random delimiter: a literal "EOF" line in the captured output must
+          # not be able to end the multi-line value early.
+          delimiter="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
+          {
+            echo "details<<${delimiter}"
+            tail -n 40 "$output_file"
+            echo "${delimiter}"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Mark workflow as failed if cherry-pick failed
+        if: steps.gate.outputs.should_cherrypick == 'true' && steps.run_cherry_pick.outputs.status == 'failure'
+        env:
+          CHERRY_PICK_REASON: ${{ steps.run_cherry_pick.outputs.reason }}
+        run: |
+          echo "::error::Automated cherry-pick failed (${CHERRY_PICK_REASON})."
+          exit 1
+
+  notify-slack-on-cherry-pick-failure:  # Posts to Slack when a cherry-pick was attempted and the job above did not succeed.
+    needs:
+      - cherry-pick-to-latest-release
+    if: always() && needs.cherry-pick-to-latest-release.outputs.should_cherrypick == 'true' && needs.cherry-pick-to-latest-release.result != 'success'  # always() keeps this job eligible after a failed dependency.
+    runs-on: ubuntu-slim
+    timeout-minutes: 10
+    steps:
+      - name: Checkout  # Needed because the slack-notify action is referenced by local path.
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Build cherry-pick failure summary  # Emits a single-line "jobs" output for the Slack step.
+        id: failure-summary
+        env:
+          SOURCE_PR_NUMBER: ${{ needs.cherry-pick-to-latest-release.outputs.pr_number }}
+          CHERRY_PICK_REASON: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_reason }}
+          CHERRY_PICK_DETAILS: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_details }}
+        run: |
+          source_pr_url="https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}"
+
+          reason_text="cherry-pick command failed"  # wording for every reason other than merge-conflict
+          if [ "${CHERRY_PICK_REASON}" = "merge-conflict" ]; then
+            reason_text="merge conflict during cherry-pick"
+          fi
+
+          details_excerpt="$(printf '%s' "${CHERRY_PICK_DETAILS}" | tail -n 8 | tr '\n' ' ' | sed "s/[[:space:]]\\+/ /g" | sed "s/\"/'/g" | cut -c1-350)"  # last 8 lines on one line, double quotes swapped for single, max 350 chars
+          failed_jobs="• cherry-pick-to-latest-release\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"  # \\n stays a literal backslash-n, so the value is one line
+          if [ -n "${details_excerpt}" ]; then
+            failed_jobs="${failed_jobs}\\n• excerpt: ${details_excerpt}"
+          fi
+
+          echo "jobs=${failed_jobs}" >> "$GITHUB_OUTPUT"
+
+      - name: "Notify #cherry-pick-prs about cherry-pick failure"
+        uses: ./.github/actions/slack-notify
+        with:
+          webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
+          failed-jobs: ${{ steps.failure-summary.outputs.jobs }}
+          title: "🚨 Automated Cherry-Pick Failed"
+          ref-name: ${{ github.ref_name }}
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -20,6 +20,7 @@ env:
  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+  SLACK_BOT_TOKEN_TEST_SPACE: ${{ secrets.SLACK_BOT_TOKEN_TEST_SPACE }}
  CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
@@ -423,6 +424,7 @@ jobs:
              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
              -e EXA_API_KEY=${EXA_API_KEY} \
              -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
+              -e SLACK_BOT_TOKEN_TEST_SPACE=${SLACK_BOT_TOKEN_TEST_SPACE} \
              -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
              -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
              -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
@@ -443,6 +445,7 @@ jobs:
              -e TEST_WEB_HOSTNAME=test-runner \
              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
              -e MOCK_CONNECTOR_SERVER_PORT=8001 \
+              -e ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${{ matrix.edition == 'ee' && 'true' || 'false' }} \
              ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
              /app/tests/integration/${{ matrix.test-dir.path }}

@@ -701,6 +704,7 @@ jobs:
            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
            -e EXA_API_KEY=${EXA_API_KEY} \
            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
+            -e SLACK_BOT_TOKEN_TEST_SPACE=${SLACK_BOT_TOKEN_TEST_SPACE} \
            -e TEST_WEB_HOSTNAME=test-runner \
            -e AUTH_TYPE=cloud \
            -e MULTI_TENANT=true \
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -89,6 +89,10 @@ env:
  SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
  SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ vars.SHAREPOINT_CLIENT_DIRECTORY_ID }}
  SHAREPOINT_SITE: ${{ vars.SHAREPOINT_SITE }}
+  PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
+  PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
+  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
+  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}

  # Github
  ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}
--- a/.github/workflows/reusable-nightly-llm-provider-chat.yml
+++ b/.github/workflows/reusable-nightly-llm-provider-chat.yml
@@ -0,0 +1,282 @@
+name: Reusable Nightly LLM Provider Chat Tests  # Builds three images, then runs the provider chat test once per provider in a matrix.
+
+on:
+  workflow_call:
+    inputs:  # Every model list is optional and defaults to an empty string.
+      openai_models:
+        description: "Comma-separated models for openai"
+        required: false
+        default: ""
+        type: string
+      anthropic_models:
+        description: "Comma-separated models for anthropic"
+        required: false
+        default: ""
+        type: string
+      bedrock_models:
+        description: "Comma-separated models for bedrock"
+        required: false
+        default: ""
+        type: string
+      vertex_ai_models:
+        description: "Comma-separated models for vertex_ai"
+        required: false
+        default: ""
+        type: string
+      azure_models:
+        description: "Comma-separated models for azure"
+        required: false
+        default: ""
+        type: string
+      ollama_models:
+        description: "Comma-separated models for ollama_chat"
+        required: false
+        default: ""
+        type: string
+      openrouter_models:
+        description: "Comma-separated models for openrouter"
+        required: false
+        default: ""
+        type: string
+      azure_api_base:
+        description: "API base for azure provider"
+        required: false
+        default: ""
+        type: string
+      strict:  # Boolean here; converted to the string 'true'/'false' before it reaches the test action.
+        description: "Default NIGHTLY_LLM_STRICT passed to tests"
+        required: false
+        default: true
+        type: boolean
+    secrets:  # Provider secrets are optional; the Docker Hub credentials are required.
+      openai_api_key:
+        required: false
+      anthropic_api_key:
+        required: false
+      bedrock_api_key:
+        required: false
+      vertex_ai_custom_config_json:
+        required: false
+      azure_api_key:
+        required: false
+      ollama_api_key:
+        required: false
+      openrouter_api_key:
+        required: false
+      DOCKER_USERNAME:
+        required: true
+      DOCKER_TOKEN:
+        required: true
+
+permissions:
+  contents: read
+
+jobs:
+jobs:
+  build-backend-image:  # Builds and pushes the backend image tagged with this run's ID.
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-backend-image",
+        "extras=ecr-cache",
+      ]
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code  # Needed because the build action is referenced by local path.
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Build backend image
+        uses: ./.github/actions/build-backend-image
+        with:
+          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}  # presumably exported by the runs-on/action step — confirm
+          ref-name: ${{ github.ref_name }}
+          pr-number: ${{ github.event.pull_request.number }}  # Empty outside pull_request events, so the ref-name fallback applies.
+          github-sha: ${{ github.sha }}
+          run-id: ${{ github.run_id }}
+          docker-username: ${{ secrets.DOCKER_USERNAME }}
+          docker-token: ${{ secrets.DOCKER_TOKEN }}
+          docker-no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' && 'true' || 'false' }}  # Normalised to the string 'true' or 'false'.
+
+  build-model-server-image:  # Builds and pushes the model server image tagged with this run's ID.
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-model-server-image",
+        "extras=ecr-cache",
+      ]
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code  # Needed because the build action is referenced by local path.
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Build model server image
+        uses: ./.github/actions/build-model-server-image
+        with:
+          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}  # presumably exported by the runs-on/action step — confirm
+          ref-name: ${{ github.ref_name }}
+          pr-number: ${{ github.event.pull_request.number }}  # Empty outside pull_request events, so the ref-name fallback applies.
+          github-sha: ${{ github.sha }}
+          run-id: ${{ github.run_id }}
+          docker-username: ${{ secrets.DOCKER_USERNAME }}
+          docker-token: ${{ secrets.DOCKER_TOKEN }}
+
+  build-integration-image:  # Builds and pushes the integration test-runner image for this run.
+    runs-on:
+      [
+        runs-on,
+        runner=2cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-integration-image",
+        "extras=ecr-cache",
+      ]
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code  # Needed because the build action is referenced by local path.
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Build integration image
+        uses: ./.github/actions/build-integration-image
+        with:
+          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}  # presumably exported by the runs-on/action step — confirm
+          ref-name: ${{ github.ref_name }}
+          pr-number: ${{ github.event.pull_request.number }}  # Empty outside pull_request events, so the ref-name fallback applies.
+          github-sha: ${{ github.sha }}
+          run-id: ${{ github.run_id }}
+          docker-username: ${{ secrets.DOCKER_USERNAME }}
+          docker-token: ${{ secrets.DOCKER_TOKEN }}
+
+  provider-chat-test:  # One matrix job per provider; each starts the compose stack and runs the nightly chat test.
+    needs:
+      [
+        build-backend-image,
+        build-model-server-image,
+        build-integration-image,
+      ]
+    strategy:
+      fail-fast: false  # A failing provider does not cancel the other providers' jobs.
+      matrix:
+        include:  # *_secret fields hold the NAME of a workflow secret, looked up via secrets[...] below.
+          - provider: openai
+            models: ${{ inputs.openai_models }}
+            api_key_secret: openai_api_key
+            custom_config_secret: ""
+            api_base: ""
+            api_version: ""
+            deployment_name: ""
+            required: true  # NOTE(review): matrix.required is not referenced anywhere in this workflow — confirm intended use.
+          - provider: anthropic
+            models: ${{ inputs.anthropic_models }}
+            api_key_secret: anthropic_api_key
+            custom_config_secret: ""
+            api_base: ""
+            api_version: ""
+            deployment_name: ""
+            required: true
+          - provider: bedrock
+            models: ${{ inputs.bedrock_models }}
+            api_key_secret: bedrock_api_key
+            custom_config_secret: ""
+            api_base: ""
+            api_version: ""
+            deployment_name: ""
+            required: false
+          - provider: vertex_ai  # Only provider configured through custom_config_secret instead of an API key.
+            models: ${{ inputs.vertex_ai_models }}
+            api_key_secret: ""
+            custom_config_secret: vertex_ai_custom_config_json
+            api_base: ""
+            api_version: ""
+            deployment_name: ""
+            required: false
+          - provider: azure
+            models: ${{ inputs.azure_models }}
+            api_key_secret: azure_api_key
+            custom_config_secret: ""
+            api_base: ${{ inputs.azure_api_base }}
+            api_version: "2025-04-01-preview"
+            deployment_name: ""
+            required: false
+          - provider: ollama_chat
+            models: ${{ inputs.ollama_models }}
+            api_key_secret: ollama_api_key
+            custom_config_secret: ""
+            api_base: "https://ollama.com"
+            api_version: ""
+            deployment_name: ""
+            required: false
+          - provider: openrouter
+            models: ${{ inputs.openrouter_models }}
+            api_key_secret: openrouter_api_key
+            custom_config_secret: ""
+            api_base: "https://openrouter.ai/api/v1"
+            api_version: ""
+            deployment_name: ""
+            required: false
+    runs-on:
+      - runs-on
+      - runner=4cpu-linux-arm64
+      - "run-id=${{ github.run_id }}-nightly-${{ matrix.provider }}-provider-chat-test"
+      - extras=ecr-cache
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code  # Needed for the local action and for deployment/docker_compose.
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Run nightly provider chat test
+        uses: ./.github/actions/run-nightly-provider-chat-test
+        with:
+          provider: ${{ matrix.provider }}
+          models: ${{ matrix.models }}
+          provider-api-key: ${{ matrix.api_key_secret && secrets[matrix.api_key_secret] || '' }}  # Empty when the matrix entry names no secret.
+          strict: ${{ inputs.strict && 'true' || 'false' }}  # Boolean input converted to the string the action expects.
+          api-base: ${{ matrix.api_base }}
+          api-version: ${{ matrix.api_version }}
+          deployment-name: ${{ matrix.deployment_name }}
+          custom-config-json: ${{ matrix.custom_config_secret && secrets[matrix.custom_config_secret] || '' }}  # Empty when the matrix entry names no secret.
+          runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}  # presumably exported by the runs-on/action step — confirm
+          run-id: ${{ github.run_id }}
+          docker-username: ${{ secrets.DOCKER_USERNAME }}
+          docker-token: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Dump API server logs  # Best effort: "|| true" keeps a logging failure from failing the job.
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
+
+      - name: Dump all-container logs  # Best effort, same as the API server log dump.
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+
+      - name: Upload logs  # Artifact name includes the provider so matrix jobs do not collide.
+        if: always()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f  # NOTE(review): no "ratchet:" version annotation, unlike the other pinned actions.
+        with:
+          name: docker-all-logs-nightly-${{ matrix.provider }}-llm-provider
+          path: |
+            ${{ github.workspace }}/api_server.log
+            ${{ github.workspace }}/docker-compose.log
+
+      - name: Stop Docker containers  # Runs even after failures; -v also removes the compose volumes.
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose down -v
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -548,7 +548,7 @@ class in the utils over directly calling the APIs with a library like `requests`
 calling the utilities directly (e.g. do NOT create admin users with
 `admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).

-A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
+A great example of this type of test is `backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py`.

 To run them:

@@ -616,3 +616,9 @@ This is a minimal list - feel free to include more. Do NOT write code as part of
 Keep it high level. You can reference certain files or functions though.

 Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
+
+## Best Practices
+
+In addition to the other content in this file, best practices for contributing
+to the codebase can be found at `contributing_guides/best_practices.md`.
+Understand its contents and follow them.
--- a/backend/alembic/versions/57122d037335_add_python_tool_on_default.py
+++ b/backend/alembic/versions/57122d037335_add_python_tool_on_default.py
@@ -0,0 +1,69 @@
+"""add python tool on default
+
+Revision ID: 57122d037335
+Revises: c0c937d5c9e5
+Create Date: 2026-02-27 10:10:40.124925
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "57122d037335"
+down_revision = "c0c937d5c9e5"
+branch_labels = None
+depends_on = None
+
+
+PYTHON_TOOL_NAME = "python"
+
+
+def upgrade() -> None:
+    """Attach the tool named by ``PYTHON_TOOL_NAME`` to the default persona (id=0).
+
+    Does nothing when the ``tool`` table has no row with that name.
+    """
+    bind = op.get_bind()
+
+    # Resolve the tool's id from its name
+    lookup = sa.text("SELECT id FROM tool WHERE name = :name")
+    row = bind.execute(lookup, {"name": PYTHON_TOOL_NAME}).fetchone()
+    if not row:
+        return
+
+    # Link the tool to persona 0; ON CONFLICT DO NOTHING skips the insert
+    # if it is already attached
+    bind.execute(
+        sa.text(
+            """
+            INSERT INTO persona__tool (persona_id, tool_id)
+            VALUES (0, :tool_id)
+            ON CONFLICT DO NOTHING
+            """
+        ),
+        {"tool_id": row[0]},
+    )
+
+
+def downgrade() -> None:
+    """Detach the tool named by ``PYTHON_TOOL_NAME`` from the default persona."""
+    bind = op.get_bind()
+
+    lookup = sa.text("SELECT id FROM tool WHERE name = :name")
+    row = bind.execute(lookup, {"name": PYTHON_TOOL_NAME}).fetchone()
+    if not row:
+        return
+
+    # NOTE(review): the delete is unconditional, so a link that predates
+    # upgrade() is removed as well — confirm that is acceptable.
+    bind.execute(
+        sa.text(
+            """
+            DELETE FROM persona__tool
+            WHERE persona_id = 0 AND tool_id = :tool_id
+            """
+        ),
+        {"tool_id": row[0]},
+    )
--- a/backend/alembic/versions/8ffcc2bcfc11_add_needs_persona_sync_to_user_file.py
+++ b/backend/alembic/versions/8ffcc2bcfc11_add_needs_persona_sync_to_user_file.py
@@ -0,0 +1,33 @@
+"""add needs_persona_sync to user_file
+
+Revision ID: 8ffcc2bcfc11
+Revises: 7616121f6e97
+Create Date: 2026-02-23 10:48:48.343826
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "8ffcc2bcfc11"
+down_revision = "7616121f6e97"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # The column is NOT NULL from the start; the server default of false
+    # supplies the value for rows that do not set it.
+    needs_persona_sync = sa.Column(
+        "needs_persona_sync",
+        sa.Boolean(),
+        nullable=False,
+        server_default=sa.text("false"),
+    )
+    op.add_column("user_file", needs_persona_sync)
+
+
+def downgrade() -> None:
+    op.drop_column(table_name="user_file", column_name="needs_persona_sync")
--- a/backend/alembic/versions/c0c937d5c9e5_llm_provider_deprecate_fields.py
+++ b/backend/alembic/versions/c0c937d5c9e5_llm_provider_deprecate_fields.py
@@ -0,0 +1,70 @@
+"""llm provider deprecate fields
+
+Revision ID: c0c937d5c9e5
+Revises: 8ffcc2bcfc11
+Create Date: 2026-02-25 17:35:46.125102
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "c0c937d5c9e5"
+down_revision = "8ffcc2bcfc11"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # default_model_name was NOT NULL; allow NULL from this revision on
+    op.alter_column(
+        table_name="llm_provider",
+        column_name="default_model_name",
+        existing_type=sa.String(),
+        nullable=True,
+    )
+
+    # is_default_provider loses its unique constraint (defaults now tracked via LLMModelFlow)
+    op.drop_constraint(
+        constraint_name="llm_provider_is_default_provider_key",
+        table_name="llm_provider",
+        type_="unique",
+    )
+
+    # is_default_vision_provider keeps its type but drops server_default=false()
+    op.alter_column(
+        table_name="llm_provider",
+        column_name="is_default_vision_provider",
+        existing_type=sa.Boolean(),
+        server_default=None,
+    )
+
+
+def downgrade() -> None:
+    # Backfill NULLs with '' first so that NOT NULL can be restored
+    op.execute(
+        "UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
+    )
+    op.alter_column(
+        table_name="llm_provider",
+        column_name="default_model_name",
+        existing_type=sa.String(),
+        nullable=False,
+    )
+
+    # NOTE(review): fails if rows now share an is_default_provider value — confirm
+    op.create_unique_constraint(
+        constraint_name="llm_provider_is_default_provider_key",
+        table_name="llm_provider",
+        columns=["is_default_provider"],
+    )
+
+    # Put server_default=false() back on is_default_vision_provider
+    op.alter_column(
+        table_name="llm_provider",
+        column_name="is_default_vision_provider",
+        existing_type=sa.Boolean(),
+        server_default=sa.false(),
+    )
--- a/backend/ee/onyx/db/scim.py
+++ b/backend/ee/onyx/db/scim.py
@@ -34,6 +34,7 @@ from sqlalchemy.dialects.postgresql import insert as pg_insert

 from ee.onyx.server.scim.filtering import ScimFilter
 from ee.onyx.server.scim.filtering import ScimFilterOperator
+from ee.onyx.server.scim.models import ScimMappingFields
 from onyx.db.dal import DAL
 from onyx.db.models import ScimGroupMapping
 from onyx.db.models import ScimToken
@@ -128,12 +129,19 @@ class ScimDAL(DAL):
        external_id: str,
        user_id: UUID,
        scim_username: str | None = None,
+        fields: ScimMappingFields | None = None,
    ) -> ScimUserMapping:
        """Create a mapping between a SCIM externalId and an Onyx user."""
+        f = fields or ScimMappingFields()
        mapping = ScimUserMapping(
            external_id=external_id,
            user_id=user_id,
            scim_username=scim_username,
+            department=f.department,
+            manager=f.manager,
+            given_name=f.given_name,
+            family_name=f.family_name,
+            scim_emails_json=f.scim_emails_json,
        )
        self._session.add(mapping)
        self._session.flush()
@@ -311,8 +319,14 @@ class ScimDAL(DAL):
        user_id: UUID,
        new_external_id: str | None,
        scim_username: str | None = None,
+        fields: ScimMappingFields | None = None,
    ) -> None:
-        """Create, update, or delete the external ID mapping for a user."""
+        """Create, update, or delete the external ID mapping for a user.
+
+        When *fields* is provided, all mapping fields are written
+        unconditionally — including ``None`` values — so that a caller can
+        clear a previously-set field (e.g. removing a department).
+        """
        mapping = self.get_user_mapping_by_user_id(user_id)
        if new_external_id:
            if mapping:
@@ -320,11 +334,18 @@ class ScimDAL(DAL):
                    mapping.external_id = new_external_id
                if scim_username is not None:
                    mapping.scim_username = scim_username
+                if fields is not None:
+                    mapping.department = fields.department
+                    mapping.manager = fields.manager
+                    mapping.given_name = fields.given_name
+                    mapping.family_name = fields.family_name
+                    mapping.scim_emails_json = fields.scim_emails_json
            else:
                self.create_user_mapping(
                    external_id=new_external_id,
                    user_id=user_id,
                    scim_username=scim_username,
+                    fields=fields,
                )
        elif mapping:
            self.delete_user_mapping(mapping.id)
--- a/backend/ee/onyx/external_permissions/sharepoint/permission_utils.py
+++ b/backend/ee/onyx/external_permissions/sharepoint/permission_utils.py
@@ -4,7 +4,6 @@ from collections import deque
 from collections.abc import Callable
 from collections.abc import Generator
 from typing import Any
-from urllib.parse import unquote
 from urllib.parse import urlparse

 import requests as _requests
@@ -598,8 +597,12 @@ def get_external_access_from_sharepoint(
        )
    elif site_page:
        site_url = site_page.get("webUrl")
-        # Prefer server-relative URL to avoid OData filters that break on apostrophes
-        server_relative_url = unquote(urlparse(site_url).path)
+        # Keep percent-encoding intact so the path matches the encoding
+        # used by the Office365 library's SPResPath.create_relative(),
+        # which compares against urlparse(context.base_url).path.
+        # Decoding (e.g. %27 → ') causes a mismatch that duplicates
+        # the site prefix in the constructed URL.
+        server_relative_url = urlparse(site_url).path
        file_obj = client_context.web.get_file_by_server_relative_url(
            server_relative_url
        )
--- a/backend/ee/onyx/server/scim/api.py
+++ b/backend/ee/onyx/server/scim/api.py
@@ -26,14 +26,14 @@ from sqlalchemy.orm import Session
 from ee.onyx.db.scim import ScimDAL
 from ee.onyx.server.scim.auth import verify_scim_token
 from ee.onyx.server.scim.filtering import parse_scim_filter
+from ee.onyx.server.scim.models import SCIM_LIST_RESPONSE_SCHEMA
 from ee.onyx.server.scim.models import ScimError
 from ee.onyx.server.scim.models import ScimGroupMember
 from ee.onyx.server.scim.models import ScimGroupResource
 from ee.onyx.server.scim.models import ScimListResponse
+from ee.onyx.server.scim.models import ScimMappingFields
 from ee.onyx.server.scim.models import ScimName
 from ee.onyx.server.scim.models import ScimPatchRequest
-from ee.onyx.server.scim.models import ScimResourceType
-from ee.onyx.server.scim.models import ScimSchemaDefinition
 from ee.onyx.server.scim.models import ScimServiceProviderConfig
 from ee.onyx.server.scim.models import ScimUserResource
 from ee.onyx.server.scim.patch import apply_group_patch
@@ -41,6 +41,8 @@ from ee.onyx.server.scim.patch import apply_user_patch
 from ee.onyx.server.scim.patch import ScimPatchError
 from ee.onyx.server.scim.providers.base import get_default_provider
 from ee.onyx.server.scim.providers.base import ScimProvider
+from ee.onyx.server.scim.providers.base import serialize_emails
+from ee.onyx.server.scim.schema_definitions import ENTERPRISE_USER_SCHEMA_DEF
 from ee.onyx.server.scim.schema_definitions import GROUP_RESOURCE_TYPE
 from ee.onyx.server.scim.schema_definitions import GROUP_SCHEMA_DEF
 from ee.onyx.server.scim.schema_definitions import SERVICE_PROVIDER_CONFIG
@@ -48,15 +50,28 @@ from ee.onyx.server.scim.schema_definitions import USER_RESOURCE_TYPE
 from ee.onyx.server.scim.schema_definitions import USER_SCHEMA_DEF
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import ScimToken
+from onyx.db.models import ScimUserMapping
 from onyx.db.models import User
 from onyx.db.models import UserGroup
 from onyx.db.models import UserRole
+from onyx.utils.logger import setup_logger
 from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop

+logger = setup_logger()
+
+
+class ScimJSONResponse(JSONResponse):
+    """JSON response whose media type is ``application/scim+json`` (RFC 7644 §3.1)."""
+
+    media_type = "application/scim+json"  # overrides the JSONResponse class attribute
+
+
 # NOTE: All URL paths in this router (/ServiceProviderConfig, /ResourceTypes,
 # /Schemas, /Users, /Groups) are mandated by the SCIM spec (RFC 7643/7644).
 # IdPs like Okta and Azure AD hardcode these exact paths, so they cannot be
 # changed to kebab-case.
+
+
 scim_router = APIRouter(prefix="/scim/v2", tags=["SCIM"])

 _pw_helper = PasswordHelper()
@@ -86,15 +101,39 @@ def get_service_provider_config() -> ScimServiceProviderConfig:


@scim_router.get("/ResourceTypes")
-def get_resource_types() -> list[ScimResourceType]:
-    """List available SCIM resource types (RFC 7643 §6)."""
-    return [USER_RESOURCE_TYPE, GROUP_RESOURCE_TYPE]
+def get_resource_types() -> ScimJSONResponse:
+    """Advertise the resource types this server supports (RFC 7643 §6).
+
+    The result is a ListResponse object (RFC 7644 §3.4.2) rather than a
+    bare JSON array, since IdPs such as Entra ID require an object here.
+    """
+    dumped = [
+        resource_type.model_dump(exclude_none=True, by_alias=True)
+        for resource_type in (USER_RESOURCE_TYPE, GROUP_RESOURCE_TYPE)
+    ]
+    envelope = {
+        "schemas": [SCIM_LIST_RESPONSE_SCHEMA],
+        "totalResults": len(dumped),
+        "Resources": dumped,
+    }
+    return ScimJSONResponse(content=envelope)


@scim_router.get("/Schemas")
-def get_schemas() -> list[ScimSchemaDefinition]:
-    """Return SCIM schema definitions (RFC 7643 §7)."""
-    return [USER_SCHEMA_DEF, GROUP_SCHEMA_DEF]
+def get_schemas() -> ScimJSONResponse:
+    """Return SCIM schema definitions (RFC 7643 §7).
+
+    Wrapped in a ListResponse envelope (RFC 7644 §3.4.2) because IdPs
+    like Entra ID expect a JSON object, not a bare array. Definitions
+    are dumped by alias, the same way ``/ResourceTypes`` dumps its entries.
+    """
+    schemas = [USER_SCHEMA_DEF, GROUP_SCHEMA_DEF, ENTERPRISE_USER_SCHEMA_DEF]
+    payload = {"schemas": [SCIM_LIST_RESPONSE_SCHEMA], "totalResults": len(schemas)}
+    # by_alias=True: emit the declared wire names, never Python attribute names
+    payload["Resources"] = [
+        s.model_dump(exclude_none=True, by_alias=True) for s in schemas
+    ]
+    return ScimJSONResponse(content=payload)


 # ---------------------------------------------------------------------------
@@ -102,15 +141,45 @@ def get_schemas() -> list[ScimSchemaDefinition]:
 # ---------------------------------------------------------------------------


-def _scim_error_response(status: int, detail: str) -> JSONResponse:
+def _scim_error_response(status: int, detail: str) -> ScimJSONResponse:
    """Build a SCIM-compliant error response (RFC 7644 §3.12)."""
+    logger.warning("SCIM error response: status=%s detail=%s", status, detail)
    body = ScimError(status=str(status), detail=detail)
-    return JSONResponse(
+    return ScimJSONResponse(  # media type application/scim+json
        status_code=status,
        content=body.model_dump(exclude_none=True),
    )


+def _parse_excluded_attributes(raw: str | None) -> set[str]:
+    """Split the ``excludedAttributes`` query value (RFC 7644 §3.4.2.5).
+
+    Names are comma-separated; each is trimmed and lowercased, blanks are
+    dropped. ``None`` or an empty string gives an empty set.
+    """
+    names = (piece.strip() for piece in (raw or "").split(","))
+    return {name.lower() for name in names if name}
+
+
+def _apply_exclusions(
+    resource: ScimUserResource | ScimGroupResource,
+    excluded: set[str],
+) -> dict:
+    """Serialize a SCIM resource, omitting attributes the IdP excluded.
+
+    RFC 7644 §3.4.2.5 lets the IdP pass ``?excludedAttributes=groups,emails``
+    to reduce response payload size. *excluded* holds lowercased names that
+    are matched case-insensitively against the top-level camelCase keys.
+    ``id`` and ``schemas`` are kept even if listed (RFC 7644 §3.9).
+    Key order of the serialized resource is preserved.
+    """
+    # "id" is returned=always and "schemas" is required in every resource,
+    # so excludedAttributes must have no effect on either of them.
+    removable = excluded - {"id", "schemas"}
+    data = resource.model_dump(exclude_none=True, by_alias=True)
+    return {key: value for key, value in data.items() if key.lower() not in removable}
+
+
 def _check_seat_availability(dal: ScimDAL) -> str | None:
    """Return an error message if seat limit is reached, else None."""
    check_fn = fetch_ee_implementation_or_noop(
@@ -124,7 +193,7 @@ def _check_seat_availability(dal: ScimDAL) -> str | None:
    return None


-def _fetch_user_or_404(user_id: str, dal: ScimDAL) -> User | JSONResponse:
+def _fetch_user_or_404(user_id: str, dal: ScimDAL) -> User | ScimJSONResponse:
    """Parse *user_id* as UUID, look up the user, or return a 404 error."""
    try:
        uid = UUID(user_id)
@@ -144,10 +213,95 @@ def _scim_name_to_str(name: ScimName | None) -> str | None:
    """
    if not name:
        return None
-    # Build from givenName/familyName first — IdPs like Okta may send a stale
-    # ``formatted`` value while updating the individual name components.
+    # If the client explicitly provides ``formatted``, prefer it — the client
+    # knows what display string it wants. Otherwise build from components.
+    if name.formatted:
+        return name.formatted
    parts = " ".join(part for part in [name.givenName, name.familyName] if part)
-    return parts or name.formatted
+    return parts or None
+
+
+def _scim_resource_response(
+    resource: ScimUserResource | ScimGroupResource | ScimListResponse,
+    status_code: int = 200,
+) -> ScimJSONResponse:
+    """Render *resource* as an ``application/scim+json`` HTTP response.
+
+    ``None`` fields are dropped and field aliases are used as JSON keys.
+    """
+    payload = resource.model_dump(exclude_none=True, by_alias=True)
+    return ScimJSONResponse(content=payload, status_code=status_code)
+
+
+def _build_list_response(
+    resources: list[ScimUserResource | ScimGroupResource],
+    total: int,
+    start_index: int,
+    count: int,
+    excluded: set[str] | None = None,
+) -> ScimListResponse | ScimJSONResponse:
+    """Wrap one page of resources in a SCIM ListResponse envelope.
+
+    Non-empty *excluded* routes each resource through ``_apply_exclusions``
+    (RFC 7644 §3.4.2.5); *count* is echoed as ``itemsPerPage`` unchanged.
+    """
+    if not excluded:
+        return _scim_resource_response(
+            ScimListResponse(
+                totalResults=total,
+                startIndex=start_index,
+                itemsPerPage=count,
+                Resources=resources,
+            )
+        )
+
+    bare_envelope = ScimListResponse(
+        totalResults=total,
+        startIndex=start_index,
+        itemsPerPage=count,
+    )
+    body = bare_envelope.model_dump(exclude_none=True)
+    body["Resources"] = [_apply_exclusions(item, excluded) for item in resources]
+    return ScimJSONResponse(content=body)
+
+
+def _extract_enterprise_fields(
+    resource: ScimUserResource,
+) -> tuple[str | None, str | None]:
+    """Return ``(department, manager value)`` from the enterprise extension."""
+    enterprise = resource.enterprise_extension
+    if enterprise:
+        manager_ref = enterprise.manager
+        # Only the manager reference's ``value`` is kept
+        return enterprise.department, (manager_ref.value if manager_ref else None)
+    return None, None
+
+
+def _mapping_to_fields(
+    mapping: ScimUserMapping | None,
+) -> ScimMappingFields | None:
+    """Copy the stored round-trip attributes off *mapping*; ``None`` if absent."""
+    if mapping:
+        return ScimMappingFields(
+            department=mapping.department,
+            manager=mapping.manager,
+            given_name=mapping.given_name,
+            family_name=mapping.family_name,
+            scim_emails_json=mapping.scim_emails_json,
+        )
+    return None
+
+
+def _fields_from_resource(resource: ScimUserResource) -> ScimMappingFields:
+    """Derive the persisted mapping fields from an incoming SCIM user payload."""
+    dept, mgr = _extract_enterprise_fields(resource)
+    return ScimMappingFields(
+        department=dept,
+        manager=mgr,
+        given_name=resource.name.givenName if resource.name else None,
+        family_name=resource.name.familyName if resource.name else None,
+        scim_emails_json=serialize_emails(resource.emails),
+    )


 # ---------------------------------------------------------------------------
@@ -158,15 +312,17 @@ def _scim_name_to_str(name: ScimName | None) -> str | None:
@scim_router.get("/Users", response_model=None)
 def list_users(
    filter: str | None = Query(None),
+    excludedAttributes: str | None = None,
    startIndex: int = Query(1, ge=1),
    count: int = Query(100, ge=0, le=500),
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimListResponse | JSONResponse:
+) -> ScimListResponse | ScimJSONResponse:
    """List users with optional SCIM filter and pagination."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
+    dal.commit()

    try:
        scim_filter = parse_scim_filter(filter)
@@ -185,42 +341,55 @@ def list_users(
            mapping.external_id if mapping else None,
            groups=user_groups_map.get(user.id, []),
            scim_username=mapping.scim_username if mapping else None,
+            fields=_mapping_to_fields(mapping),
        )
        for user, mapping in users_with_mappings
    ]

-    return ScimListResponse(
-        totalResults=total,
-        startIndex=startIndex,
-        itemsPerPage=count,
-        Resources=resources,
+    return _build_list_response(
+        resources,
+        total,
+        startIndex,
+        count,
+        excluded=_parse_excluded_attributes(excludedAttributes),
    )


@scim_router.get("/Users/{user_id}", response_model=None)
 def get_user(
    user_id: str,
+    excludedAttributes: str | None = None,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimUserResource | JSONResponse:
+) -> ScimUserResource | ScimJSONResponse:
    """Get a single user by ID."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
+    dal.commit()  # commit the last-used update before the lookups below

    result = _fetch_user_or_404(user_id, dal)
-    if isinstance(result, JSONResponse):
+    if isinstance(result, ScimJSONResponse):
        return result
    user = result

    mapping = dal.get_user_mapping_by_user_id(user.id)
-    return provider.build_user_resource(
+
+    resource = provider.build_user_resource(
        user,
        mapping.external_id if mapping else None,
        groups=dal.get_user_groups(user.id),
        scim_username=mapping.scim_username if mapping else None,
+        fields=_mapping_to_fields(mapping),
    )

+    # RFC 7644 §3.4.2.5 — drop whatever the IdP listed in excludedAttributes
+    excluded = _parse_excluded_attributes(excludedAttributes)
+    if excluded:
+        return ScimJSONResponse(content=_apply_exclusions(resource, excluded))
+
+    return _scim_resource_response(resource)
+

@scim_router.post("/Users", status_code=201, response_model=None)
 def create_user(
@@ -228,7 +397,7 @@ def create_user(
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimUserResource | JSONResponse:
+) -> ScimUserResource | ScimJSONResponse:
    """Create a new user from a SCIM provisioning request."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
@@ -270,13 +439,25 @@ def create_user(
    # Create SCIM mapping (externalId is validated above, always present)
    external_id = user_resource.externalId
    scim_username = user_resource.userName.strip()
+    fields = _fields_from_resource(user_resource)
    dal.create_user_mapping(
-        external_id=external_id, user_id=user.id, scim_username=scim_username
+        external_id=external_id,
+        user_id=user.id,
+        scim_username=scim_username,
+        fields=fields,
    )

    dal.commit()

-    return provider.build_user_resource(user, external_id, scim_username=scim_username)
+    return _scim_resource_response(
+        provider.build_user_resource(
+            user,
+            external_id,
+            scim_username=scim_username,
+            fields=fields,
+        ),
+        status_code=201,
+    )


@scim_router.put("/Users/{user_id}", response_model=None)
@@ -286,13 +467,13 @@ def replace_user(
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimUserResource | JSONResponse:
+) -> ScimUserResource | ScimJSONResponse:
    """Replace a user entirely (RFC 7644 §3.5.1)."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_user_or_404(user_id, dal)
-    if isinstance(result, JSONResponse):
+    if isinstance(result, ScimJSONResponse):
        return result
    user = result

@@ -313,15 +494,24 @@ def replace_user(

    new_external_id = user_resource.externalId
    scim_username = user_resource.userName.strip()
-    dal.sync_user_external_id(user.id, new_external_id, scim_username=scim_username)
+    fields = _fields_from_resource(user_resource)
+    dal.sync_user_external_id(
+        user.id,
+        new_external_id,
+        scim_username=scim_username,
+        fields=fields,
+    )

    dal.commit()

-    return provider.build_user_resource(
-        user,
-        new_external_id,
-        groups=dal.get_user_groups(user.id),
-        scim_username=scim_username,
+    return _scim_resource_response(
+        provider.build_user_resource(
+            user,
+            new_external_id,
+            groups=dal.get_user_groups(user.id),
+            scim_username=scim_username,
+            fields=fields,
+        )
    )


@@ -332,7 +522,7 @@ def patch_user(
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimUserResource | JSONResponse:
+) -> ScimUserResource | ScimJSONResponse:
    """Partially update a user (RFC 7644 §3.5.2).

    This is the primary endpoint for user deprovisioning — Okta sends
@@ -342,23 +532,25 @@ def patch_user(
    dal.update_token_last_used(_token.id)

    result = _fetch_user_or_404(user_id, dal)
-    if isinstance(result, JSONResponse):
+    if isinstance(result, ScimJSONResponse):
        return result
    user = result

    mapping = dal.get_user_mapping_by_user_id(user.id)
    external_id = mapping.external_id if mapping else None
    current_scim_username = mapping.scim_username if mapping else None
+    current_fields = _mapping_to_fields(mapping)

    current = provider.build_user_resource(
        user,
        external_id,
        groups=dal.get_user_groups(user.id),
        scim_username=current_scim_username,
+        fields=current_fields,
    )

    try:
-        patched = apply_user_patch(
+        patched, ent_data = apply_user_patch(
            patch_request.Operations, current, provider.ignored_patch_paths
        )
    except ScimPatchError as e:
@@ -393,17 +585,37 @@ def patch_user(
        personal_name=personal_name,
    )

+    # Build updated fields by merging PATCH enterprise data with current values
+    cf = current_fields or ScimMappingFields()
+    fields = ScimMappingFields(
+        department=ent_data.get("department", cf.department),
+        manager=ent_data.get("manager", cf.manager),
+        given_name=patched.name.givenName if patched.name else cf.given_name,
+        family_name=patched.name.familyName if patched.name else cf.family_name,
+        scim_emails_json=(
+            serialize_emails(patched.emails)
+            if patched.emails is not None
+            else cf.scim_emails_json
+        ),
+    )
+
    dal.sync_user_external_id(
-        user.id, patched.externalId, scim_username=new_scim_username
+        user.id,
+        patched.externalId,
+        scim_username=new_scim_username,
+        fields=fields,
    )

    dal.commit()

-    return provider.build_user_resource(
-        user,
-        patched.externalId,
-        groups=dal.get_user_groups(user.id),
-        scim_username=new_scim_username,
+    return _scim_resource_response(
+        provider.build_user_resource(
+            user,
+            patched.externalId,
+            groups=dal.get_user_groups(user.id),
+            scim_username=new_scim_username,
+            fields=fields,
+        )
    )


@@ -412,25 +624,29 @@ def delete_user(
    user_id: str,
    _token: ScimToken = Depends(verify_scim_token),
    db_session: Session = Depends(get_session),
-) -> Response | JSONResponse:
+) -> Response | ScimJSONResponse:
    """Delete a user (RFC 7644 §3.6).

    Deactivates the user and removes the SCIM mapping. Note that Okta
    typically uses PATCH active=false instead of DELETE.
+    A second DELETE returns 404 per RFC 7644 §3.6.
    """
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_user_or_404(user_id, dal)
-    if isinstance(result, JSONResponse):
+    if isinstance(result, ScimJSONResponse):
        return result
    user = result

-    dal.deactivate_user(user)
-
+    # If no SCIM mapping exists, the user was already deleted from
+    # SCIM's perspective — return 404 per RFC 7644 §3.6.
    mapping = dal.get_user_mapping_by_user_id(user.id)
-    if mapping:
-        dal.delete_user_mapping(mapping.id)
+    if not mapping:
+        return _scim_error_response(404, f"User {user_id} not found")
+
+    dal.deactivate_user(user)
+    dal.delete_user_mapping(mapping.id)

    dal.commit()

@@ -442,7 +658,7 @@ def delete_user(
 # ---------------------------------------------------------------------------


-def _fetch_group_or_404(group_id: str, dal: ScimDAL) -> UserGroup | JSONResponse:
+def _fetch_group_or_404(group_id: str, dal: ScimDAL) -> UserGroup | ScimJSONResponse:
    """Parse *group_id* as int, look up the group, or return a 404 error."""
    try:
        gid = int(group_id)
@@ -497,15 +713,17 @@ def _validate_and_parse_members(
@scim_router.get("/Groups", response_model=None)
 def list_groups(
    filter: str | None = Query(None),
+    excludedAttributes: str | None = None,
    startIndex: int = Query(1, ge=1),
    count: int = Query(100, ge=0, le=500),
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimListResponse | JSONResponse:
+) -> ScimListResponse | ScimJSONResponse:
    """List groups with optional SCIM filter and pagination."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
+    dal.commit()

    try:
        scim_filter = parse_scim_filter(filter)
@@ -522,37 +740,47 @@ def list_groups(
        for group, ext_id in groups_with_ext_ids
    ]

-    return ScimListResponse(
-        totalResults=total,
-        startIndex=startIndex,
-        itemsPerPage=count,
-        Resources=resources,
+    return _build_list_response(
+        resources,
+        total,
+        startIndex,
+        count,
+        excluded=_parse_excluded_attributes(excludedAttributes),
    )


@scim_router.get("/Groups/{group_id}", response_model=None)
 def get_group(
    group_id: str,
+    excludedAttributes: str | None = None,
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimGroupResource | JSONResponse:
+) -> ScimGroupResource | ScimJSONResponse:
    """Get a single group by ID."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
+    dal.commit()  # commit the last-used update before the lookups below

    result = _fetch_group_or_404(group_id, dal)
-    if isinstance(result, JSONResponse):
+    if isinstance(result, ScimJSONResponse):
        return result
    group = result

    mapping = dal.get_group_mapping_by_group_id(group.id)
    members = dal.get_group_members(group.id)

-    return provider.build_group_resource(
+    resource = provider.build_group_resource(
        group, members, mapping.external_id if mapping else None
    )

+    # RFC 7644 §3.4.2.5 — drop whatever the IdP listed in excludedAttributes
+    excluded = _parse_excluded_attributes(excludedAttributes)
+    if excluded:
+        return ScimJSONResponse(content=_apply_exclusions(resource, excluded))
+
+    return _scim_resource_response(resource)
+

@scim_router.post("/Groups", status_code=201, response_model=None)
 def create_group(
@@ -560,7 +788,7 @@ def create_group(
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimGroupResource | JSONResponse:
+) -> ScimGroupResource | ScimJSONResponse:
    """Create a new group from a SCIM provisioning request."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)
@@ -596,7 +824,10 @@ def create_group(
    dal.commit()

    members = dal.get_group_members(db_group.id)
-    return provider.build_group_resource(db_group, members, external_id)
+    return _scim_resource_response(
+        provider.build_group_resource(db_group, members, external_id),
+        status_code=201,
+    )


@scim_router.put("/Groups/{group_id}", response_model=None)
@@ -606,13 +837,13 @@ def replace_group(
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimGroupResource | JSONResponse:
+) -> ScimGroupResource | ScimJSONResponse:
    """Replace a group entirely (RFC 7644 §3.5.1)."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_group_or_404(group_id, dal)
-    if isinstance(result, JSONResponse):
+    if isinstance(result, ScimJSONResponse):
        return result
    group = result

@@ -627,7 +858,9 @@ def replace_group(
    dal.commit()

    members = dal.get_group_members(group.id)
-    return provider.build_group_resource(group, members, group_resource.externalId)
+    return _scim_resource_response(
+        provider.build_group_resource(group, members, group_resource.externalId)
+    )


@scim_router.patch("/Groups/{group_id}", response_model=None)
@@ -637,7 +870,7 @@ def patch_group(
    _token: ScimToken = Depends(verify_scim_token),
    provider: ScimProvider = Depends(_get_provider),
    db_session: Session = Depends(get_session),
-) -> ScimGroupResource | JSONResponse:
+) -> ScimGroupResource | ScimJSONResponse:
    """Partially update a group (RFC 7644 §3.5.2).

    Handles member add/remove operations from Okta and Azure AD.
@@ -646,7 +879,7 @@ def patch_group(
    dal.update_token_last_used(_token.id)

    result = _fetch_group_or_404(group_id, dal)
-    if isinstance(result, JSONResponse):
+    if isinstance(result, ScimJSONResponse):
        return result
    group = result

@@ -685,7 +918,9 @@ def patch_group(
    dal.commit()

    members = dal.get_group_members(group.id)
-    return provider.build_group_resource(group, members, patched.externalId)
+    return _scim_resource_response(
+        provider.build_group_resource(group, members, patched.externalId)
+    )


@scim_router.delete("/Groups/{group_id}", status_code=204, response_model=None)
@@ -693,13 +928,13 @@ def delete_group(
    group_id: str,
    _token: ScimToken = Depends(verify_scim_token),
    db_session: Session = Depends(get_session),
-) -> Response | JSONResponse:
+) -> Response | ScimJSONResponse:
    """Delete a group (RFC 7644 §3.6)."""
    dal = ScimDAL(db_session)
    dal.update_token_last_used(_token.id)

    result = _fetch_group_or_404(group_id, dal)
-    if isinstance(result, JSONResponse):
+    if isinstance(result, ScimJSONResponse):
        return result
    group = result

--- a/backend/ee/onyx/server/scim/models.py
+++ b/backend/ee/onyx/server/scim/models.py
@@ -7,12 +7,14 @@ SCIM protocol schemas follow the wire format defined in:
 Admin API schemas are internal to Onyx and used for SCIM token management.
 """

+from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum

 from pydantic import BaseModel
 from pydantic import ConfigDict
 from pydantic import Field
+from pydantic import field_validator


 # ---------------------------------------------------------------------------
@@ -31,6 +33,9 @@ SCIM_SERVICE_PROVIDER_CONFIG_SCHEMA = (
 )
 SCIM_RESOURCE_TYPE_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:ResourceType"
 SCIM_SCHEMA_SCHEMA = "urn:ietf:params:scim:schemas:core:2.0:Schema"
+SCIM_ENTERPRISE_USER_SCHEMA = (
+    "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
+)


 # ---------------------------------------------------------------------------
@@ -70,6 +75,36 @@ class ScimUserGroupRef(BaseModel):
    display: str | None = None


+class ScimManagerRef(BaseModel):
+    """Manager sub-attribute for the enterprise extension (RFC 7643 §4.3)."""
+
+    # Manager user ID; None when the manager is not set.
+    value: str | None = None
+
+
+class ScimEnterpriseExtension(BaseModel):
+    """Enterprise User extension attributes (RFC 7643 §4.3).
+
+    Only the attributes listed below are modeled; both are optional.
+    """
+
+    # Department name.
+    department: str | None = None
+    # Reference to the user's manager (carries the manager's ID in ``value``).
+    manager: ScimManagerRef | None = None
+
+
+@dataclass
+class ScimMappingFields:
+    """Stored SCIM mapping fields that need to round-trip through the IdP.
+
+    Entra ID sends structured name components, email metadata, and enterprise
+    extension attributes that must be returned verbatim in subsequent GET
+    responses. These fields are persisted on ScimUserMapping and threaded
+    through the DAL, provider, and endpoint layers.
+    """
+
+    # Enterprise extension department.
+    department: str | None = None
+    # Enterprise extension manager ID; the provider wraps it in a
+    # ScimManagerRef when building the response.
+    manager: str | None = None
+    # Name components returned as name.givenName / name.familyName.
+    given_name: str | None = None
+    family_name: str | None = None
+    # JSON-encoded list of SCIM email entries (see serialize_emails in
+    # providers/base.py); None means "use the default work email".
+    scim_emails_json: str | None = None
+
+
 class ScimUserResource(BaseModel):
    """SCIM User resource representation (RFC 7643 §4.1).

@@ -78,6 +113,8 @@ class ScimUserResource(BaseModel):
    to match the SCIM wire format (not Python convention).
    """

+    model_config = ConfigDict(populate_by_name=True)
+
    schemas: list[str] = Field(default_factory=lambda: [SCIM_USER_SCHEMA])
    id: str | None = None  # Onyx's internal user ID, set on responses
    externalId: str | None = None  # IdP's identifier for this user
@@ -88,6 +125,10 @@ class ScimUserResource(BaseModel):
    active: bool = True
    groups: list[ScimUserGroupRef] = Field(default_factory=list)
    meta: ScimMeta | None = None
+    enterprise_extension: ScimEnterpriseExtension | None = Field(
+        default=None,
+        alias="urn:ietf:params:scim:schemas:extension:enterprise:2.0:User",
+    )


 class ScimGroupMember(BaseModel):
@@ -165,6 +206,19 @@ class ScimPatchOperation(BaseModel):
    path: str | None = None
    value: ScimPatchValue = None

+    @field_validator("op", mode="before")
+    @classmethod
+    def normalize_operation(cls, v: object) -> object:
+        """Lowercase string ops so matching against the enum ignores case.
+
+        Entra ID, for one, sends ``"Replace"`` instead of ``"replace"``. This
+        is safe for every provider because the enum values are lowercase;
+        non-string input is returned unchanged. Further provider quirks
+        belong in the provider subclass, not in extra special cases here.
+        """
+        if isinstance(v, str):
+            return v.lower()
+        return v
+

 class ScimPatchRequest(BaseModel):
    """PATCH request body (RFC 7644 §3.5.2).
--- a/backend/ee/onyx/server/scim/patch.py
+++ b/backend/ee/onyx/server/scim/patch.py
@@ -14,8 +14,13 @@ responsible for persisting changes.

 from __future__ import annotations

+import logging
 import re
+from dataclasses import dataclass
+from dataclasses import field
+from typing import Any

+from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
 from ee.onyx.server.scim.models import ScimGroupMember
 from ee.onyx.server.scim.models import ScimGroupResource
 from ee.onyx.server.scim.models import ScimPatchOperation
@@ -24,6 +29,55 @@ from ee.onyx.server.scim.models import ScimPatchResourceValue
 from ee.onyx.server.scim.models import ScimPatchValue
 from ee.onyx.server.scim.models import ScimUserResource

+logger = logging.getLogger(__name__)
+
+# Lowercased enterprise extension URN for case-insensitive matching
+_ENTERPRISE_URN_LOWER = SCIM_ENTERPRISE_USER_SCHEMA.lower()
+
+# Pattern for email filter paths, e.g.:
+#   emails[primary eq true].value  (Okta)
+#   emails[type eq "work"].value   (Azure AD / Entra ID)
+_EMAIL_FILTER_RE = re.compile(
+    r"^emails\[.+\]\.value$",
+    re.IGNORECASE,
+)
+
+# Pattern for member removal path: members[value eq "user-id"]
+_MEMBER_FILTER_RE = re.compile(
+    r'^members\[value\s+eq\s+"([^"]+)"\]$',
+    re.IGNORECASE,
+)
+
+# ---------------------------------------------------------------------------
+# Dispatch tables for user and group PATCH paths
+#
+# Maps lowercased SCIM path → (camelCase key, target dict name).
+# "data" writes to the top-level resource dict, "name" writes to the
+# name sub-object dict. This replaces the elif chains for simple fields.
+# ---------------------------------------------------------------------------
+
+# "displayname" is intentionally absent here: a displayName replace also
+# updates name.formatted and is special-cased in _set_user_field.
+_USER_REPLACE_PATHS: dict[str, tuple[str, str]] = {
+    "active": ("active", "data"),
+    "username": ("userName", "data"),
+    "externalid": ("externalId", "data"),
+    "name.givenname": ("givenName", "name"),
+    "name.familyname": ("familyName", "name"),
+    "name.formatted": ("formatted", "name"),
+}
+
+# Paths a remove operation may clear (the target key is set to None).
+_USER_REMOVE_PATHS: dict[str, tuple[str, str]] = {
+    "externalid": ("externalId", "data"),
+    "name.givenname": ("givenName", "name"),
+    "name.familyname": ("familyName", "name"),
+    "name.formatted": ("formatted", "name"),
+    "displayname": ("displayName", "data"),
+}
+
+# Group resources have no name sub-object, so _set_group_field ignores the
+# target element and always writes to the top-level dict.
+_GROUP_REPLACE_PATHS: dict[str, tuple[str, str]] = {
+    "displayname": ("displayName", "data"),
+    "externalid": ("externalId", "data"),
+}
+

 class ScimPatchError(Exception):
    """Raised when a PATCH operation cannot be applied."""
@@ -34,18 +88,25 @@ class ScimPatchError(Exception):
        super().__init__(detail)


-# Pattern for member removal path: members[value eq "user-id"]
-_MEMBER_FILTER_RE = re.compile(
-    r'^members\[value\s+eq\s+"([^"]+)"\]$',
-    re.IGNORECASE,
-)
+@dataclass
+class _UserPatchCtx:
+    """Bundles the mutable state for user PATCH operations."""
+
+    # Dumped top-level resource dict that operations write into.
+    data: dict[str, Any]
+    # The ``name`` sub-object; written back to ``data["name"]`` once all
+    # operations have been applied.
+    name_data: dict[str, Any]
+    # Enterprise extension values touched by the operations
+    # (filled in by _set_enterprise_field).
+    ent_data: dict[str, str | None] = field(default_factory=dict)
+
+
+# ---------------------------------------------------------------------------
+# User PATCH
+# ---------------------------------------------------------------------------


 def apply_user_patch(
    operations: list[ScimPatchOperation],
    current: ScimUserResource,
    ignored_paths: frozenset[str] = frozenset(),
-) -> ScimUserResource:
+) -> tuple[ScimUserResource, dict[str, str | None]]:
    """Apply SCIM PATCH operations to a user resource.

    Args:
@@ -53,79 +114,185 @@ def apply_user_patch(
        current: The current user resource state.
        ignored_paths: SCIM attribute paths to silently skip (from provider).

-    Returns a new ``ScimUserResource`` with the modifications applied.
-    The original object is not mutated.
+    Returns:
+        A tuple of (modified user resource, enterprise extension data dict).
+        The enterprise dict has keys ``"department"`` and ``"manager"``
+        with values set only when a PATCH operation touched them.

    Raises:
        ScimPatchError: If an operation targets an unsupported path.
    """
    data = current.model_dump()
-    name_data = data.get("name") or {}
+    ctx = _UserPatchCtx(data=data, name_data=data.get("name") or {})

    for op in operations:
-        if op.op == ScimPatchOperationType.REPLACE:
-            _apply_user_replace(op, data, name_data, ignored_paths)
-        elif op.op == ScimPatchOperationType.ADD:
-            _apply_user_replace(op, data, name_data, ignored_paths)
+        if op.op in (ScimPatchOperationType.REPLACE, ScimPatchOperationType.ADD):
+            _apply_user_replace(op, ctx, ignored_paths)
+        elif op.op == ScimPatchOperationType.REMOVE:
+            _apply_user_remove(op, ctx, ignored_paths)
        else:
            raise ScimPatchError(
                f"Unsupported operation '{op.op.value}' on User resource"
            )

-    data["name"] = name_data
-    return ScimUserResource.model_validate(data)
+    ctx.data["name"] = ctx.name_data
+    return ScimUserResource.model_validate(ctx.data), ctx.ent_data


 def _apply_user_replace(
    op: ScimPatchOperation,
-    data: dict,
-    name_data: dict,
+    ctx: _UserPatchCtx,
    ignored_paths: frozenset[str],
 ) -> None:
    """Apply a replace/add operation to user data."""
    path = (op.path or "").lower()

    if not path:
-        # No path — value is a resource dict of top-level attributes to set
+        # No path — value is a resource dict of top-level attributes to set.
        if isinstance(op.value, ScimPatchResourceValue):
            for key, val in op.value.model_dump(exclude_unset=True).items():
-                _set_user_field(key.lower(), val, data, name_data, ignored_paths)
+                _set_user_field(key.lower(), val, ctx, ignored_paths, strict=False)
        else:
            raise ScimPatchError("Replace without path requires a dict value")
        return

-    _set_user_field(path, op.value, data, name_data, ignored_paths)
+    _set_user_field(path, op.value, ctx, ignored_paths)
+
+
+def _apply_user_remove(
+    op: ScimPatchOperation,
+    ctx: _UserPatchCtx,
+    ignored_paths: frozenset[str],
+) -> None:
+    """Clear the user field addressed by a remove operation.
+
+    The path is matched case-insensitively. Ignored paths are skipped
+    silently; supported paths have their value set to ``None``.
+
+    Raises:
+        ScimPatchError: If the path is missing or not removable.
+    """
+    path = (op.path or "").lower()
+    if not path:
+        raise ScimPatchError("Remove operation requires a path")
+
+    if path in ignored_paths:
+        return
+
+    if path not in _USER_REMOVE_PATHS:
+        raise ScimPatchError(f"Unsupported remove path '{path}' for User PATCH")
+
+    key, target = _USER_REMOVE_PATHS[path]
+    if target == "data":
+        ctx.data[key] = None
+    else:
+        ctx.name_data[key] = None


 def _set_user_field(
    path: str,
    value: ScimPatchValue,
-    data: dict,
-    name_data: dict,
+    ctx: _UserPatchCtx,
    ignored_paths: frozenset[str],
+    *,
+    strict: bool = True,
 ) -> None:
-    """Set a single field on user data by SCIM path."""
+    """Set a single field on user data by SCIM path.
+
+    Args:
+        path: Lowercased SCIM attribute path (or attribute name for a
+            path-less replace).
+        value: The value carried by the PATCH operation.
+        ctx: Mutable patch state; ``data``, ``name_data`` and ``ent_data``
+            are updated in place.
+        ignored_paths: Paths to skip silently (from the provider).
+        strict: When ``False`` (path-less replace), unknown attributes are
+            silently skipped.  When ``True`` (explicit path), they raise.
+
+    Raises:
+        ScimPatchError: If ``strict`` is set and the path is not supported.
+    """
    if path in ignored_paths:
        return
-    elif path == "active":
-        data["active"] = value
-    elif path == "username":
-        data["userName"] = value
-    elif path == "externalid":
-        data["externalId"] = value
-    elif path == "name.givenname":
-        name_data["givenName"] = value
-    elif path == "name.familyname":
-        name_data["familyName"] = value
-    elif path == "name.formatted":
-        name_data["formatted"] = value
-    elif path == "displayname":
-        data["displayName"] = value
-        name_data["formatted"] = value
+
+    # Simple field writes handled by the dispatch table
+    entry = _USER_REPLACE_PATHS.get(path)
+    if entry:
+        key, target = entry
+        target_dict = ctx.data if target == "data" else ctx.name_data
+        target_dict[key] = value
+        return
+
+    # displayName sets both the top-level field and the name.formatted sub-field
+    if path == "displayname":
+        ctx.data["displayName"] = value
+        ctx.name_data["formatted"] = value
+    elif path == "name":
+        # With an explicit path the value may arrive as a
+        # ScimPatchResourceValue rather than a raw dict, so normalize it
+        # instead of dropping the update.
+        name_update = _to_dict(value)
+        if name_update is not None:
+            ctx.name_data.update(name_update)
+    elif path == "emails":
+        # Non-list values are ignored rather than rejected.
+        if isinstance(value, list):
+            ctx.data["emails"] = value
+    elif _EMAIL_FILTER_RE.match(path):
+        _update_primary_email(ctx.data, value)
+    elif path.startswith(_ENTERPRISE_URN_LOWER):
+        _set_enterprise_field(path, value, ctx.ent_data)
+    elif not strict:
+        return
    else:
        raise ScimPatchError(f"Unsupported path '{path}' for User PATCH")


+def _update_primary_email(data: dict[str, Any], value: ScimPatchValue) -> None:
+    """Point the primary email at ``value``, creating the entry if needed.
+
+    The first entry flagged ``primary`` is updated in place. When there is
+    none, a new primary ``work`` entry is appended. ``data["emails"]`` is
+    always (re)assigned, so a missing or empty list becomes a real list.
+    """
+    entries: list[dict] = data.get("emails") or []
+    primary = next((item for item in entries if item.get("primary")), None)
+    if primary is None:
+        entries.append({"value": value, "type": "work", "primary": True})
+    else:
+        primary["value"] = value
+    data["emails"] = entries
+
+
+def _to_dict(value: ScimPatchValue) -> dict | None:
+    """Return ``value`` as a plain dict, or ``None`` if it is not dict-like.
+
+    Raw dicts pass through unchanged. Pydantic may have parsed a raw dict
+    into ``ScimPatchResourceValue`` (which uses ``extra="allow"``); those
+    are dumped back to a dict containing only the fields that were set.
+    """
+    if isinstance(value, ScimPatchResourceValue):
+        return value.model_dump(exclude_unset=True)
+    return value if isinstance(value, dict) else None
+
+
+def _store_enterprise_manager(
+    value: Any,
+    ent_data: dict[str, str | None],
+) -> None:
+    """Record a manager given as ``None``, an ID string, or a ``{"value": ...}`` dict."""
+    if value is None:
+        # Explicit null clears the manager, mirroring department handling.
+        ent_data["manager"] = None
+        return
+    if isinstance(value, str):
+        ent_data["manager"] = value
+        return
+    d = _to_dict(value)
+    if d is not None:
+        ent_data["manager"] = d.get("value")
+
+
+def _set_enterprise_field(
+    path: str,
+    value: ScimPatchValue,
+    ent_data: dict[str, str | None],
+) -> None:
+    """Record enterprise extension values targeted by a PATCH operation.
+
+    Two shapes are accepted:
+
+    * ``path`` is the bare (lowercased) enterprise URN and ``value`` is a
+      dict, e.g. ``{"department": "Eng", "manager": {...}}`` (path-less
+      PATCH, where the URN is a key of the value dict).
+    * ``path`` is the URN followed by an attribute name, e.g.
+      ``"urn:...:user:department"``, and ``value`` is that attribute's value.
+
+    Only ``department`` and ``manager`` are stored. Both branches treat an
+    explicit ``None`` as "clear" and coerce department to ``str``.
+
+    Args:
+        path: Lowercased SCIM path starting with the enterprise URN.
+        value: The value carried by the operation.
+        ent_data: Output dict, mutated in place for the attributes touched.
+    """
+    # Full URN as key with dict value (path-less PATCH)
+    if path == _ENTERPRISE_URN_LOWER:
+        d = _to_dict(value)
+        if d is None:
+            return
+        if "department" in d:
+            dept = d["department"]
+            ent_data["department"] = str(dept) if dept is not None else None
+        if "manager" in d:
+            _store_enterprise_manager(d["manager"], ent_data)
+        return
+
+    # Dotted URN path, e.g. "urn:...:user:department"
+    suffix = path[len(_ENTERPRISE_URN_LOWER) :].lstrip(":").lower()
+    if suffix == "department":
+        ent_data["department"] = str(value) if value is not None else None
+    elif suffix == "manager":
+        _store_enterprise_manager(value, ent_data)
+    else:
+        # Unknown enterprise attributes are silently ignored rather than
+        # rejected — IdPs may send attributes we don't model yet.
+        logger.warning("Ignoring unknown enterprise extension attribute '%s'", suffix)
+
+
+# ---------------------------------------------------------------------------
+# Group PATCH
+# ---------------------------------------------------------------------------
+
+
 def apply_group_patch(
    operations: list[ScimPatchOperation],
    current: ScimGroupResource,
@@ -235,12 +402,14 @@ def _set_group_field(
    """Set a single field on group data by SCIM path."""
    if path in ignored_paths:
        return
-    elif path == "displayname":
-        data["displayName"] = value
-    elif path == "externalid":
-        data["externalId"] = value
-    else:
-        raise ScimPatchError(f"Unsupported path '{path}' for Group PATCH")
+
+    entry = _GROUP_REPLACE_PATHS.get(path)
+    if entry:
+        key, _ = entry
+        data[key] = value
+        return
+
+    raise ScimPatchError(f"Unsupported path '{path}' for Group PATCH")


 def _apply_group_add(
--- a/backend/ee/onyx/server/scim/providers/base.py
+++ b/backend/ee/onyx/server/scim/providers/base.py
@@ -2,13 +2,22 @@

 from __future__ import annotations

+import json
+import logging
 from abc import ABC
 from abc import abstractmethod
 from uuid import UUID

+from pydantic import ValidationError
+
+from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
+from ee.onyx.server.scim.models import SCIM_USER_SCHEMA
 from ee.onyx.server.scim.models import ScimEmail
+from ee.onyx.server.scim.models import ScimEnterpriseExtension
 from ee.onyx.server.scim.models import ScimGroupMember
 from ee.onyx.server.scim.models import ScimGroupResource
+from ee.onyx.server.scim.models import ScimManagerRef
+from ee.onyx.server.scim.models import ScimMappingFields
 from ee.onyx.server.scim.models import ScimMeta
 from ee.onyx.server.scim.models import ScimName
 from ee.onyx.server.scim.models import ScimUserGroupRef
@@ -17,6 +26,17 @@ from onyx.db.models import User
 from onyx.db.models import UserGroup


+logger = logging.getLogger(__name__)
+
+# PATCH paths that providers skip silently; shared by the Okta and Entra
+# providers as their ``ignored_patch_paths``.
+COMMON_IGNORED_PATCH_PATHS: frozenset[str] = frozenset(
+    {
+        "id",
+        "schemas",
+        "meta",
+    }
+)
+
+
 class ScimProvider(ABC):
    """Base class for provider-specific SCIM behavior.

@@ -41,12 +61,22 @@ class ScimProvider(ABC):
        """
        ...

+    @property
+    def user_schemas(self) -> list[str]:
+        """Schema URIs to include in User resource responses.
+
+        Override in subclasses to advertise additional schemas (e.g. the
+        enterprise extension for Entra ID).
+        """
+        return [SCIM_USER_SCHEMA]
+
    def build_user_resource(
        self,
        user: User,
        external_id: str | None = None,
        groups: list[tuple[int, str]] | None = None,
        scim_username: str | None = None,
+        fields: ScimMappingFields | None = None,
    ) -> ScimUserResource:
        """Build a SCIM User response from an Onyx User.

@@ -58,27 +88,48 @@ class ScimProvider(ABC):
                for newly-created users.
            scim_username: The original-case userName from the IdP. Falls
                back to ``user.email`` (lowercase) when not available.
+            fields: Stored mapping fields that the IdP expects round-tripped.
        """
+        f = fields or ScimMappingFields()
        group_refs = [
            ScimUserGroupRef(value=str(gid), display=gname)
            for gid, gname in (groups or [])
        ]

-        # Use original-case userName if stored, otherwise fall back to the
-        # lowercased email from the User model.
        username = scim_username or user.email

-        return ScimUserResource(
+        # Build enterprise extension when at least one value is present.
+        # Dynamically add the enterprise URN to schemas per RFC 7643 §3.0.
+        enterprise_ext: ScimEnterpriseExtension | None = None
+        schemas = list(self.user_schemas)
+        if f.department is not None or f.manager is not None:
+            manager_ref = (
+                ScimManagerRef(value=f.manager) if f.manager is not None else None
+            )
+            enterprise_ext = ScimEnterpriseExtension(
+                department=f.department,
+                manager=manager_ref,
+            )
+            if SCIM_ENTERPRISE_USER_SCHEMA not in schemas:
+                schemas.append(SCIM_ENTERPRISE_USER_SCHEMA)
+
+        name = self.build_scim_name(user, f)
+        emails = _deserialize_emails(f.scim_emails_json, username)
+
+        resource = ScimUserResource(
+            schemas=schemas,
            id=str(user.id),
            externalId=external_id,
            userName=username,
-            name=self._build_scim_name(user),
+            name=name,
            displayName=user.personal_name,
-            emails=[ScimEmail(value=username, type="work", primary=True)],
+            emails=emails,
            active=user.is_active,
            groups=group_refs,
            meta=ScimMeta(resourceType="User"),
        )
+        resource.enterprise_extension = enterprise_ext
+        return resource

    def build_group_resource(
        self,
@@ -98,9 +149,24 @@ class ScimProvider(ABC):
            meta=ScimMeta(resourceType="Group"),
        )

-    @staticmethod
-    def _build_scim_name(user: User) -> ScimName | None:
-        """Extract SCIM name components from a user's personal name."""
+    def build_scim_name(
+        self,
+        user: User,
+        fields: ScimMappingFields,
+    ) -> ScimName | None:
+        """Build SCIM name components for the response.
+
+        Round-trips stored ``given_name``/``family_name`` when available (so
+        the IdP gets back what it sent). Falls back to splitting
+        ``personal_name`` for users provisioned before we stored components.
+        Providers may override for custom behavior.
+        """
+        if fields.given_name is not None or fields.family_name is not None:
+            return ScimName(
+                givenName=fields.given_name,
+                familyName=fields.family_name,
+                formatted=user.personal_name,
+            )
        if not user.personal_name:
            return None
        parts = user.personal_name.split(" ", 1)
@@ -111,6 +177,27 @@ class ScimProvider(ABC):
        )


+def _deserialize_emails(stored_json: str | None, username: str) -> list[ScimEmail]:
+    """Rebuild the stored email list, or fall back to one default work email.
+
+    The fallback (``username`` as a primary ``work`` email) is used when
+    nothing is stored, when the stored JSON is not a non-empty list, or when
+    decoding or validation fails (a warning is logged in that last case).
+    """
+    if not stored_json:
+        return [ScimEmail(value=username, type="work", primary=True)]
+
+    try:
+        parsed = json.loads(stored_json)
+        if parsed and isinstance(parsed, list):
+            return [ScimEmail(**item) for item in parsed]
+    except (json.JSONDecodeError, TypeError, ValidationError):
+        logger.warning(
+            "Corrupt scim_emails_json, falling back to default: %s", stored_json
+        )
+    return [ScimEmail(value=username, type="work", primary=True)]
+
+
+def serialize_emails(emails: list[ScimEmail]) -> str | None:
+    """Encode SCIM email entries as a JSON string for storage.
+
+    Returns ``None`` for an empty list. ``None``-valued fields are dropped
+    from each entry before encoding.
+    """
+    if emails:
+        dumped = [entry.model_dump(exclude_none=True) for entry in emails]
+        return json.dumps(dumped)
+    return None
+
+
 def get_default_provider() -> ScimProvider:
    """Return the default SCIM provider.

--- a/backend/ee/onyx/server/scim/providers/entra.py
+++ b/backend/ee/onyx/server/scim/providers/entra.py
@@ -0,0 +1,36 @@
+"""Entra ID (Azure AD) SCIM provider."""
+
+from __future__ import annotations
+
+from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
+from ee.onyx.server.scim.models import SCIM_USER_SCHEMA
+from ee.onyx.server.scim.providers.base import COMMON_IGNORED_PATCH_PATHS
+from ee.onyx.server.scim.providers.base import ScimProvider
+
+_ENTRA_IGNORED_PATCH_PATHS = COMMON_IGNORED_PATCH_PATHS
+
+
+class EntraProvider(ScimProvider):
+    """Entra ID (Azure AD) SCIM provider.
+
+    Entra behavioral notes:
+      - Sends capitalized PATCH ops (``"Add"``, ``"Replace"``, ``"Remove"``)
+        — handled by the ``ScimPatchOperation.normalize_operation`` validator.
+      - Sends the enterprise extension URN as a key in path-less PATCH value
+        dicts — handled by ``_set_enterprise_field`` in ``patch.py`` to
+        store department/manager values.
+      - Expects the enterprise extension schema in ``schemas`` arrays and
+        ``/Schemas`` + ``/ResourceTypes`` discovery endpoints.
+    """
+
+    @property
+    def name(self) -> str:
+        return "entra"
+
+    @property
+    def ignored_patch_paths(self) -> frozenset[str]:
+        return _ENTRA_IGNORED_PATCH_PATHS
+
+    @property
+    def user_schemas(self) -> list[str]:
+        # Always advertise the enterprise extension alongside the core schema.
+        return [SCIM_USER_SCHEMA, SCIM_ENTERPRISE_USER_SCHEMA]
--- a/backend/ee/onyx/server/scim/providers/okta.py
+++ b/backend/ee/onyx/server/scim/providers/okta.py
@@ -2,6 +2,7 @@

 from __future__ import annotations

+from ee.onyx.server.scim.providers.base import COMMON_IGNORED_PATCH_PATHS
 from ee.onyx.server.scim.providers.base import ScimProvider


@@ -22,4 +23,4 @@ class OktaProvider(ScimProvider):

    @property
    def ignored_patch_paths(self) -> frozenset[str]:
-        return frozenset({"id", "schemas", "meta"})
+        return COMMON_IGNORED_PATCH_PATHS
--- a/backend/ee/onyx/server/scim/schema_definitions.py
+++ b/backend/ee/onyx/server/scim/schema_definitions.py
@@ -4,6 +4,7 @@ Pre-built at import time — these never change at runtime. Separated from
 api.py to keep the endpoint module focused on request handling.
 """

+from ee.onyx.server.scim.models import SCIM_ENTERPRISE_USER_SCHEMA
 from ee.onyx.server.scim.models import SCIM_GROUP_SCHEMA
 from ee.onyx.server.scim.models import SCIM_USER_SCHEMA
 from ee.onyx.server.scim.models import ScimResourceType
@@ -20,6 +21,9 @@ USER_RESOURCE_TYPE = ScimResourceType.model_validate(
        "endpoint": "/scim/v2/Users",
        "description": "SCIM User resource",
        "schema": SCIM_USER_SCHEMA,
+        "schemaExtensions": [
+            {"schema": SCIM_ENTERPRISE_USER_SCHEMA, "required": False}
+        ],
    }
 )

@@ -104,6 +108,31 @@ USER_SCHEMA_DEF = ScimSchemaDefinition(
    ],
 )

+# Schema definition for the enterprise extension. It lists the attributes
+# modeled by ScimEnterpriseExtension / ScimManagerRef: ``department`` and
+# ``manager.value``.
+ENTERPRISE_USER_SCHEMA_DEF = ScimSchemaDefinition(
+    id=SCIM_ENTERPRISE_USER_SCHEMA,
+    name="EnterpriseUser",
+    description="Enterprise User extension (RFC 7643 §4.3)",
+    attributes=[
+        ScimSchemaAttribute(
+            name="department",
+            type="string",
+            description="Department.",
+        ),
+        ScimSchemaAttribute(
+            name="manager",
+            type="complex",
+            description="The user's manager.",
+            subAttributes=[
+                ScimSchemaAttribute(
+                    name="value",
+                    type="string",
+                    description="Manager user ID.",
+                ),
+            ],
+        ),
+    ],
+)
+
 GROUP_SCHEMA_DEF = ScimSchemaDefinition(
    id=SCIM_GROUP_SCHEMA,
    name="Group",
--- a/backend/onyx/auth/oauth_token_manager.py
+++ b/backend/onyx/auth/oauth_token_manager.py
@@ -58,16 +58,27 @@ class OAuthTokenManager:
        if not user_token.token_data:
            raise ValueError("No token data available for refresh")

+        if (
+            self.oauth_config.client_id is None
+            or self.oauth_config.client_secret is None
+        ):
+            raise ValueError(
+                "OAuth client_id and client_secret are required for token refresh"
+            )
+
        token_data = self._unwrap_token_data(user_token.token_data)

+        data: dict[str, str] = {
+            "grant_type": "refresh_token",
+            "refresh_token": token_data["refresh_token"],
+            "client_id": self._unwrap_sensitive_str(self.oauth_config.client_id),
+            "client_secret": self._unwrap_sensitive_str(
+                self.oauth_config.client_secret
+            ),
+        }
        response = requests.post(
            self.oauth_config.token_url,
-            data={
-                "grant_type": "refresh_token",
-                "refresh_token": token_data["refresh_token"],
-                "client_id": self.oauth_config.client_id,
-                "client_secret": self.oauth_config.client_secret,
-            },
+            data=data,
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()
@@ -115,15 +126,26 @@ class OAuthTokenManager:

    def exchange_code_for_token(self, code: str, redirect_uri: str) -> dict[str, Any]:
        """Exchange authorization code for access token"""
+        if (
+            self.oauth_config.client_id is None
+            or self.oauth_config.client_secret is None
+        ):
+            raise ValueError(
+                "OAuth client_id and client_secret are required for code exchange"
+            )
+
+        data: dict[str, str] = {
+            "grant_type": "authorization_code",
+            "code": code,
+            "client_id": self._unwrap_sensitive_str(self.oauth_config.client_id),
+            "client_secret": self._unwrap_sensitive_str(
+                self.oauth_config.client_secret
+            ),
+            "redirect_uri": redirect_uri,
+        }
        response = requests.post(
            self.oauth_config.token_url,
-            data={
-                "grant_type": "authorization_code",
-                "code": code,
-                "client_id": self.oauth_config.client_id,
-                "client_secret": self.oauth_config.client_secret,
-                "redirect_uri": redirect_uri,
-            },
+            data=data,
            headers={"Accept": "application/json"},
        )
        response.raise_for_status()
@@ -141,8 +163,13 @@ class OAuthTokenManager:
        oauth_config: OAuthConfig, redirect_uri: str, state: str
    ) -> str:
        """Build OAuth authorization URL"""
+        if oauth_config.client_id is None:
+            raise ValueError("OAuth client_id is required to build authorization URL")
+
        params: dict[str, Any] = {
-            "client_id": oauth_config.client_id,
+            "client_id": OAuthTokenManager._unwrap_sensitive_str(
+                oauth_config.client_id
+            ),
            "redirect_uri": redirect_uri,
            "response_type": "code",
            "state": state,
@@ -161,6 +188,12 @@ class OAuthTokenManager:

        return f"{oauth_config.authorization_url}{separator}{urlencode(params)}"

+    @staticmethod
+    def _unwrap_sensitive_str(value: SensitiveValue[str] | str) -> str:
+        if isinstance(value, SensitiveValue):
+            return value.get_value(apply_mask=False)
+        return value
+
    @staticmethod
    def _unwrap_token_data(
        token_data: SensitiveValue[dict[str, Any]] | dict[str, Any],
--- a/backend/onyx/background/celery/tasks/opensearch_migration/tasks.py
+++ b/backend/onyx/background/celery/tasks/opensearch_migration/tasks.py
@@ -48,6 +48,7 @@ from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchDocumentIndex,
 )
 from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
+from onyx.indexing.models import IndexingSetting
 from onyx.redis.redis_pool import get_redis_client
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.contextvars import get_current_tenant_id
@@ -149,8 +150,12 @@ def migrate_chunks_from_vespa_to_opensearch_task(
            try_insert_opensearch_tenant_migration_record_with_commit(db_session)
            search_settings = get_current_search_settings(db_session)
            tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
+            indexing_setting = IndexingSetting.from_db_model(search_settings)
            opensearch_document_index = OpenSearchDocumentIndex(
-                index_name=search_settings.index_name, tenant_state=tenant_state
+                tenant_state=tenant_state,
+                index_name=search_settings.index_name,
+                embedding_dim=indexing_setting.final_embedding_dim,
+                embedding_precision=indexing_setting.embedding_precision,
            )
            vespa_document_index = VespaDocumentIndex(
                index_name=search_settings.index_name,
--- a/backend/onyx/background/celery/tasks/opensearch_migration/transformer.py
+++ b/backend/onyx/background/celery/tasks/opensearch_migration/transformer.py
@@ -22,6 +22,7 @@ from onyx.document_index.vespa_constants import HIDDEN
 from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
 from onyx.document_index.vespa_constants import METADATA_LIST
 from onyx.document_index.vespa_constants import METADATA_SUFFIX
+from onyx.document_index.vespa_constants import PERSONAS
 from onyx.document_index.vespa_constants import PRIMARY_OWNERS
 from onyx.document_index.vespa_constants import SECONDARY_OWNERS
 from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
@@ -58,6 +59,7 @@ FIELDS_NEEDED_FOR_TRANSFORMATION: list[str] = [
    METADATA_SUFFIX,
    DOCUMENT_SETS,
    USER_PROJECT,
+    PERSONAS,
    PRIMARY_OWNERS,
    SECONDARY_OWNERS,
    ACCESS_CONTROL_LIST,
@@ -276,6 +278,7 @@ def transform_vespa_chunks_to_opensearch_chunks(
                )
            )
            user_projects: list[int] | None = vespa_chunk.get(USER_PROJECT)
+            personas: list[int] | None = vespa_chunk.get(PERSONAS)
            primary_owners: list[str] | None = vespa_chunk.get(PRIMARY_OWNERS)
            secondary_owners: list[str] | None = vespa_chunk.get(SECONDARY_OWNERS)

@@ -325,6 +328,7 @@ def transform_vespa_chunks_to_opensearch_chunks(
                metadata_suffix=metadata_suffix,
                document_sets=document_sets,
                user_projects=user_projects,
+                personas=personas,
                primary_owners=primary_owners,
                secondary_owners=secondary_owners,
                tenant_id=tenant_state,
--- a/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
+++ b/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
@@ -12,6 +12,7 @@ from redis import Redis
 from redis.lock import Lock as RedisLock
 from retry import retry
 from sqlalchemy import select
+from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
@@ -75,7 +76,7 @@ def _user_file_queued_key(user_file_id: str | UUID) -> str:
    return f"{OnyxRedisLocks.USER_FILE_QUEUED_PREFIX}:{user_file_id}"


-def _user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
+def user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
    return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}"


@@ -712,7 +713,10 @@ def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
                db_session.execute(
                    select(UserFile.id).where(
                        sa.and_(
-                            UserFile.needs_project_sync.is_(True),
+                            sa.or_(
+                                UserFile.needs_project_sync.is_(True),
+                                UserFile.needs_persona_sync.is_(True),
+                            ),
                            UserFile.status == UserFileStatus.COMPLETED,
                        )
                    )
@@ -760,7 +764,7 @@ def process_single_user_file_project_sync(
    redis_client.delete(_user_file_project_sync_queued_key(user_file_id))

    file_lock: RedisLock = redis_client.lock(
-        _user_file_project_sync_lock_key(user_file_id),
+        user_file_project_sync_lock_key(user_file_id),
        timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,
    )

@@ -772,7 +776,11 @@ def process_single_user_file_project_sync(

    try:
        with get_session_with_current_tenant() as db_session:
-            user_file = db_session.get(UserFile, _as_uuid(user_file_id))
+            user_file = db_session.execute(
+                select(UserFile)
+                .where(UserFile.id == _as_uuid(user_file_id))
+                .options(selectinload(UserFile.assistants))
+            ).scalar_one_or_none()
            if not user_file:
                task_logger.info(
                    f"process_single_user_file_project_sync - User file not found id={user_file_id}"
@@ -800,13 +808,17 @@ def process_single_user_file_project_sync(
                ]

                project_ids = [project.id for project in user_file.projects]
+                persona_ids = [p.id for p in user_file.assistants if not p.deleted]
                for retry_document_index in retry_document_indices:
                    retry_document_index.update_single(
                        doc_id=str(user_file.id),
                        tenant_id=tenant_id,
                        chunk_count=user_file.chunk_count,
                        fields=None,
-                        user_fields=VespaDocumentUserFields(user_projects=project_ids),
+                        user_fields=VespaDocumentUserFields(
+                            user_projects=project_ids,
+                            personas=persona_ids,
+                        ),
                    )

            task_logger.info(
@@ -814,6 +826,7 @@ def process_single_user_file_project_sync(
            )

            user_file.needs_project_sync = False
+            user_file.needs_persona_sync = False
            user_file.last_project_sync_at = datetime.datetime.now(
                datetime.timezone.utc
            )
--- a/backend/onyx/chat/chat_state.py
+++ b/backend/onyx/chat/chat_state.py
@@ -3,7 +3,6 @@ import time
 from collections.abc import Callable
 from collections.abc import Generator
 from queue import Empty
-from typing import Any

 from onyx.chat.citation_processor import CitationMapping
 from onyx.chat.emitter import Emitter
@@ -163,13 +162,11 @@ class ChatStateContainer:


 def run_chat_loop_with_state_containers(
-    func: Callable[..., None],
+    chat_loop_func: Callable[[Emitter, ChatStateContainer], None],
    completion_callback: Callable[[ChatStateContainer], None],
    is_connected: Callable[[], bool],
    emitter: Emitter,
    state_container: ChatStateContainer,
-    *args: Any,
-    **kwargs: Any,
 ) -> Generator[Packet, None]:
    """
    Explicit wrapper function that runs a function in a background thread
@@ -180,19 +177,18 @@ def run_chat_loop_with_state_containers(

    Args:
        func: The function to wrap (should accept emitter and state_container as first and second args)
+        completion_callback: Callback function to call when the function completes
        emitter: Emitter instance for sending packets
        state_container: ChatStateContainer instance for accumulating state
        is_connected: Callable that returns False when stop signal is set
-        *args: Additional positional arguments for func
-        **kwargs: Additional keyword arguments for func

    Usage:
        packets = run_chat_loop_with_state_containers(
            my_func,
+            completion_callback=completion_callback,
            emitter=emitter,
            state_container=state_container,
            is_connected=check_func,
-            arg1, arg2, kwarg1=value1
        )
        for packet in packets:
            # Process packets
@@ -201,9 +197,7 @@ def run_chat_loop_with_state_containers(

    def run_with_exception_capture() -> None:
        try:
-            # Ensure state_container is passed explicitly, removing it from kwargs if present
-            kwargs_with_state = {**kwargs, "state_container": state_container}
-            func(emitter, *args, **kwargs_with_state)
+            chat_loop_func(emitter, state_container)
        except Exception as e:
            # If execution fails, emit an exception packet
            emitter.emit(
--- a/backend/onyx/chat/chat_utils.py
+++ b/backend/onyx/chat/chat_utils.py
@@ -461,7 +461,7 @@ def _build_tool_call_response_history_message(
 def convert_chat_history(
    chat_history: list[ChatMessage],
    files: list[ChatLoadedFile],
-    project_image_files: list[ChatLoadedFile],
+    context_image_files: list[ChatLoadedFile],
    additional_context: str | None,
    token_counter: Callable[[str], int],
    tool_id_to_name_map: dict[int, str],
@@ -541,11 +541,11 @@ def convert_chat_history(
            )

            # Add the user message with image files attached
-            # If this is the last USER message, also include project_image_files
-            # Note: project image file tokens are NOT counted in the token count
+            # If this is the last USER message, also include context_image_files
+            # Note: context image file tokens are NOT counted in the token count
            if idx == last_user_message_idx:
-                if project_image_files:
-                    image_files.extend(project_image_files)
+                if context_image_files:
+                    image_files.extend(context_image_files)

                if additional_context:
                    simple_messages.append(
--- a/backend/onyx/chat/llm_loop.py
+++ b/backend/onyx/chat/llm_loop.py
@@ -15,10 +15,10 @@ from onyx.chat.emitter import Emitter
 from onyx.chat.llm_step import extract_tool_calls_from_response_text
 from onyx.chat.llm_step import run_llm_step
 from onyx.chat.models import ChatMessageSimple
-from onyx.chat.models import ExtractedProjectFiles
+from onyx.chat.models import ContextFileMetadata
+from onyx.chat.models import ExtractedContextFiles
 from onyx.chat.models import FileToolMetadata
 from onyx.chat.models import LlmStepResult
-from onyx.chat.models import ProjectFileMetadata
 from onyx.chat.models import ToolCallSimple
 from onyx.chat.prompt_utils import build_reminder_message
 from onyx.chat.prompt_utils import build_system_prompt
@@ -203,17 +203,17 @@ def _try_fallback_tool_extraction(
 MAX_LLM_CYCLES = 6


-def _build_project_file_citation_mapping(
-    project_file_metadata: list[ProjectFileMetadata],
+def _build_context_file_citation_mapping(
+    file_metadata: list[ContextFileMetadata],
    starting_citation_num: int = 1,
 ) -> CitationMapping:
-    """Build citation mapping for project files.
+    """Build citation mapping for context files.

-    Converts project file metadata into SearchDoc objects that can be cited.
+    Converts context file metadata into SearchDoc objects that can be cited.
    Citation numbers start from the provided starting number.

    Args:
-        project_file_metadata: List of project file metadata
+        file_metadata: List of context file metadata
        starting_citation_num: Starting citation number (default: 1)

    Returns:
@@ -221,8 +221,7 @@ def _build_project_file_citation_mapping(
    """
    citation_mapping: CitationMapping = {}

-    for idx, file_meta in enumerate(project_file_metadata, start=starting_citation_num):
-        # Create a SearchDoc for each project file
+    for idx, file_meta in enumerate(file_metadata, start=starting_citation_num):
        search_doc = SearchDoc(
            document_id=file_meta.file_id,
            chunk_ind=0,
@@ -242,29 +241,28 @@ def _build_project_file_citation_mapping(


 def _build_project_message(
-    project_files: ExtractedProjectFiles | None,
+    context_files: ExtractedContextFiles | None,
    token_counter: Callable[[str], int] | None,
 ) -> list[ChatMessageSimple]:
-    """Build messages for project / tool-backed files.
+    """Build messages for context-injected / tool-backed files.

    Returns up to two messages:
-    1. The full-text project files message (if project_file_texts is populated).
+    1. The full-text files message (if file_texts is populated).
    2. A lightweight metadata message for files the LLM should access via the
-       FileReaderTool (e.g. oversized chat-attached files or project files that
-       don't fit in context).
+       FileReaderTool (e.g. oversized files that don't fit in context).
    """
-    if not project_files:
+    if not context_files:
        return []

    messages: list[ChatMessageSimple] = []
-    if project_files.project_file_texts:
+    if context_files.file_texts:
        messages.append(
-            _create_project_files_message(project_files, token_counter=None)
+            _create_context_files_message(context_files, token_counter=None)
        )
-    if project_files.file_metadata_for_tool and token_counter:
+    if context_files.file_metadata_for_tool and token_counter:
        messages.append(
            _create_file_tool_metadata_message(
-                project_files.file_metadata_for_tool, token_counter
+                context_files.file_metadata_for_tool, token_counter
            )
        )
    return messages
@@ -275,7 +273,7 @@ def construct_message_history(
    custom_agent_prompt: ChatMessageSimple | None,
    simple_chat_history: list[ChatMessageSimple],
    reminder_message: ChatMessageSimple | None,
-    project_files: ExtractedProjectFiles | None,
+    context_files: ExtractedContextFiles | None,
    available_tokens: int,
    last_n_user_messages: int | None = None,
    token_counter: Callable[[str], int] | None = None,
@@ -289,7 +287,7 @@ def construct_message_history(

    # Build the project / file-metadata messages up front so we can use their
    # actual token counts for the budget.
-    project_messages = _build_project_message(project_files, token_counter)
+    project_messages = _build_project_message(context_files, token_counter)
    project_messages_tokens = sum(m.token_count for m in project_messages)

    history_token_budget = available_tokens
@@ -445,17 +443,17 @@ def construct_message_history(
                    )

    # Attach project images to the last user message
-    if project_files and project_files.project_image_files:
+    if context_files and context_files.image_files:
        existing_images = last_user_message.image_files or []
        last_user_message = ChatMessageSimple(
            message=last_user_message.message,
            token_count=last_user_message.token_count,
            message_type=last_user_message.message_type,
-            image_files=existing_images + project_files.project_image_files,
+            image_files=existing_images + context_files.image_files,
        )

    # Build the final message list according to README ordering:
-    # [system], [history_before_last_user], [custom_agent], [project_files],
+    # [system], [history_before_last_user], [custom_agent], [context_files],
    # [forgotten_files], [last_user_message], [messages_after_last_user], [reminder]
    result = [system_prompt] if system_prompt else []

@@ -466,14 +464,14 @@ def construct_message_history(
    if custom_agent_prompt:
        result.append(custom_agent_prompt)

-    # 3. Add project files / file-metadata messages (inserted before last user message)
+    # 3. Add context files / file-metadata messages (inserted before last user message)
    result.extend(project_messages)

    # 4. Add forgotten-files metadata (right before the user's question)
    if forgotten_files_message:
        result.append(forgotten_files_message)

-    # 5. Add last user message (with project images attached)
+    # 5. Add last user message (with context images attached)
    result.append(last_user_message)

    # 6. Add messages after last user message (tool calls, responses, etc.)
@@ -547,11 +545,11 @@ def _create_file_tool_metadata_message(
    )


-def _create_project_files_message(
-    project_files: ExtractedProjectFiles,
+def _create_context_files_message(
+    context_files: ExtractedContextFiles,
    token_counter: Callable[[str], int] | None,  # noqa: ARG001
 ) -> ChatMessageSimple:
-    """Convert project files to a ChatMessageSimple message.
+    """Convert context files to a ChatMessageSimple message.

    Format follows the README specification for document representation.
    """
@@ -559,7 +557,7 @@ def _create_project_files_message(

    # Format as documents JSON as described in README
    documents_list = []
-    for idx, file_text in enumerate(project_files.project_file_texts, start=1):
+    for idx, file_text in enumerate(context_files.file_texts, start=1):
        documents_list.append(
            {
                "document": idx,
@@ -570,10 +568,10 @@ def _create_project_files_message(
    documents_json = json.dumps({"documents": documents_list}, indent=2)
    message_content = f"Here are some documents provided for context, they may not all be relevant:\n{documents_json}"

-    # Use pre-calculated token count from project_files
+    # Use pre-calculated token count from context_files
    return ChatMessageSimple(
        message=message_content,
-        token_count=project_files.total_token_count,
+        token_count=context_files.total_token_count,
        message_type=MessageType.USER,
    )

@@ -584,7 +582,7 @@ def run_llm_loop(
    simple_chat_history: list[ChatMessageSimple],
    tools: list[Tool],
    custom_agent_prompt: str | None,
-    project_files: ExtractedProjectFiles,
+    context_files: ExtractedContextFiles,
    persona: Persona | None,
    user_memory_context: UserMemoryContext | None,
    llm: LLM,
@@ -627,9 +625,9 @@ def run_llm_loop(

        # Add project file citation mappings if project files are present
        project_citation_mapping: CitationMapping = {}
-        if project_files.project_file_metadata:
-            project_citation_mapping = _build_project_file_citation_mapping(
-                project_files.project_file_metadata
+        if context_files.file_metadata:
+            project_citation_mapping = _build_context_file_citation_mapping(
+                context_files.file_metadata
            )
            citation_processor.update_citation_mapping(project_citation_mapping)

@@ -647,7 +645,7 @@ def run_llm_loop(
        # TODO allow citing of images in Projects. Since attached to the last user message, it has no text associated with it.
        # One future workaround is to include the images as separate user messages with citation information and process those.
        always_cite_documents: bool = bool(
-            project_files.project_as_filter or project_files.project_file_texts
+            context_files.use_as_search_filter or context_files.file_texts
        )
        should_cite_documents: bool = False
        ran_image_gen: bool = False
@@ -788,7 +786,7 @@ def run_llm_loop(
                custom_agent_prompt=custom_agent_prompt_msg,
                simple_chat_history=simple_chat_history,
                reminder_message=reminder_msg,
-                project_files=project_files,
+                context_files=context_files,
                available_tokens=available_tokens,
                token_counter=token_counter,
                all_injected_file_metadata=all_injected_file_metadata,
--- a/backend/onyx/chat/models.py
+++ b/backend/onyx/chat/models.py
@@ -31,13 +31,6 @@ class CustomToolResponse(BaseModel):
    tool_name: str


-class ProjectSearchConfig(BaseModel):
-    """Configuration for search tool availability in project context."""
-
-    search_usage: SearchToolUsage
-    disable_forced_tool: bool
-
-
 class CreateChatSessionID(BaseModel):
    chat_session_id: UUID

@@ -132,8 +125,8 @@ class ChatMessageSimple(BaseModel):
    file_id: str | None = None


-class ProjectFileMetadata(BaseModel):
-    """Metadata for a project file to enable citation support."""
+class ContextFileMetadata(BaseModel):
+    """Metadata for a context-injected file to enable citation support."""

    file_id: str
    filename: str
@@ -167,20 +160,28 @@ class ChatHistoryResult(BaseModel):
    all_injected_file_metadata: dict[str, FileToolMetadata]


-class ExtractedProjectFiles(BaseModel):
-    project_file_texts: list[str]
-    project_image_files: list[ChatLoadedFile]
-    project_as_filter: bool
+class ExtractedContextFiles(BaseModel):
+    """Result of attempting to load user files (from a project or persona) into context."""
+
+    file_texts: list[str]
+    image_files: list[ChatLoadedFile]
+    use_as_search_filter: bool
    total_token_count: int
-    # Metadata for project files to enable citations
-    project_file_metadata: list[ProjectFileMetadata]
-    # None if not a project
-    project_uncapped_token_count: int | None
    # Lightweight metadata for files exposed via FileReaderTool
-    # (populated when files don't fit in context and vector DB is disabled)
+    # (populated when files don't fit in context and vector DB is disabled).
+    file_metadata: list[ContextFileMetadata]
+    uncapped_token_count: int | None
    file_metadata_for_tool: list[FileToolMetadata] = []


+class SearchParams(BaseModel):
+    """Resolved search filter IDs and search-tool usage for a chat turn."""
+
+    search_project_id: int | None
+    search_persona_id: int | None
+    search_usage: SearchToolUsage
+
+
 class LlmStepResult(BaseModel):
    reasoning: str | None
    answer: str | None
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -3,6 +3,7 @@ IMPORTANT: familiarize yourself with the design concepts prior to contributing t
 An overview can be found in the README.md file in this directory.
 """

+import io
 import re
 import traceback
 from collections.abc import Callable
@@ -33,11 +34,11 @@ from onyx.chat.models import ChatBasicResponse
 from onyx.chat.models import ChatFullResponse
 from onyx.chat.models import ChatLoadedFile
 from onyx.chat.models import ChatMessageSimple
+from onyx.chat.models import ContextFileMetadata
 from onyx.chat.models import CreateChatSessionID
-from onyx.chat.models import ExtractedProjectFiles
+from onyx.chat.models import ExtractedContextFiles
 from onyx.chat.models import FileToolMetadata
-from onyx.chat.models import ProjectFileMetadata
-from onyx.chat.models import ProjectSearchConfig
+from onyx.chat.models import SearchParams
 from onyx.chat.models import StreamingError
 from onyx.chat.models import ToolCallResponse
 from onyx.chat.prompt_utils import calculate_reserved_tokens
@@ -62,11 +63,12 @@ from onyx.db.models import ChatSession
 from onyx.db.models import Persona
 from onyx.db.models import User
 from onyx.db.models import UserFile
-from onyx.db.projects import get_project_token_count
 from onyx.db.projects import get_user_files_from_project
 from onyx.db.tools import get_tools
 from onyx.deep_research.dr_loop import run_deep_research_llm_loop
+from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_store.models import ChatFileType
+from onyx.file_store.models import InMemoryChatFile
 from onyx.file_store.utils import load_in_memory_chat_files
 from onyx.file_store.utils import verify_user_files
 from onyx.llm.factory import get_llm_for_persona
@@ -139,12 +141,12 @@ def _collect_available_file_ids(
                pass

    if project_id:
-        project_files = get_user_files_from_project(
+        user_files = get_user_files_from_project(
            project_id=project_id,
            user_id=user_id,
            db_session=db_session,
        )
-        for uf in project_files:
+        for uf in user_files:
            user_file_ids.add(uf.id)

    return _AvailableFiles(
@@ -192,9 +194,67 @@ def _convert_loaded_files_to_chat_files(
    return chat_files


-def _extract_project_file_texts_and_images(
+def resolve_context_user_files(
+    persona: Persona,
    project_id: int | None,
    user_id: UUID | None,
+    db_session: Session,
+) -> list[UserFile]:
+    """Apply the precedence rule to decide which user files to load.
+
+    A custom persona fully supersedes the project.  When a chat uses a
+    custom persona, the project is purely organisational — its files are
+    never loaded and never made searchable.
+
+    Custom persona → persona's own user_files (may be empty).
+    Default persona inside a project → project files.
+    Otherwise → empty list.
+    """
+    if persona.id != DEFAULT_PERSONA_ID:
+        return list(persona.user_files) if persona.user_files else []
+    if project_id:
+        return get_user_files_from_project(
+            project_id=project_id,
+            user_id=user_id,
+            db_session=db_session,
+        )
+    return []
+
+
+def _empty_extracted_context_files() -> ExtractedContextFiles:
+    return ExtractedContextFiles(
+        file_texts=[],
+        image_files=[],
+        use_as_search_filter=False,
+        total_token_count=0,
+        file_metadata=[],
+        uncapped_token_count=None,
+    )
+
+
+def _extract_text_from_in_memory_file(f: InMemoryChatFile) -> str | None:
+    """Extract text content from an InMemoryChatFile.
+
+    PLAIN_TEXT: the content is pre-extracted UTF-8 plaintext stored during
+    ingestion — decode directly.
+    DOC / CSV / other text types: the content is the original file bytes —
+    use extract_file_text which handles encoding detection and format parsing.
+    """
+    try:
+        if f.file_type == ChatFileType.PLAIN_TEXT:
+            return f.content.decode("utf-8", errors="ignore").replace("\x00", "")
+        return extract_file_text(
+            file=io.BytesIO(f.content),
+            file_name=f.filename or "",
+            break_on_unprocessable=False,
+        )
+    except Exception:
+        logger.warning(f"Failed to extract text from file {f.file_id}", exc_info=True)
+        return None
+
+
+def extract_context_files(
+    user_files: list[UserFile],
    llm_max_context_window: int,
    reserved_token_count: int,
    db_session: Session,
@@ -203,8 +263,12 @@ def _extract_project_file_texts_and_images(
    # 60% of the LLM's max context window. The other benefit is that for projects with
    # more files, this makes it so that we don't throw away the history too quickly every time.
    max_llm_context_percentage: float = 0.6,
-) -> ExtractedProjectFiles:
-    """Extract text content from project files if they fit within the context window.
+) -> ExtractedContextFiles:
+    """Load user files into context if they fit; otherwise flag for search.
+
+    The caller is responsible for deciding *which* user files to pass in
+    (project files, persona files, etc.).  This function only cares about
+    the all-or-nothing fit check and the actual content loading.

    Args:
        project_id: The project ID to load files from
@@ -213,160 +277,95 @@ def _extract_project_file_texts_and_images(
        reserved_token_count: Number of tokens to reserve for other content
        db_session: Database session
        max_llm_context_percentage: Maximum percentage of the LLM context window to use.
-
    Returns:
-        ExtractedProjectFiles containing:
-        - List of text content strings from project files (text files only)
-        - List of image files from project (ChatLoadedFile objects)
-        - Project id if the the project should be provided as a filter in search or None if not.
+        ExtractedContextFiles containing:
+        - List of text content strings from context files (text files only)
+        - List of image files from context (ChatLoadedFile objects)
        - Total token count of all extracted files
+        - File metadata for context files
+        - Uncapped token count of all extracted files
+        - File metadata for files that don't fit in context and vector DB is disabled
    """
-    # TODO I believe this is not handling all file types correctly.
-    project_as_filter = False
-    if not project_id:
-        return ExtractedProjectFiles(
-            project_file_texts=[],
-            project_image_files=[],
-            project_as_filter=False,
-            total_token_count=0,
-            project_file_metadata=[],
-            project_uncapped_token_count=None,
-        )
+    # TODO(yuhong): I believe this is not handling all file types correctly.

+    if not user_files:
+        return _empty_extracted_context_files()
+
+    aggregate_tokens = sum(uf.token_count or 0 for uf in user_files)
    max_actual_tokens = (
        llm_max_context_window - reserved_token_count
    ) * max_llm_context_percentage

-    # Calculate total token count for all user files in the project
-    project_tokens = get_project_token_count(
-        project_id=project_id,
-        user_id=user_id,
+    if aggregate_tokens >= max_actual_tokens:
+        tool_metadata = []
+        use_as_search_filter = not DISABLE_VECTOR_DB
+        if DISABLE_VECTOR_DB:
+            tool_metadata = _build_file_tool_metadata_for_user_files(user_files)
+        return ExtractedContextFiles(
+            file_texts=[],
+            image_files=[],
+            use_as_search_filter=use_as_search_filter,
+            total_token_count=0,
+            file_metadata=[],
+            uncapped_token_count=aggregate_tokens,
+            file_metadata_for_tool=tool_metadata,
+        )
+
+    # Files fit — load them into context
+    user_file_map = {str(uf.id): uf for uf in user_files}
+    in_memory_files = load_in_memory_chat_files(
+        user_file_ids=[uf.id for uf in user_files],
        db_session=db_session,
    )

-    project_file_texts: list[str] = []
-    project_image_files: list[ChatLoadedFile] = []
-    project_file_metadata: list[ProjectFileMetadata] = []
+    file_texts: list[str] = []
+    image_files: list[ChatLoadedFile] = []
+    file_metadata: list[ContextFileMetadata] = []
    total_token_count = 0
-    if project_tokens < max_actual_tokens:
-        # Load project files into memory using cached plaintext when available
-        project_user_files = get_user_files_from_project(
-            project_id=project_id,
-            user_id=user_id,
-            db_session=db_session,
-        )
-        if project_user_files:
-            # Create a mapping from file_id to UserFile for token count lookup
-            user_file_map = {str(file.id): file for file in project_user_files}

-            project_file_ids = [file.id for file in project_user_files]
-            in_memory_project_files = load_in_memory_chat_files(
-                user_file_ids=project_file_ids,
-                db_session=db_session,
+    for f in in_memory_files:
+        uf = user_file_map.get(str(f.file_id))
+        if f.file_type.is_text_file():
+            text_content = _extract_text_from_in_memory_file(f)
+            if not text_content:
+                continue
+            file_texts.append(text_content)
+            file_metadata.append(
+                ContextFileMetadata(
+                    file_id=str(f.file_id),
+                    filename=f.filename or f"file_{f.file_id}",
+                    file_content=text_content,
+                )
+            )
+            if uf and uf.token_count:
+                total_token_count += uf.token_count
+        elif f.file_type == ChatFileType.IMAGE:
+            token_count = uf.token_count if uf and uf.token_count else 0
+            total_token_count += token_count
+            image_files.append(
+                ChatLoadedFile(
+                    file_id=f.file_id,
+                    content=f.content,
+                    file_type=f.file_type,
+                    filename=f.filename,
+                    content_text=None,
+                    token_count=token_count,
+                )
            )

-            # Extract text content from loaded files
-            for file in in_memory_project_files:
-                if file.file_type.is_text_file():
-                    try:
-                        text_content = file.content.decode("utf-8", errors="ignore")
-                        # Strip null bytes
-                        text_content = text_content.replace("\x00", "")
-                        if text_content:
-                            project_file_texts.append(text_content)
-                            # Add metadata for citation support
-                            project_file_metadata.append(
-                                ProjectFileMetadata(
-                                    file_id=str(file.file_id),
-                                    filename=file.filename or f"file_{file.file_id}",
-                                    file_content=text_content,
-                                )
-                            )
-                            # Add token count for text file
-                            user_file = user_file_map.get(str(file.file_id))
-                            if user_file and user_file.token_count:
-                                total_token_count += user_file.token_count
-                    except Exception:
-                        # Skip files that can't be decoded
-                        pass
-                elif file.file_type == ChatFileType.IMAGE:
-                    # Convert InMemoryChatFile to ChatLoadedFile
-                    user_file = user_file_map.get(str(file.file_id))
-                    token_count = (
-                        user_file.token_count
-                        if user_file and user_file.token_count
-                        else 0
-                    )
-                    total_token_count += token_count
-                    chat_loaded_file = ChatLoadedFile(
-                        file_id=file.file_id,
-                        content=file.content,
-                        file_type=file.file_type,
-                        filename=file.filename,
-                        content_text=None,  # Images don't have text content
-                        token_count=token_count,
-                    )
-                    project_image_files.append(chat_loaded_file)
-    else:
-        if DISABLE_VECTOR_DB:
-            # Without a vector DB we can't use project-as-filter search.
-            # Instead, build lightweight metadata so the LLM can call the
-            # FileReaderTool to inspect individual files on demand.
-            file_metadata_for_tool = _build_file_tool_metadata_for_project(
-                project_id=project_id,
-                user_id=user_id,
-                db_session=db_session,
-            )
-            return ExtractedProjectFiles(
-                project_file_texts=[],
-                project_image_files=[],
-                project_as_filter=False,
-                total_token_count=0,
-                project_file_metadata=[],
-                project_uncapped_token_count=project_tokens,
-                file_metadata_for_tool=file_metadata_for_tool,
-            )
-        project_as_filter = True
-
-    return ExtractedProjectFiles(
-        project_file_texts=project_file_texts,
-        project_image_files=project_image_files,
-        project_as_filter=project_as_filter,
+    return ExtractedContextFiles(
+        file_texts=file_texts,
+        image_files=image_files,
+        use_as_search_filter=False,
        total_token_count=total_token_count,
-        project_file_metadata=project_file_metadata,
-        project_uncapped_token_count=project_tokens,
+        file_metadata=file_metadata,
+        uncapped_token_count=aggregate_tokens,
    )


 APPROX_CHARS_PER_TOKEN = 4


-def _build_file_tool_metadata_for_project(
-    project_id: int,
-    user_id: UUID | None,
-    db_session: Session,
-) -> list[FileToolMetadata]:
-    """Build lightweight FileToolMetadata for every file in a project.
-
-    Used when files are too large to fit in context and the vector DB is
-    disabled, so the LLM needs to know which files it can read via the
-    FileReaderTool.
-    """
-    project_user_files = get_user_files_from_project(
-        project_id=project_id,
-        user_id=user_id,
-        db_session=db_session,
-    )
-    return [
-        FileToolMetadata(
-            file_id=str(uf.id),
-            filename=uf.name,
-            approx_char_count=(uf.token_count or 0) * APPROX_CHARS_PER_TOKEN,
-        )
-        for uf in project_user_files
-    ]
-
-
 def _build_file_tool_metadata_for_user_files(
    user_files: list[UserFile],
 ) -> list[FileToolMetadata]:
@@ -381,55 +380,46 @@ def _build_file_tool_metadata_for_user_files(
    ]


-def _get_project_search_availability(
+def determine_search_params(
+    persona_id: int,
    project_id: int | None,
-    persona_id: int | None,
-    loaded_project_files: bool,
-    project_has_files: bool,
-    forced_tool_id: int | None,
-    search_tool_id: int | None,
-) -> ProjectSearchConfig:
-    """Determine search tool availability based on project context.
+    extracted_context_files: ExtractedContextFiles,
+) -> SearchParams:
+    """Decide which search filter IDs and search-tool usage apply for a chat turn.

-    Search is disabled when ALL of the following are true:
-    - User is in a project
-    - Using the default persona (not a custom agent)
-    - Project files are already loaded in context
+    A custom persona fully supersedes the project — project files are never
+    searchable and the search tool config is entirely controlled by the
+    persona.  The project_id filter is only set for the default persona.

-    When search is disabled and the user tried to force the search tool,
-    that forcing is also disabled.
-
-    Returns AUTO (follow persona config) in all other cases.
+    For the default persona inside a project:
+      - Files overflow  → ENABLED  (vector DB scopes to these files)
+      - Files fit       → DISABLED (content already in prompt)
+      - No files at all → DISABLED (nothing to search)
    """
-    # Not in a project, this should have no impact on search tool availability
-    if not project_id:
-        return ProjectSearchConfig(
-            search_usage=SearchToolUsage.AUTO, disable_forced_tool=False
-        )
+    is_custom_persona = persona_id != DEFAULT_PERSONA_ID

-    # Custom persona in project - let persona config decide
-    # Even if there are no files in the project, it's still guided by the persona config.
-    if persona_id != DEFAULT_PERSONA_ID:
-        return ProjectSearchConfig(
-            search_usage=SearchToolUsage.AUTO, disable_forced_tool=False
-        )
+    search_project_id: int | None = None
+    search_persona_id: int | None = None
+    if extracted_context_files.use_as_search_filter:
+        if is_custom_persona:
+            search_persona_id = persona_id
+        else:
+            search_project_id = project_id

-    # If in a project with the default persona and the files have been already loaded into the context or
-    # there are no files in the project, disable search as there is nothing to search for.
-    if loaded_project_files or not project_has_files:
-        user_forced_search = (
-            forced_tool_id is not None
-            and search_tool_id is not None
-            and forced_tool_id == search_tool_id
-        )
-        return ProjectSearchConfig(
-            search_usage=SearchToolUsage.DISABLED,
-            disable_forced_tool=user_forced_search,
-        )
+    search_usage = SearchToolUsage.AUTO
+    if not is_custom_persona and project_id:
+        has_context_files = bool(extracted_context_files.uncapped_token_count)
+        files_loaded_in_context = bool(extracted_context_files.file_texts)

-    # Default persona in a project with files, but also the files have not been loaded into the context already.
-    return ProjectSearchConfig(
-        search_usage=SearchToolUsage.ENABLED, disable_forced_tool=False
+        if extracted_context_files.use_as_search_filter:
+            search_usage = SearchToolUsage.ENABLED
+        elif files_loaded_in_context or not has_context_files:
+            search_usage = SearchToolUsage.DISABLED
+
+    return SearchParams(
+        search_project_id=search_project_id,
+        search_persona_id=search_persona_id,
+        search_usage=search_usage,
    )


@@ -661,26 +651,37 @@ def handle_stream_message_objects(
            user_memory_context=prompt_memory_context,
        )

-        # Process projects, if all of the files fit in the context, it doesn't need to use RAG
-        extracted_project_files = _extract_project_file_texts_and_images(
+        # Determine which user files to use.  A custom persona fully
+        # supersedes the project — project files are never loaded or
+        # searchable when a custom persona is in play.  Only the default
+        # persona inside a project uses the project's files.
+        context_user_files = resolve_context_user_files(
+            persona=persona,
            project_id=chat_session.project_id,
            user_id=user_id,
+            db_session=db_session,
+        )
+
+        extracted_context_files = extract_context_files(
+            user_files=context_user_files,
            llm_max_context_window=llm.config.max_input_tokens,
            reserved_token_count=reserved_token_count,
            db_session=db_session,
        )

-        # When the vector DB is disabled, persona-attached user_files have no
-        # search pipeline path. Inject them as file_metadata_for_tool so the
-        # LLM can read them via the FileReaderTool.
-        if DISABLE_VECTOR_DB and persona.user_files:
-            persona_file_metadata = _build_file_tool_metadata_for_user_files(
-                persona.user_files
-            )
-            # Merge persona file metadata into the extracted project files
-            extracted_project_files.file_metadata_for_tool.extend(persona_file_metadata)
+        search_params = determine_search_params(
+            persona_id=persona.id,
+            project_id=chat_session.project_id,
+            extracted_context_files=extracted_context_files,
+        )
+
+        # Also grant access to persona-attached user files for FileReaderTool
+        if persona.user_files:
+            existing = set(available_files.user_file_ids)
+            for uf in persona.user_files:
+                if uf.id not in existing:
+                    available_files.user_file_ids.append(uf.id)

-        # Build a mapping of tool_id to tool_name for history reconstruction
        all_tools = get_tools(db_session)
        tool_id_to_name_map = {tool.id: tool.name for tool in all_tools}

@@ -689,30 +690,17 @@ def handle_stream_message_objects(
            None,
        )

-        # Determine if search should be disabled for this project context
        forced_tool_id = new_msg_req.forced_tool_id
-        project_search_config = _get_project_search_availability(
-            project_id=chat_session.project_id,
-            persona_id=persona.id,
-            loaded_project_files=bool(extracted_project_files.project_file_texts),
-            project_has_files=bool(
-                extracted_project_files.project_uncapped_token_count
-            ),
-            forced_tool_id=new_msg_req.forced_tool_id,
-            search_tool_id=search_tool_id,
-        )
-        if project_search_config.disable_forced_tool:
+        if (
+            search_params.search_usage == SearchToolUsage.DISABLED
+            and forced_tool_id is not None
+            and search_tool_id is not None
+            and forced_tool_id == search_tool_id
+        ):
            forced_tool_id = None

        emitter = get_default_emitter()

-        # Also grant access to persona-attached user files
-        if persona.user_files:
-            existing = set(available_files.user_file_ids)
-            for uf in persona.user_files:
-                if uf.id not in existing:
-                    available_files.user_file_ids.append(uf.id)
-
        # Construct tools based on the persona configurations
        tool_dict = construct_tools(
            persona=persona,
@@ -722,11 +710,8 @@ def handle_stream_message_objects(
            llm=llm,
            search_tool_config=SearchToolConfig(
                user_selected_filters=new_msg_req.internal_search_filters,
-                project_id=(
-                    chat_session.project_id
-                    if extracted_project_files.project_as_filter
-                    else None
-                ),
+                project_id=search_params.search_project_id,
+                persona_id=search_params.search_persona_id,
                bypass_acl=bypass_acl,
                slack_context=slack_context,
                enable_slack_search=_should_enable_slack_search(
@@ -744,7 +729,7 @@ def handle_stream_message_objects(
                chat_file_ids=available_files.chat_file_ids,
            ),
            allowed_tool_ids=new_msg_req.allowed_tool_ids,
-            search_usage_forcing_setting=project_search_config.search_usage,
+            search_usage_forcing_setting=search_params.search_usage,
        )
        tools: list[Tool] = []
        for tool_list in tool_dict.values():
@@ -783,7 +768,7 @@ def handle_stream_message_objects(
        chat_history_result = convert_chat_history(
            chat_history=chat_history,
            files=files,
-            project_image_files=extracted_project_files.project_image_files,
+            context_image_files=extracted_context_files.image_files,
            additional_context=additional_context,
            token_counter=token_counter,
            tool_id_to_name_map=tool_id_to_name_map,
@@ -879,46 +864,54 @@ def handle_stream_message_objects(
            # (user has already responded to a clarification question)
            skip_clarification = is_last_assistant_message_clarification(chat_history)

+            # NOTE: we _could_ pass in a zero argument function since emitter and state_container
+            # are just passed in immediately anyways, but the abstraction is cleaner this way.
            yield from run_chat_loop_with_state_containers(
-                run_deep_research_llm_loop,
+                lambda emitter, state_container: run_deep_research_llm_loop(
+                    emitter=emitter,
+                    state_container=state_container,
+                    simple_chat_history=simple_chat_history,
+                    tools=tools,
+                    custom_agent_prompt=custom_agent_prompt,
+                    llm=llm,
+                    token_counter=token_counter,
+                    db_session=db_session,
+                    skip_clarification=skip_clarification,
+                    user_identity=user_identity,
+                    chat_session_id=str(chat_session.id),
+                    all_injected_file_metadata=all_injected_file_metadata,
+                ),
                llm_loop_completion_callback,
                is_connected=check_is_connected,
                emitter=emitter,
                state_container=state_container,
-                simple_chat_history=simple_chat_history,
-                tools=tools,
-                custom_agent_prompt=custom_agent_prompt,
-                llm=llm,
-                token_counter=token_counter,
-                db_session=db_session,
-                skip_clarification=skip_clarification,
-                user_identity=user_identity,
-                chat_session_id=str(chat_session.id),
-                all_injected_file_metadata=all_injected_file_metadata,
            )
        else:
            yield from run_chat_loop_with_state_containers(
-                run_llm_loop,
+                lambda emitter, state_container: run_llm_loop(
+                    emitter=emitter,
+                    state_container=state_container,
+                    simple_chat_history=simple_chat_history,
+                    tools=tools,
+                    custom_agent_prompt=custom_agent_prompt,
+                    context_files=extracted_context_files,
+                    persona=persona,
+                    user_memory_context=user_memory_context,
+                    llm=llm,
+                    token_counter=token_counter,
+                    db_session=db_session,
+                    forced_tool_id=forced_tool_id,
+                    user_identity=user_identity,
+                    chat_session_id=str(chat_session.id),
+                    chat_files=chat_files_for_tools,
+                    include_citations=new_msg_req.include_citations,
+                    all_injected_file_metadata=all_injected_file_metadata,
+                    inject_memories_in_prompt=user.use_memories,
+                ),
                llm_loop_completion_callback,
                is_connected=check_is_connected,  # Not passed through to run_llm_loop
                emitter=emitter,
                state_container=state_container,
-                simple_chat_history=simple_chat_history,
-                tools=tools,
-                custom_agent_prompt=custom_agent_prompt,
-                project_files=extracted_project_files,
-                persona=persona,
-                user_memory_context=user_memory_context,
-                llm=llm,
-                token_counter=token_counter,
-                db_session=db_session,
-                forced_tool_id=forced_tool_id,
-                user_identity=user_identity,
-                chat_session_id=str(chat_session.id),
-                chat_files=chat_files_for_tools,
-                include_citations=new_msg_req.include_citations,
-                all_injected_file_metadata=all_injected_file_metadata,
-                inject_memories_in_prompt=user.use_memories,
            )

    except ValueError as e:
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -210,10 +210,10 @@ AUTH_COOKIE_EXPIRE_TIME_SECONDS = int(
 REQUIRE_EMAIL_VERIFICATION = (
    os.environ.get("REQUIRE_EMAIL_VERIFICATION", "").lower() == "true"
 )
-SMTP_SERVER = os.environ.get("SMTP_SERVER") or "smtp.gmail.com"
+SMTP_SERVER = os.environ.get("SMTP_SERVER") or ""
 SMTP_PORT = int(os.environ.get("SMTP_PORT") or "587")
-SMTP_USER = os.environ.get("SMTP_USER", "your-email@gmail.com")
-SMTP_PASS = os.environ.get("SMTP_PASS", "your-gmail-password")
+SMTP_USER = os.environ.get("SMTP_USER") or ""
+SMTP_PASS = os.environ.get("SMTP_PASS") or ""
 EMAIL_FROM = os.environ.get("EMAIL_FROM") or SMTP_USER

 SENDGRID_API_KEY = os.environ.get("SENDGRID_API_KEY") or ""
@@ -294,6 +294,12 @@ ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
    ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
    and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
 )
+# Whether we should check for and create an index if necessary every time we
+# instantiate an OpenSearchDocumentIndex on multitenant cloud. Defaults to True.
+VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
+    os.environ.get("VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT", "true").lower()
+    == "true"
+)

 VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
 # NOTE: this is used if and only if the vespa config server is accessible via a
--- a/backend/onyx/connectors/google_utils/google_utils.py
+++ b/backend/onyx/connectors/google_utils/google_utils.py
@@ -16,6 +16,22 @@ from onyx.utils.retry_wrapper import retry_builder

 logger = setup_logger()

+_RATE_LIMIT_REASONS = {"userRateLimitExceeded", "rateLimitExceeded"}
+
+
+def _is_rate_limit_error(error: HttpError) -> bool:
+    """Report whether a Google API error is a rate limit: HTTP 429, or HTTP 403
+    whose reason (structured details or error text) is in _RATE_LIMIT_REASONS."""
+    status = error.resp.status
+    if status == 429:
+        return True
+    if status != 403:
+        return False
+    for detail in getattr(error, "error_details", None) or []:
+        if isinstance(detail, dict) and detail.get("reason") in _RATE_LIMIT_REASONS:
+            return True
+    return any(reason in str(error) for reason in _RATE_LIMIT_REASONS)
+

 # Google Drive APIs are quite flakey and may 500 for an
 # extended period of time. This is now addressed by checkpointing.
@@ -57,7 +73,7 @@ def _execute_with_retry(request: Any) -> Any:
        except HttpError as error:
            attempt += 1

-            if error.resp.status == 429:
+            if _is_rate_limit_error(error):
                # Attempt to get 'Retry-After' from headers
                retry_after = error.resp.get("Retry-After")
                if retry_after:
@@ -140,16 +156,16 @@ def _execute_single_retrieval(
                )
            logger.error(f"Error executing request: {e}")
            raise e
+        elif _is_rate_limit_error(e):
+            results = _execute_with_retry(
+                lambda: retrieval_function(**request_kwargs).execute()
+            )
        elif e.resp.status == 404 or e.resp.status == 403:
            if continue_on_404_or_403:
                logger.debug(f"Error executing request: {e}")
                results = {}
            else:
                raise e
-        elif e.resp.status == 429:
-            results = _execute_with_retry(
-                lambda: retrieval_function(**request_kwargs).execute()
-            )
        else:
            logger.exception("Error executing request:")
            raise e
--- a/backend/onyx/connectors/sharepoint/connector.py
+++ b/backend/onyx/connectors/sharepoint/connector.py
@@ -23,7 +23,6 @@ from cryptography.hazmat.primitives import hashes
 from cryptography.hazmat.primitives import serialization
 from cryptography.hazmat.primitives.serialization import pkcs12
 from office365.graph_client import GraphClient  # type: ignore[import-untyped]
-from office365.intune.organizations.organization import Organization  # type: ignore[import-untyped]
 from office365.onedrive.driveitems.driveItem import DriveItem  # type: ignore[import-untyped]
 from office365.onedrive.sites.site import Site  # type: ignore[import-untyped]
 from office365.onedrive.sites.sites_with_root import SitesWithRoot  # type: ignore[import-untyped]
@@ -147,7 +146,9 @@ class DriveItemData(BaseModel):
            self.id,
            ResourcePath("items", ResourcePath(self.drive_id, ResourcePath("drives"))),
        )
-        return DriveItem(graph_client, path)
+        item = DriveItem(graph_client, path)
+        item.set_property("id", self.id)
+        return item


 # The office365 library's ClientContext caches the access token from its
@@ -870,6 +871,56 @@ class SharepointConnector(
                    "Site URLs must be full Sharepoint URLs (e.g. https://your-tenant.sharepoint.com/sites/your-site or https://your-tenant.sharepoint.com/teams/your-team)"
                )

+    def _extract_tenant_domain_from_sites(self) -> str | None:
+        """Derive the tenant name from the first usable configured site URL.
+
+        For a URL such as https://{tenant}.sharepoint.com/sites/..., the tenant
+        is the first dot-separated label of the hostname. Returns None (after
+        logging a warning) when no configured site yields one.
+        """
+        for raw_url in self.sites:
+            try:
+                host = urlsplit(raw_url.strip()).hostname
+            except ValueError:
+                continue
+            # A missing hostname or an empty first label means: try the next URL.
+            first_label = host.partition(".")[0] if host else ""
+            if first_label:
+                return first_label
+        logger.warning(f"No tenant domain found from {len(self.sites)} sites")
+        return None
+
+    def _resolve_tenant_domain_from_root_site(self) -> str:
+        """Look up the tenant domain from the root site's hostname via Graph
+        (GET /v1.0/sites/root, which only requires Sites.Read.All)."""
+        root = self.graph_client.sites.root.get().execute_query()
+        root_hostname = root.site_collection.hostname
+        if root_hostname:
+            domain = root_hostname.split(".")[0]
+            logger.info(
+                "Resolved tenant domain '%s' from root site hostname '%s'",
+                domain,
+                root_hostname,
+            )
+            return domain
+        raise ConnectorValidationError(
+            "Could not determine tenant domain from root site"
+        )
+
+    def _resolve_tenant_domain(self) -> str:
+        """Return the tenant domain, trying the configured site URLs first and
+        falling back to the Graph root site only when they yield nothing."""
+        tenant_domain = self._extract_tenant_domain_from_sites()
+        if not tenant_domain:
+            # Fallback costs a Graph API call, so it is only taken when needed.
+            logger.info(
+                "No site URLs available; resolving tenant domain from root site"
+            )
+            return self._resolve_tenant_domain_from_root_site()
+
+        logger.info("Resolved tenant domain '%s' from site URLs", tenant_domain)
+        return tenant_domain
+
    @property
    def graph_client(self) -> GraphClient:
        if self._graph_client is None:
@@ -1587,6 +1638,11 @@ class SharepointConnector(
        sp_private_key = credentials.get("sp_private_key")
        sp_certificate_password = credentials.get("sp_certificate_password")

+        if not sp_client_id:
+            raise ConnectorValidationError("Client ID is required")
+        if not sp_directory_id:
+            raise ConnectorValidationError("Directory (tenant) ID is required")
+
        authority_url = f"{self.authority_host}/{sp_directory_id}"

        if auth_method == SharepointAuthMethod.CERTIFICATE.value:
@@ -1639,21 +1695,7 @@ class SharepointConnector(
            _acquire_token_for_graph, environment=self._azure_environment
        )
        if auth_method == SharepointAuthMethod.CERTIFICATE.value:
-            org = self.graph_client.organization.get().execute_query()
-            if not org or len(org) == 0:
-                raise ConnectorValidationError("No organization found")
-
-            tenant_info: Organization = org[
-                0
-            ]  # Access first item directly from collection
-            if not tenant_info.verified_domains:
-                raise ConnectorValidationError("No verified domains found for tenant")
-
-            sp_tenant_domain = tenant_info.verified_domains[0].name
-            if not sp_tenant_domain:
-                raise ConnectorValidationError("No verified domains found for tenant")
-            # remove the .onmicrosoft.com part
-            self.sp_tenant_domain = sp_tenant_domain.split(".")[0]
+            self.sp_tenant_domain = self._resolve_tenant_domain()
        return None

    def _get_drive_names_for_site(self, site_url: str) -> list[str]:
--- a/backend/onyx/connectors/slab/connector.py
+++ b/backend/onyx/connectors/slab/connector.py
@@ -11,6 +11,7 @@ from dateutil import parser

 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
+from onyx.connectors.exceptions import ConnectorValidationError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
 from onyx.connectors.interfaces import GenerateSlimDocumentOutput
 from onyx.connectors.interfaces import LoadConnector
@@ -258,3 +259,21 @@ class SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
                slim_doc_batch = []
        if slim_doc_batch:
            yield slim_doc_batch
+
+    def validate_connector_settings(self) -> None:
+        """Run basic sanity checks on the connector configuration.
+
+        Checks that the base URL has an http(s) scheme, then calls
+        get_all_post_ids with the configured bot token.
+
+        Raises:
+            ConnectorValidationError: The base URL does not start with
+                https:// or http://, or fetching post IDs failed for any
+                reason other than a missing credential.
+            ConnectorMissingCredentialError: Re-raised unchanged if raised
+                while reading the token or fetching post IDs.
+        """
+        if not self.base_url.startswith(("https://", "http://")):
+            raise ConnectorValidationError(
+                "Base URL must start with https:// or http://"
+            )
+
+        try:
+            get_all_post_ids(self.slab_bot_token)
+        except ConnectorMissingCredentialError:
+            # Let the more specific credential error reach the caller as-is.
+            raise
+        except Exception as e:
+            # Chain the original exception so its traceback is not lost.
+            raise ConnectorValidationError(
+                f"Failed to fetch posts from Slab: {e}"
+            ) from e
--- a/backend/onyx/context/search/models.py
+++ b/backend/onyx/context/search/models.py
@@ -72,6 +72,7 @@ class BaseFilters(BaseModel):
 class UserFileFilters(BaseModel):
    user_file_ids: list[UUID] | None = None
    project_id: int | None = None
+    persona_id: int | None = None


 class AssistantKnowledgeFilters(BaseModel):
--- a/backend/onyx/context/search/pipeline.py
+++ b/backend/onyx/context/search/pipeline.py
@@ -40,6 +40,7 @@ def _build_index_filters(
    user_provided_filters: BaseFilters | None,
    user: User,  # Used for ACLs, anonymous users only see public docs
    project_id: int | None,
+    persona_id: int | None,
    user_file_ids: list[UUID] | None,
    persona_document_sets: list[str] | None,
    persona_time_cutoff: datetime | None,
@@ -118,6 +119,7 @@ def _build_index_filters(
    final_filters = IndexFilters(
        user_file_ids=user_file_ids,
        project_id=project_id,
+        persona_id=persona_id,
        source_type=source_filter,
        document_set=document_set_filter,
        time_cutoff=time_filter,
@@ -265,6 +267,8 @@ def search_pipeline(
    llm: LLM | None = None,
    # If a project ID is provided, it will be exclusively scoped to that project
    project_id: int | None = None,
+    # If a persona_id is provided, search scopes to files attached to this persona
+    persona_id: int | None = None,
    # Pre-fetched data — when provided, avoids DB queries (no session needed)
    acl_filters: list[str] | None = None,
    embedding_model: EmbeddingModel | None = None,
@@ -299,6 +303,7 @@ def search_pipeline(
        user_provided_filters=chunk_search_request.user_selected_filters,
        user=user,
        project_id=project_id,
+        persona_id=persona_id,
        user_file_ids=user_uploaded_persona_files,
        persona_document_sets=persona_document_sets,
        persona_time_cutoff=persona_time_cutoff,
--- a/backend/onyx/db/code_interpreter.py
+++ b/backend/onyx/db/code_interpreter.py
@@ -4,6 +4,13 @@ from sqlalchemy.orm import Session
 from onyx.db.models import CodeInterpreterServer


+def fetch_code_interpreter_server(db_session: Session) -> CodeInterpreterServer:
+    """Return the single CodeInterpreterServer row.
+
+    Uses ``.one()``, so SQLAlchemy raises if the table holds zero rows or more
+    than one row.
+    """
+    return db_session.scalars(select(CodeInterpreterServer)).one()
+
+
 def update_code_interpreter_server_enabled(
    db_session: Session,
    enabled: bool,
--- a/backend/onyx/db/engine/async_sql_engine.py
+++ b/backend/onyx/db/engine/async_sql_engine.py
@@ -21,8 +21,8 @@ from onyx.configs.app_configs import POSTGRES_POOL_RECYCLE
 from onyx.configs.app_configs import POSTGRES_PORT
 from onyx.configs.app_configs import POSTGRES_USE_NULL_POOL
 from onyx.configs.app_configs import POSTGRES_USER
+from onyx.db.engine.iam_auth import create_ssl_context_if_iam
 from onyx.db.engine.iam_auth import get_iam_auth_token
-from onyx.db.engine.iam_auth import ssl_context
 from onyx.db.engine.sql_engine import ASYNC_DB_API
 from onyx.db.engine.sql_engine import build_connection_string
 from onyx.db.engine.sql_engine import is_valid_schema_name
@@ -66,7 +66,7 @@ def get_sqlalchemy_async_engine() -> AsyncEngine:
        if app_name:
            connect_args["server_settings"] = {"application_name": app_name}

-        connect_args["ssl"] = ssl_context
+        connect_args["ssl"] = create_ssl_context_if_iam()

        engine_kwargs = {
            "connect_args": connect_args,
@@ -97,7 +97,7 @@ def get_sqlalchemy_async_engine() -> AsyncEngine:
                user = POSTGRES_USER
                token = get_iam_auth_token(host, port, user, AWS_REGION_NAME)
                cparams["password"] = token
-                cparams["ssl"] = ssl_context
+                cparams["ssl"] = create_ssl_context_if_iam()

    return _ASYNC_ENGINE

--- a/backend/onyx/db/engine/iam_auth.py
+++ b/backend/onyx/db/engine/iam_auth.py
@@ -1,3 +1,4 @@
+import functools
 import os
 import ssl
 from typing import Any
@@ -48,11 +49,9 @@ def provide_iam_token(
        configure_psycopg2_iam_auth(cparams, host, port, user, region)


+@functools.cache
 def create_ssl_context_if_iam() -> ssl.SSLContext | None:
    """Create an SSL context if IAM authentication is enabled, else return None."""
    if USE_IAM_AUTH:
        return ssl.create_default_context(cafile=SSL_CERT_FILE)
    return None
-
-
-ssl_context = create_ssl_context_if_iam()
--- a/backend/onyx/db/llm.py
+++ b/backend/onyx/db/llm.py
@@ -619,7 +619,7 @@ def update_default_provider(provider_id: int, db_session: Session) -> None:
    _update_default_model(
        db_session,
        provider_id,
-        provider.default_model_name,
+        provider.default_model_name,  # type: ignore[arg-type]
        LLMModelFlowType.CHAT,
    )

--- a/backend/onyx/db/models.py
+++ b/backend/onyx/db/models.py
@@ -2822,13 +2822,17 @@ class LLMProvider(Base):
    custom_config: Mapped[dict[str, str] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
-    default_model_name: Mapped[str] = mapped_column(String)
+
+    # Deprecated: use LLMModelFlow with CHAT flow type instead
+    default_model_name: Mapped[str | None] = mapped_column(String, nullable=True)

    deployment_name: Mapped[str | None] = mapped_column(String, nullable=True)

-    # should only be set for a single provider
-    is_default_provider: Mapped[bool | None] = mapped_column(Boolean, unique=True)
+    # Deprecated: use LLMModelFlow.is_default with CHAT flow type instead
+    is_default_provider: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
+    # Deprecated: use LLMModelFlow.is_default with VISION flow type instead
    is_default_vision_provider: Mapped[bool | None] = mapped_column(Boolean)
+    # Deprecated: use LLMModelFlow with VISION flow type instead
    default_vision_model: Mapped[str | None] = mapped_column(String, nullable=True)
    # EE only
    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
@@ -2879,6 +2883,7 @@ class ModelConfiguration(Base):
    # - The end-user is configuring a model and chooses not to set a max-input-tokens limit.
    max_input_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)

+    # Deprecated: use LLMModelFlow with VISION flow type instead
    supports_image_input: Mapped[bool | None] = mapped_column(Boolean, nullable=True)

    # Human-readable display name for the model.
@@ -4270,6 +4275,9 @@ class UserFile(Base):
    needs_project_sync: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=False
    )
+    needs_persona_sync: Mapped[bool] = mapped_column(
+        Boolean, nullable=False, default=False
+    )
    last_project_sync_at: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), nullable=True
    )
--- a/backend/onyx/db/persona.py
+++ b/backend/onyx/db/persona.py
@@ -256,9 +256,6 @@ def create_update_persona(
    try:
        # Default persona validation
        if create_persona_request.is_default_persona:
-            if not create_persona_request.is_public:
-                raise ValueError("Cannot make a default persona non public")
-
            # Curators can edit default personas, but not make them
            if user.role == UserRole.CURATOR or user.role == UserRole.GLOBAL_CURATOR:
                pass
@@ -335,6 +332,7 @@ def update_persona_shared(
    db_session: Session,
    group_ids: list[int] | None = None,
    is_public: bool | None = None,
+    label_ids: list[int] | None = None,
 ) -> None:
    """Simplified version of `create_update_persona` which only touches the
    accessibility rather than any of the logic (e.g. prompt, connected data sources,
@@ -344,9 +342,7 @@ def update_persona_shared(
    )

    if user and user.role != UserRole.ADMIN and persona.user_id != user.id:
-        raise HTTPException(
-            status_code=403, detail="You don't have permission to modify this persona"
-        )
+        raise PermissionError("You don't have permission to modify this persona")

    versioned_update_persona_access = fetch_versioned_implementation(
        "onyx.db.persona", "update_persona_access"
@@ -360,6 +356,15 @@ def update_persona_shared(
        group_ids=group_ids,
    )

+    if label_ids is not None:
+        labels = (
+            db_session.query(PersonaLabel).filter(PersonaLabel.id.in_(label_ids)).all()
+        )
+        if len(labels) != len(label_ids):
+            raise ValueError("Some label IDs were not found in the database")
+        persona.labels.clear()
+        persona.labels = labels
+
    db_session.commit()


@@ -765,6 +770,9 @@ def mark_persona_as_deleted(
 ) -> None:
    persona = get_persona_by_id(persona_id=persona_id, user=user, db_session=db_session)
    persona.deleted = True
+    affected_file_ids = [uf.id for uf in persona.user_files]
+    if affected_file_ids:
+        _mark_files_need_persona_sync(db_session, affected_file_ids)
    db_session.commit()


@@ -776,11 +784,13 @@ def mark_persona_as_not_deleted(
    persona = get_persona_by_id(
        persona_id=persona_id, user=user, db_session=db_session, include_deleted=True
    )
-    if persona.deleted:
-        persona.deleted = False
-        db_session.commit()
-    else:
+    if not persona.deleted:
        raise ValueError(f"Persona with ID {persona_id} is not deleted.")
+    persona.deleted = False
+    affected_file_ids = [uf.id for uf in persona.user_files]
+    if affected_file_ids:
+        _mark_files_need_persona_sync(db_session, affected_file_ids)
+    db_session.commit()


 def mark_delete_persona_by_name(
@@ -846,6 +856,20 @@ def update_personas_display_priority(
        db_session.commit()


+def _mark_files_need_persona_sync(
+    db_session: Session,
+    user_file_ids: list[UUID],
+) -> None:
+    """Flag UserFile rows as needing a persona sync.
+
+    Sets ``needs_persona_sync`` to True on every row whose id is in
+    ``user_file_ids`` so the background sync task picks them up and updates
+    their persona metadata in the vector DB. Nothing is committed here.
+
+    Args:
+        db_session: Session used to issue the bulk UPDATE.
+        user_file_ids: IDs of the UserFile rows to flag; an empty list is a
+            no-op.
+    """
+    if user_file_ids:
+        matching_files = db_session.query(UserFile).filter(
+            UserFile.id.in_(user_file_ids)
+        )
+        # Bulk UPDATE without reconciling objects already loaded in the session.
+        matching_files.update(
+            {UserFile.needs_persona_sync: True},
+            synchronize_session=False,
+        )
+
+
 def upsert_persona(
    user: User | None,
    name: str,
@@ -946,6 +970,8 @@ def upsert_persona(
        labels = (
            db_session.query(PersonaLabel).filter(PersonaLabel.id.in_(label_ids)).all()
        )
+        if len(labels) != len(label_ids):
+            raise ValueError("Some label IDs were not found in the database")

    # Fetch and attach hierarchy_nodes by IDs
    hierarchy_nodes = None
@@ -1034,8 +1060,13 @@ def upsert_persona(
            existing_persona.tools = tools or []

        if user_file_ids is not None:
+            old_file_ids = {uf.id for uf in existing_persona.user_files}
+            new_file_ids = {uf.id for uf in (user_files or [])}
+            affected_file_ids = old_file_ids | new_file_ids
            existing_persona.user_files.clear()
            existing_persona.user_files = user_files or []
+            if affected_file_ids:
+                _mark_files_need_persona_sync(db_session, list(affected_file_ids))

        if hierarchy_node_ids is not None:
            existing_persona.hierarchy_nodes.clear()
@@ -1089,6 +1120,8 @@ def upsert_persona(
            attached_documents=attached_documents or [],
        )
        db_session.add(new_persona)
+        if user_files:
+            _mark_files_need_persona_sync(db_session, [uf.id for uf in user_files])
        persona = new_persona
    if commit:
        db_session.commit()
@@ -1135,9 +1168,6 @@ def update_persona_is_default(
        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
    )

-    if not persona.is_public:
-        persona.is_public = True
-
    persona.is_default_persona = is_default
    db_session.commit()

--- a/backend/onyx/db/seeding/chat_history_seeding.py
+++ b/backend/onyx/db/seeding/chat_history_seeding.py
@@ -2,6 +2,7 @@ import random
 from datetime import datetime
 from datetime import timedelta
 from logging import getLogger
+from uuid import UUID

 from onyx.configs.constants import MessageType
 from onyx.db.chat import create_chat_session
@@ -13,18 +14,26 @@ from onyx.db.models import ChatSession
 logger = getLogger(__name__)


-def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
+def seed_chat_history(
+    num_sessions: int,
+    num_messages: int,
+    days: int,
+    user_id: UUID | None = None,
+    persona_id: int | None = None,
+) -> None:
    """Utility function to seed chat history for testing.

    num_sessions: the number of sessions to seed
    num_messages: the number of messages to seed per sessions
    days: the number of days looking backwards from the current time over which to randomize
    the times.
+    user_id: optional user to associate with sessions
+    persona_id: optional persona/assistant to associate with sessions
    """
    with get_session_with_current_tenant() as db_session:
        logger.info(f"Seeding {num_sessions} sessions.")
        for y in range(0, num_sessions):
-            create_chat_session(db_session, f"pytest_session_{y}", None, None)
+            create_chat_session(db_session, f"pytest_session_{y}", user_id, persona_id)

        # randomize all session times
        logger.info(f"Seeding {num_messages} messages per session.")
--- a/backend/onyx/db/user_file.py
+++ b/backend/onyx/db/user_file.py
@@ -3,8 +3,10 @@ from uuid import UUID

 from sqlalchemy import func
 from sqlalchemy import select
+from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session

+from onyx.db.models import Project__UserFile
 from onyx.db.models import UserFile


@@ -56,10 +58,34 @@ def fetch_user_project_ids_for_user_files(
    db_session: Session,
 ) -> dict[str, list[int]]:
    """Fetch user project ids for specified user files"""
-    stmt = select(UserFile).where(UserFile.id.in_(user_file_ids))
+    user_file_uuid_ids = [UUID(user_file_id) for user_file_id in user_file_ids]
+    stmt = select(Project__UserFile.user_file_id, Project__UserFile.project_id).where(
+        Project__UserFile.user_file_id.in_(user_file_uuid_ids)
+    )
+    rows = db_session.execute(stmt).all()
+
+    user_file_id_to_project_ids: dict[str, list[int]] = {
+        user_file_id: [] for user_file_id in user_file_ids
+    }
+    for user_file_id, project_id in rows:
+        user_file_id_to_project_ids[str(user_file_id)].append(project_id)
+
+    return user_file_id_to_project_ids
+
+
+def fetch_persona_ids_for_user_files(
+    user_file_ids: list[str],
+    db_session: Session,
+) -> dict[str, list[int]]:
+    """Fetch persona (assistant) ids for specified user files."""
+    stmt = (
+        select(UserFile)
+        .where(UserFile.id.in_(user_file_ids))
+        .options(selectinload(UserFile.assistants))
+    )
    results = db_session.execute(stmt).scalars().all()
    return {
-        str(user_file.id): [project.id for project in user_file.projects]
+        str(user_file.id): [persona.id for persona in user_file.assistants]
        for user_file in results
    }

--- a/backend/onyx/deep_research/dr_loop.py
+++ b/backend/onyx/deep_research/dr_loop.py
@@ -139,7 +139,7 @@ def generate_final_report(
            custom_agent_prompt=None,
            simple_chat_history=history,
            reminder_message=reminder_message,
-            project_files=None,
+            context_files=None,
            available_tokens=llm.config.max_input_tokens,
            all_injected_file_metadata=all_injected_file_metadata,
        )
@@ -257,7 +257,7 @@ def run_deep_research_llm_loop(
                    custom_agent_prompt=None,
                    simple_chat_history=simple_chat_history,
                    reminder_message=None,
-                    project_files=None,
+                    context_files=None,
                    available_tokens=available_tokens,
                    last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
                    all_injected_file_metadata=all_injected_file_metadata,
@@ -321,7 +321,7 @@ def run_deep_research_llm_loop(
                custom_agent_prompt=None,
                simple_chat_history=simple_chat_history + [reminder_message],
                reminder_message=None,
-                project_files=None,
+                context_files=None,
                available_tokens=available_tokens,
                last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT + 1,
                all_injected_file_metadata=all_injected_file_metadata,
@@ -485,7 +485,7 @@ def run_deep_research_llm_loop(
                    custom_agent_prompt=None,
                    simple_chat_history=simple_chat_history,
                    reminder_message=first_cycle_reminder_message,
-                    project_files=None,
+                    context_files=None,
                    available_tokens=available_tokens,
                    last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
                    all_injected_file_metadata=all_injected_file_metadata,
--- a/backend/onyx/document_index/factory.py
+++ b/backend/onyx/document_index/factory.py
@@ -11,6 +11,7 @@ from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchOldDocumentIndex,
 )
 from onyx.document_index.vespa.index import VespaIndex
+from onyx.indexing.models import IndexingSetting
 from shared_configs.configs import MULTI_TENANT


@@ -49,8 +50,11 @@ def get_default_document_index(

    opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
    if opensearch_retrieval_enabled:
+        indexing_setting = IndexingSetting.from_db_model(search_settings)
        return OpenSearchOldDocumentIndex(
            index_name=search_settings.index_name,
+            embedding_dim=indexing_setting.final_embedding_dim,
+            embedding_precision=indexing_setting.embedding_precision,
            secondary_index_name=secondary_index_name,
            large_chunks_enabled=search_settings.large_chunks_enabled,
            secondary_large_chunks_enabled=secondary_large_chunks_enabled,
@@ -118,8 +122,11 @@ def get_all_document_indices(
    )
    opensearch_document_index: OpenSearchOldDocumentIndex | None = None
    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
+        indexing_setting = IndexingSetting.from_db_model(search_settings)
        opensearch_document_index = OpenSearchOldDocumentIndex(
            index_name=search_settings.index_name,
+            embedding_dim=indexing_setting.final_embedding_dim,
+            embedding_precision=indexing_setting.embedding_precision,
            secondary_index_name=None,
            large_chunks_enabled=False,
            secondary_large_chunks_enabled=None,
--- a/backend/onyx/document_index/interfaces.py
+++ b/backend/onyx/document_index/interfaces.py
@@ -121,6 +121,7 @@ class VespaDocumentUserFields:
    """

    user_projects: list[int] | None = None
+    personas: list[int] | None = None


@dataclass
--- a/backend/onyx/document_index/interfaces_new.py
+++ b/backend/onyx/document_index/interfaces_new.py
@@ -148,6 +148,7 @@ class MetadataUpdateRequest(BaseModel):
    hidden: bool | None = None
    secondary_index_updated: bool | None = None
    project_ids: set[int] | None = None
+    persona_ids: set[int] | None = None


 class IndexRetrievalFilters(BaseModel):
--- a/backend/onyx/document_index/opensearch/client.py
+++ b/backend/onyx/document_index/opensearch/client.py
@@ -1,5 +1,7 @@
 import logging
 import time
+from contextlib import AbstractContextManager
+from contextlib import nullcontext
 from typing import Any
 from typing import Generic
 from typing import TypeVar
@@ -83,22 +85,26 @@ def get_new_body_without_vectors(body: dict[str, Any]) -> dict[str, Any]:
    return new_body


-class OpenSearchClient:
-    """Client for interacting with OpenSearch.
+class OpenSearchClient(AbstractContextManager):
+    """Client for interacting with OpenSearch for cluster-level operations.

-    OpenSearch's Python module has pretty bad typing support so this client
-    attempts to protect the rest of the codebase from this. As a consequence,
-    most methods here return the minimum data needed for the rest of Onyx, and
-    tend to rely on Exceptions to handle errors.
-
-    TODO(andrei): This class currently assumes the structure of the database
-    schema when it returns a DocumentChunk. Make the class, or at least the
-    search method, templated on the structure the caller can expect.
+    Args:
+        host: The host of the OpenSearch cluster.
+        port: The port of the OpenSearch cluster.
+        auth: The authentication credentials for the OpenSearch cluster. A tuple
+            of (username, password).
+        use_ssl: Whether to use SSL for the OpenSearch cluster. Defaults to
+            True.
+        verify_certs: Whether to verify the SSL certificates for the OpenSearch
+            cluster. Defaults to False.
+        ssl_show_warn: Whether to show warnings for SSL certificates. Defaults
+            to False.
+        timeout: The timeout for the OpenSearch cluster. Defaults to
+            DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S.
    """

    def __init__(
        self,
-        index_name: str,
        host: str = OPENSEARCH_HOST,
        port: int = OPENSEARCH_REST_API_PORT,
        auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),
@@ -107,9 +113,8 @@ class OpenSearchClient:
        ssl_show_warn: bool = False,
        timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
    ):
-        self._index_name = index_name
        logger.debug(
-            f"Creating OpenSearch client for index {index_name} with host {host} and port {port} and timeout {timeout} seconds."
+            f"Creating OpenSearch client with host {host}, port {port} and timeout {timeout} seconds."
        )
        self._client = OpenSearch(
            hosts=[{"host": host, "port": port}],
@@ -125,6 +130,142 @@ class OpenSearchClient:
            # your request body that is less than this value.
            timeout=timeout,
        )
+
+    def __exit__(self, *_: Any) -> None:
+        """Closes the client when a `with` block exits.
+
+        Returns None, so an exception raised inside the block is not
+        suppressed.
+        """
+        self.close()
+
+    def __del__(self) -> None:
+        """Best-effort close when the object is finalized."""
+        try:
+            self.close()
+        except Exception:
+            # Deliberately ignored: a finalizer has no caller to report to.
+            pass
+
+    @log_function_time(print_only=True, debug_only=True, include_args=True)
+    def create_search_pipeline(
+        self,
+        pipeline_id: str,
+        pipeline_body: dict[str, Any],
+    ) -> None:
+        """Registers a search pipeline under the given ID.
+
+        The body format is described in the OpenSearch documentation:
+        https://docs.opensearch.org/latest/search-plugins/search-pipelines/index/
+
+        Args:
+            pipeline_id: ID to store the search pipeline under.
+            pipeline_body: Definition of the search pipeline.
+
+        Raises:
+            RuntimeError: The response did not report the request as
+                acknowledged.
+            Exception: There was an error creating the search pipeline.
+        """
+        response = self._client.search_pipeline.put(
+            id=pipeline_id, body=pipeline_body
+        )
+        acknowledged = response.get("acknowledged", False)
+        if not acknowledged:
+            raise RuntimeError(f"Failed to create search pipeline {pipeline_id}.")
+
+    @log_function_time(print_only=True, debug_only=True, include_args=True)
+    def delete_search_pipeline(self, pipeline_id: str) -> None:
+        """Removes the search pipeline stored under the given ID.
+
+        Args:
+            pipeline_id: ID of the search pipeline to remove.
+
+        Raises:
+            RuntimeError: The response did not report the request as
+                acknowledged.
+            Exception: There was an error deleting the search pipeline.
+        """
+        response = self._client.search_pipeline.delete(id=pipeline_id)
+        acknowledged = response.get("acknowledged", False)
+        if not acknowledged:
+            raise RuntimeError(f"Failed to delete search pipeline {pipeline_id}.")
+
+    @log_function_time(print_only=True, debug_only=True, include_args=True)
+    def put_cluster_settings(self, settings: dict[str, Any]) -> bool:
+        """Applies cluster-level settings.
+
+        An unacknowledged response is logged and reported through the return
+        value rather than raised.
+
+        Args:
+            settings: The settings body to send to the cluster.
+
+        Raises:
+            Exception: There was an error putting the cluster settings.
+
+        Returns:
+            True if the response was acknowledged, False otherwise.
+        """
+        response = self._client.cluster.put_settings(body=settings)
+        acknowledged = response.get("acknowledged", False)
+        if not acknowledged:
+            logger.error(f"Failed to put cluster settings: {response}.")
+            return False
+        logger.info("Successfully put cluster settings.")
+        return True
+
+    @log_function_time(print_only=True, debug_only=True)
+    def ping(self) -> bool:
+        """Pings the OpenSearch cluster.
+
+        Delegates to the underlying client's ping().
+
+        Returns:
+            True if OpenSearch could be reached, False if it could not.
+        """
+        return self._client.ping()
+
+    @log_function_time(print_only=True, debug_only=True)
+    def close(self) -> None:
+        """Closes the underlying OpenSearch client.
+
+        Also called by __exit__ and, on a best-effort basis, by __del__.
+
+        Raises:
+            Exception: There was an error closing the client.
+        """
+        self._client.close()
+
+
+class OpenSearchIndexClient(OpenSearchClient):
+    """Client for interacting with OpenSearch for index-level operations.
+
+    OpenSearch's Python module has pretty bad typing support so this client
+    attempts to protect the rest of the codebase from this. As a consequence,
+    most methods here return the minimum data needed for the rest of Onyx, and
+    tend to rely on Exceptions to handle errors.
+
+    TODO(andrei): This class currently assumes the structure of the database
+    schema when it returns a DocumentChunk. Make the class, or at least the
+    search method, templated on the structure the caller can expect.
+
+    Args:
+        index_name: The name of the index to interact with.
+        host: The host of the OpenSearch cluster.
+        port: The port of the OpenSearch cluster.
+        auth: The authentication credentials for the OpenSearch cluster. A tuple
+            of (username, password).
+        use_ssl: Whether to use SSL for the OpenSearch cluster. Defaults to
+            True.
+        verify_certs: Whether to verify the SSL certificates for the OpenSearch
+            cluster. Defaults to False.
+        ssl_show_warn: Whether to show warnings for SSL certificates. Defaults
+            to False.
+        timeout: The timeout for the OpenSearch cluster. Defaults to
+            DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S.
+    """
+
+    def __init__(
+        self,
+        index_name: str,
+        host: str = OPENSEARCH_HOST,
+        port: int = OPENSEARCH_REST_API_PORT,
+        auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),
+        use_ssl: bool = True,
+        verify_certs: bool = False,
+        ssl_show_warn: bool = False,
+        timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
+    ):
+        super().__init__(
+            host=host,
+            port=port,
+            auth=auth,
+            use_ssl=use_ssl,
+            verify_certs=verify_certs,
+            ssl_show_warn=ssl_show_warn,
+            timeout=timeout,
+        )
+        self._index_name = index_name
        logger.debug(
            f"OpenSearch client created successfully for index {self._index_name}."
        )
@@ -192,6 +333,38 @@ class OpenSearchClient:
        """
        return self._client.indices.exists(index=self._index_name)

+    @log_function_time(print_only=True, debug_only=True, include_args=True)
+    def put_mapping(self, mappings: dict[str, Any]) -> None:
+        """Applies a mapping update to the index; safe to repeat.
+
+        How the update interacts with what the index already has:
+        - Field already present with the same definition: no-op, succeeds
+          silently.
+        - Field not yet present: added to the index.
+        - Field already present with a different type: raises an exception
+          (a reindex is required).
+
+        OpenSearch documentation for the underlying API:
+        https://docs.opensearch.org/latest/api-reference/index-apis/put-mapping/
+
+        Args:
+            mappings: The complete mapping definition to apply. It is merged
+                with the mappings that already exist in the index.
+
+        Raises:
+            RuntimeError: The response did not report the request as
+                acknowledged.
+            Exception: There was an error updating the mappings, such as
+                attempting to change the type of an existing field.
+        """
+        logger.debug(
+            f"Putting mappings for index {self._index_name} with mappings {mappings}."
+        )
+        result = self._client.indices.put_mapping(
+            index=self._index_name, body=mappings
+        )
+        acknowledged = result.get("acknowledged", False)
+        if not acknowledged:
+            raise RuntimeError(
+                f"Failed to put the mapping update for index {self._index_name}."
+            )
+        logger.debug(f"Successfully put mappings for index {self._index_name}.")
+
    @log_function_time(print_only=True, debug_only=True, include_args=True)
    def validate_index(self, expected_mappings: dict[str, Any]) -> bool:
        """Validates the index.
@@ -610,43 +783,6 @@ class OpenSearchClient:
        )
        return DocumentChunk.model_validate(document_chunk_source)

-    @log_function_time(print_only=True, debug_only=True, include_args=True)
-    def create_search_pipeline(
-        self,
-        pipeline_id: str,
-        pipeline_body: dict[str, Any],
-    ) -> None:
-        """Creates a search pipeline.
-
-        See the OpenSearch documentation for more information on the search
-        pipeline body.
-        https://docs.opensearch.org/latest/search-plugins/search-pipelines/index/
-
-        Args:
-            pipeline_id: The ID of the search pipeline to create.
-            pipeline_body: The body of the search pipeline to create.
-
-        Raises:
-            Exception: There was an error creating the search pipeline.
-        """
-        result = self._client.search_pipeline.put(id=pipeline_id, body=pipeline_body)
-        if not result.get("acknowledged", False):
-            raise RuntimeError(f"Failed to create search pipeline {pipeline_id}.")
-
-    @log_function_time(print_only=True, debug_only=True, include_args=True)
-    def delete_search_pipeline(self, pipeline_id: str) -> None:
-        """Deletes a search pipeline.
-
-        Args:
-            pipeline_id: The ID of the search pipeline to delete.
-
-        Raises:
-            Exception: There was an error deleting the search pipeline.
-        """
-        result = self._client.search_pipeline.delete(id=pipeline_id)
-        if not result.get("acknowledged", False):
-            raise RuntimeError(f"Failed to delete search pipeline {pipeline_id}.")
-
    @log_function_time(print_only=True, debug_only=True)
    def search(
        self, body: dict[str, Any], search_pipeline_id: str | None
@@ -807,48 +943,6 @@ class OpenSearchClient:
        """
        self._client.indices.refresh(index=self._index_name)

-    @log_function_time(print_only=True, debug_only=True, include_args=True)
-    def put_cluster_settings(self, settings: dict[str, Any]) -> bool:
-        """Puts cluster settings.
-
-        Args:
-            settings: The settings to put.
-
-        Raises:
-            Exception: There was an error putting the cluster settings.
-
-        Returns:
-            True if the settings were put successfully, False otherwise.
-        """
-        response = self._client.cluster.put_settings(body=settings)
-        if response.get("acknowledged", False):
-            logger.info("Successfully put cluster settings.")
-            return True
-        else:
-            logger.error(f"Failed to put cluster settings: {response}.")
-            return False
-
-    @log_function_time(print_only=True, debug_only=True)
-    def ping(self) -> bool:
-        """Pings the OpenSearch cluster.
-
-        Returns:
-            True if OpenSearch could be reached, False if it could not.
-        """
-        return self._client.ping()
-
-    @log_function_time(print_only=True, debug_only=True)
-    def close(self) -> None:
-        """Closes the client.
-
-        TODO(andrei): Can we have some way to auto close when the client no
-        longer has any references?
-
-        Raises:
-            Exception: There was an error closing the client.
-        """
-        self._client.close()
-
    def _get_hits_and_profile_from_search_result(
        self, result: dict[str, Any]
    ) -> tuple[list[Any], int | None, bool | None, dict[str, Any], dict[str, Any]]:
@@ -945,14 +1039,7 @@ def wait_for_opensearch_with_timeout(
    Returns:
        True if OpenSearch is ready, False otherwise.
    """
-    made_client = False
-    try:
-        if client is None:
-            # NOTE: index_name does not matter because we are only using this object
-            # to ping.
-            # TODO(andrei): Make this better.
-            client = OpenSearchClient(index_name="")
-            made_client = True
+    with nullcontext(client) if client else OpenSearchClient() as client:
        time_start = time.monotonic()
        while True:
            if client.ping():
@@ -969,7 +1056,3 @@ def wait_for_opensearch_with_timeout(
                f"[OpenSearch] Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit_s:.1f}"
            )
            time.sleep(wait_interval_s)
-    finally:
-        if made_client:
-            assert client is not None
-            client.close()
--- a/backend/onyx/document_index/opensearch/opensearch_document_index.py
+++ b/backend/onyx/document_index/opensearch/opensearch_document_index.py
@@ -7,6 +7,7 @@ from opensearchpy import NotFoundError

 from onyx.access.models import DocumentAccess
 from onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH
+from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
 from onyx.configs.constants import PUBLIC_DOC_PAT
@@ -40,6 +41,7 @@ from onyx.document_index.interfaces_new import IndexingMetadata
 from onyx.document_index.interfaces_new import MetadataUpdateRequest
 from onyx.document_index.interfaces_new import TenantState
 from onyx.document_index.opensearch.client import OpenSearchClient
+from onyx.document_index.opensearch.client import OpenSearchIndexClient
 from onyx.document_index.opensearch.client import SearchHit
 from onyx.document_index.opensearch.cluster_settings import OPENSEARCH_CLUSTER_SETTINGS
 from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
@@ -50,6 +52,7 @@ from onyx.document_index.opensearch.schema import DocumentSchema
 from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
 from onyx.document_index.opensearch.schema import GLOBAL_BOOST_FIELD_NAME
 from onyx.document_index.opensearch.schema import HIDDEN_FIELD_NAME
+from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
 from onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME
 from onyx.document_index.opensearch.search import DocumentQuery
 from onyx.document_index.opensearch.search import (
@@ -92,6 +95,25 @@ def generate_opensearch_filtered_access_control_list(
    return list(access_control_list)


+def set_cluster_state(client: OpenSearchClient) -> None:
+    """Puts the desired cluster-level OpenSearch state using `client`.
+
+    First puts OPENSEARCH_CLUSTER_SETTINGS, then puts the min-max and z-score
+    normalization search pipelines, in that order. A falsy result from putting
+    the cluster settings is only logged; the pipelines are put regardless.
+
+    Args:
+        client: The client used to issue the cluster-level requests.
+    """
+    settings_applied = client.put_cluster_settings(
+        settings=OPENSEARCH_CLUSTER_SETTINGS
+    )
+    if not settings_applied:
+        # Deliberately best-effort: log and carry on rather than raise.
+        logger.error(
+            "Failed to put cluster settings. If the settings have never been set before, "
+            "this may cause unexpected index creation when indexing documents into an "
+            "index that does not exist, or may cause expected logs to not appear. If this "
+            "is not the first time running Onyx against this instance of OpenSearch, these "
+            "settings have likely already been set. Not taking any further action..."
+        )
+
+    normalization_pipelines = (
+        (MIN_MAX_NORMALIZATION_PIPELINE_NAME, MIN_MAX_NORMALIZATION_PIPELINE_CONFIG),
+        (ZSCORE_NORMALIZATION_PIPELINE_NAME, ZSCORE_NORMALIZATION_PIPELINE_CONFIG),
+    )
+    for name, config in normalization_pipelines:
+        client.create_search_pipeline(pipeline_id=name, pipeline_body=config)
+
+
 def _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
    chunk: DocumentChunk,
    score: float | None,
@@ -215,6 +237,7 @@ def _convert_onyx_chunk_to_opensearch_document(
        # OpenSearch and it will not store any data at all for this field, which
        # is different from supplying an empty list.
        user_projects=chunk.user_project or None,
+        personas=chunk.personas or None,
        primary_owners=get_experts_stores_representations(
            chunk.source_document.primary_owners
        ),
@@ -246,6 +269,8 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
    def __init__(
        self,
        index_name: str,
+        embedding_dim: int,
+        embedding_precision: EmbeddingPrecision,
        secondary_index_name: str | None,
        large_chunks_enabled: bool,  # noqa: ARG002
        secondary_large_chunks_enabled: bool | None,  # noqa: ARG002
@@ -256,10 +281,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
            index_name=index_name,
            secondary_index_name=secondary_index_name,
        )
-        if multitenant:
-            raise ValueError(
-                "Bug: OpenSearch is not yet ready for multitenant environments but something tried to use it."
-            )
        if multitenant != MULTI_TENANT:
            raise ValueError(
                "Bug: Multitenant mismatch when initializing an OpenSearchDocumentIndex. "
@@ -267,8 +288,10 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
            )
        tenant_id = get_current_tenant_id()
        self._real_index = OpenSearchDocumentIndex(
-            index_name=index_name,
            tenant_state=TenantState(tenant_id=tenant_id, multitenant=multitenant),
+            index_name=index_name,
+            embedding_dim=embedding_dim,
+            embedding_precision=embedding_precision,
        )

    @staticmethod
@@ -277,9 +300,8 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
        embedding_dims: list[int],
        embedding_precisions: list[EmbeddingPrecision],
    ) -> None:
-        # TODO(andrei): Implement.
        raise NotImplementedError(
-            "Multitenant index registration is not yet implemented for OpenSearch."
+            "Bug: Multitenant index registration is not supported for OpenSearch."
        )

    def ensure_indices_exist(
@@ -362,6 +384,11 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
                if user_fields and user_fields.user_projects
                else None
            ),
+            persona_ids=(
+                set(user_fields.personas)
+                if user_fields and user_fields.personas
+                else None
+            ),
        )

        try:
@@ -464,19 +491,37 @@ class OpenSearchDocumentIndex(DocumentIndex):
    for an OpenSearch search engine instance. It handles the complete lifecycle
    of document chunks within a specific OpenSearch index/schema.

-    Although not yet used in this way in the codebase, each kind of embedding
-    used should correspond to a different instance of this class, and therefore
-    a different index in OpenSearch.
+    Each kind of embedding used should correspond to a different instance of
+    this class, and therefore a different index in OpenSearch.
+
+    If in a multitenant environment and
+    VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT is set, the index is verified and
+    created if necessary on initialization. This is because there is no logic
+    which runs on cluster restart which scans through all search settings over
+    all tenants and creates the relevant indices.
+
+    Args:
+        tenant_state: The tenant state of the caller.
+        index_name: The name of the index to interact with.
+        embedding_dim: The dimensionality of the embeddings used for the index.
+        embedding_precision: The precision of the embeddings used for the index.
    """

    def __init__(
        self,
-        index_name: str,
        tenant_state: TenantState,
+        index_name: str,
+        embedding_dim: int,
+        embedding_precision: EmbeddingPrecision,
    ) -> None:
        self._index_name: str = index_name
        self._tenant_state: TenantState = tenant_state
-        self._os_client = OpenSearchClient(index_name=self._index_name)
+        self._client = OpenSearchIndexClient(index_name=self._index_name)
+
+        # embedding_dim and embedding_precision are only consumed here: when
+        # multitenant and the flag is enabled, the index is verified/created
+        # eagerly at construction time. Otherwise they are unused by __init__.
+        if self._tenant_state.multitenant and VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT:
+            self.verify_and_create_index_if_necessary(
+                embedding_dim=embedding_dim, embedding_precision=embedding_precision
+            )

    def verify_and_create_index_if_necessary(
        self,
@@ -485,10 +530,15 @@ class OpenSearchDocumentIndex(DocumentIndex):
    ) -> None:
        """Verifies and creates the index if necessary.

-        Also puts the desired cluster settings.
+        Also puts the desired cluster settings if not in a multitenant
+        environment.

-        Also puts the desired search pipeline state, creating the pipelines if
-        they do not exist and updating them otherwise.
+        Also puts the desired search pipeline state if not in a multitenant
+        environment, creating the pipelines if they do not exist and updating
+        them otherwise.
+
+        In a multitenant environment, the above steps happen explicitly on
+        setup.

        Args:
            embedding_dim: Vector dimensionality for the vector similarity part
@@ -501,47 +551,38 @@ class OpenSearchDocumentIndex(DocumentIndex):
                search pipelines.
        """
        logger.debug(
-            f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if necessary, "
-            f"with embedding dimension {embedding_dim}."
+            f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if "
+            f"necessary, with embedding dimension {embedding_dim}."
        )
+
+        if not self._tenant_state.multitenant:
+            set_cluster_state(self._client)
+
        expected_mappings = DocumentSchema.get_document_schema(
            embedding_dim, self._tenant_state.multitenant
        )
-        if not self._os_client.put_cluster_settings(
-            settings=OPENSEARCH_CLUSTER_SETTINGS
-        ):
-            logger.error(
-                f"Failed to put cluster settings for index {self._index_name}. If the settings have never been set before this "
-                "may cause unexpected index creation when indexing documents into an index that does not exist, or may cause "
-                "expected logs to not appear. If this is not the first time running Onyx against this instance of OpenSearch, "
-                "these settings have likely already been set. Not taking any further action..."
-            )
-        if not self._os_client.index_exists():
+
+        if not self._client.index_exists():
            if USING_AWS_MANAGED_OPENSEARCH:
                index_settings = (
                    DocumentSchema.get_index_settings_for_aws_managed_opensearch()
                )
            else:
                index_settings = DocumentSchema.get_index_settings()
-            self._os_client.create_index(
+            self._client.create_index(
                mappings=expected_mappings,
                settings=index_settings,
            )
-        if not self._os_client.validate_index(
-            expected_mappings=expected_mappings,
-        ):
-            raise RuntimeError(
-                f"The index {self._index_name} is not valid. The expected mappings do not match the actual mappings."
-            )
-
-        self._os_client.create_search_pipeline(
-            pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
-            pipeline_body=MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
-        )
-        self._os_client.create_search_pipeline(
-            pipeline_id=ZSCORE_NORMALIZATION_PIPELINE_NAME,
-            pipeline_body=ZSCORE_NORMALIZATION_PIPELINE_CONFIG,
-        )
+        else:
+            # Ensure schema is up to date by applying the current mappings.
+            try:
+                self._client.put_mapping(expected_mappings)
+            except Exception as e:
+                logger.error(
+                    f"Failed to update mappings for index {self._index_name}. This likely means a "
+                    f"field type was changed which requires reindexing. Error: {e}"
+                )
+                raise

    def index(
        self,
@@ -613,7 +654,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
            )
            # Now index. This will raise if a chunk of the same ID exists, which
            # we do not expect because we should have deleted all chunks.
-            self._os_client.bulk_index_documents(
+            self._client.bulk_index_documents(
                documents=chunk_batch,
                tenant_state=self._tenant_state,
            )
@@ -653,7 +694,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
            tenant_state=self._tenant_state,
        )

-        return self._os_client.delete_by_query(query_body)
+        return self._client.delete_by_query(query_body)

    def update(
        self,
@@ -709,6 +750,10 @@ class OpenSearchDocumentIndex(DocumentIndex):
                properties_to_update[USER_PROJECTS_FIELD_NAME] = list(
                    update_request.project_ids
                )
+            if update_request.persona_ids is not None:
+                properties_to_update[PERSONAS_FIELD_NAME] = list(
+                    update_request.persona_ids
+                )

            if not properties_to_update:
                if len(update_request.document_ids) > 1:
@@ -749,7 +794,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
                        document_id=doc_id,
                        chunk_index=chunk_index,
                    )
-                    self._os_client.update_document(
+                    self._client.update_document(
                        document_chunk_id=document_chunk_id,
                        properties_to_update=properties_to_update,
                    )
@@ -788,7 +833,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
                min_chunk_index=chunk_request.min_chunk_ind,
                max_chunk_index=chunk_request.max_chunk_ind,
            )
-            search_hits = self._os_client.search(
+            search_hits = self._client.search(
                body=query_body,
                search_pipeline_id=None,
            )
@@ -838,7 +883,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
        # NOTE: Using z-score normalization here because it's better for hybrid search from a theoretical standpoint.
        # Empirically on a small dataset of up to 10K docs, it's not very different. Likely more impactful at scale.
        # https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/
-        search_hits: list[SearchHit[DocumentChunk]] = self._os_client.search(
+        search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
            body=query_body,
            search_pipeline_id=ZSCORE_NORMALIZATION_PIPELINE_NAME,
        )
@@ -870,7 +915,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
            index_filters=filters,
            num_to_retrieve=num_to_retrieve,
        )
-        search_hits: list[SearchHit[DocumentChunk]] = self._os_client.search(
+        search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
            body=query_body,
            search_pipeline_id=None,
        )
@@ -898,6 +943,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
        # Do not raise if the document already exists, just update. This is
        # because the document may already have been indexed during the
        # OpenSearch transition period.
-        self._os_client.bulk_index_documents(
+        self._client.bulk_index_documents(
            documents=chunks, tenant_state=self._tenant_state, update_if_exists=True
        )
--- a/backend/onyx/document_index/opensearch/schema.py
+++ b/backend/onyx/document_index/opensearch/schema.py
@@ -41,6 +41,7 @@ IMAGE_FILE_ID_FIELD_NAME = "image_file_id"
 SOURCE_LINKS_FIELD_NAME = "source_links"
 DOCUMENT_SETS_FIELD_NAME = "document_sets"
 USER_PROJECTS_FIELD_NAME = "user_projects"
+PERSONAS_FIELD_NAME = "personas"
 DOCUMENT_ID_FIELD_NAME = "document_id"
 CHUNK_INDEX_FIELD_NAME = "chunk_index"
 MAX_CHUNK_SIZE_FIELD_NAME = "max_chunk_size"
@@ -156,6 +157,7 @@ class DocumentChunk(BaseModel):

    document_sets: list[str] | None = None
    user_projects: list[int] | None = None
+    personas: list[int] | None = None
    primary_owners: list[str] | None = None
    secondary_owners: list[str] | None = None

@@ -485,6 +487,7 @@ class DocumentSchema:
                # Product-specific fields.
                DOCUMENT_SETS_FIELD_NAME: {"type": "keyword"},
                USER_PROJECTS_FIELD_NAME: {"type": "integer"},
+                PERSONAS_FIELD_NAME: {"type": "integer"},
                PRIMARY_OWNERS_FIELD_NAME: {"type": "keyword"},
                SECONDARY_OWNERS_FIELD_NAME: {"type": "keyword"},
                # OpenSearch metadata fields.
--- a/backend/onyx/document_index/opensearch/search.py
+++ b/backend/onyx/document_index/opensearch/search.py
@@ -28,6 +28,7 @@ from onyx.document_index.opensearch.schema import HIDDEN_FIELD_NAME
 from onyx.document_index.opensearch.schema import LAST_UPDATED_FIELD_NAME
 from onyx.document_index.opensearch.schema import MAX_CHUNK_SIZE_FIELD_NAME
 from onyx.document_index.opensearch.schema import METADATA_LIST_FIELD_NAME
+from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
 from onyx.document_index.opensearch.schema import PUBLIC_FIELD_NAME
 from onyx.document_index.opensearch.schema import set_or_convert_timezone_to_utc
 from onyx.document_index.opensearch.schema import SOURCE_TYPE_FIELD_NAME
@@ -144,6 +145,7 @@ class DocumentQuery:
            document_sets=index_filters.document_set or [],
            user_file_ids=index_filters.user_file_ids or [],
            project_id=index_filters.project_id,
+            persona_id=index_filters.persona_id,
            time_cutoff=index_filters.time_cutoff,
            min_chunk_index=min_chunk_index,
            max_chunk_index=max_chunk_index,
@@ -202,6 +204,7 @@ class DocumentQuery:
            document_sets=[],
            user_file_ids=[],
            project_id=None,
+            persona_id=None,
            time_cutoff=None,
            min_chunk_index=None,
            max_chunk_index=None,
@@ -267,6 +270,7 @@ class DocumentQuery:
            document_sets=index_filters.document_set or [],
            user_file_ids=index_filters.user_file_ids or [],
            project_id=index_filters.project_id,
+            persona_id=index_filters.persona_id,
            time_cutoff=index_filters.time_cutoff,
            min_chunk_index=None,
            max_chunk_index=None,
@@ -334,6 +338,7 @@ class DocumentQuery:
            document_sets=index_filters.document_set or [],
            user_file_ids=index_filters.user_file_ids or [],
            project_id=index_filters.project_id,
+            persona_id=index_filters.persona_id,
            time_cutoff=index_filters.time_cutoff,
            min_chunk_index=None,
            max_chunk_index=None,
@@ -496,6 +501,7 @@ class DocumentQuery:
        document_sets: list[str],
        user_file_ids: list[UUID],
        project_id: int | None,
+        persona_id: int | None,
        time_cutoff: datetime | None,
        min_chunk_index: int | None,
        max_chunk_index: int | None,
@@ -530,6 +536,8 @@ class DocumentQuery:
                retrieved.
            project_id: If not None, only documents with this project ID in user
                projects will be retrieved.
+            persona_id: If not None, only documents whose personas array
+                contains this persona ID will be retrieved.
            time_cutoff: Time cutoff for the documents to retrieve. If not None,
                Documents which were last updated before this date will not be
                returned. For documents which do not have a value for their last
@@ -627,6 +635,9 @@ class DocumentQuery:
            )
            return user_project_filter

+        def _get_persona_filter(persona_id: int) -> dict[str, Any]:
+            """Builds a term clause on the personas field for persona_id."""
+            term_clause: dict[str, Any] = {PERSONAS_FIELD_NAME: {"value": persona_id}}
+            return {"term": term_clause}
+
        def _get_time_cutoff_filter(time_cutoff: datetime) -> dict[str, Any]:
            # Convert to UTC if not already so the cutoff is comparable to the
            # document data.
@@ -780,6 +791,9 @@ class DocumentQuery:
            # document's user projects list.
            filter_clauses.append(_get_user_project_filter(project_id))

+        if persona_id is not None:
+            filter_clauses.append(_get_persona_filter(persona_id))
+
        if time_cutoff is not None:
            # If a time cutoff is provided, the caller will only retrieve
            # documents where the document was last updated at or after the time
--- a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja
+++ b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja
@@ -181,6 +181,11 @@ schema {{ schema_name }} {
            rank: filter
            attribute: fast-search
        }
+        field personas type array<int> {
+            indexing: summary | attribute
+            rank: filter
+            attribute: fast-search
+        }
    }

    # If using different tokenization settings, the fieldset has to be removed, and the field must
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -689,6 +689,9 @@ class VespaIndex(DocumentIndex):
        project_ids: set[int] | None = None
        if user_fields is not None and user_fields.user_projects is not None:
            project_ids = set(user_fields.user_projects)
+        persona_ids: set[int] | None = None
+        if user_fields is not None and user_fields.personas is not None:
+            persona_ids = set(user_fields.personas)
        update_request = MetadataUpdateRequest(
            document_ids=[doc_id],
            doc_id_to_chunk_cnt={
@@ -699,6 +702,7 @@ class VespaIndex(DocumentIndex):
            boost=fields.boost if fields is not None else None,
            hidden=fields.hidden if fields is not None else None,
            project_ids=project_ids,
+            persona_ids=persona_ids,
        )

        vespa_document_index.update([update_request])
--- a/backend/onyx/document_index/vespa/indexing_utils.py
+++ b/backend/onyx/document_index/vespa/indexing_utils.py
@@ -46,6 +46,7 @@ from onyx.document_index.vespa_constants import METADATA
 from onyx.document_index.vespa_constants import METADATA_LIST
 from onyx.document_index.vespa_constants import METADATA_SUFFIX
 from onyx.document_index.vespa_constants import NUM_THREADS
+from onyx.document_index.vespa_constants import PERSONAS
 from onyx.document_index.vespa_constants import PRIMARY_OWNERS
 from onyx.document_index.vespa_constants import SECONDARY_OWNERS
 from onyx.document_index.vespa_constants import SECTION_CONTINUATION
@@ -218,6 +219,7 @@ def _index_vespa_chunk(
        # still called `image_file_name` in Vespa for backwards compatibility
        IMAGE_FILE_NAME: chunk.image_file_id,
        USER_PROJECT: chunk.user_project if chunk.user_project is not None else [],
+        PERSONAS: chunk.personas if chunk.personas is not None else [],
        BOOST: chunk.boost,
        AGGREGATED_CHUNK_BOOST_FACTOR: chunk.aggregated_chunk_boost_factor,
    }
--- a/backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py
+++ b/backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py
@@ -12,6 +12,7 @@ from onyx.document_index.vespa_constants import DOCUMENT_ID
 from onyx.document_index.vespa_constants import DOCUMENT_SETS
 from onyx.document_index.vespa_constants import HIDDEN
 from onyx.document_index.vespa_constants import METADATA_LIST
+from onyx.document_index.vespa_constants import PERSONAS
 from onyx.document_index.vespa_constants import SOURCE_TYPE
 from onyx.document_index.vespa_constants import TENANT_ID
 from onyx.document_index.vespa_constants import USER_PROJECT
@@ -149,6 +150,18 @@ def build_vespa_filters(
        # Vespa YQL 'contains' expects a string literal; quote the integer
        return f'({USER_PROJECT} contains "{pid}") and '

+    def _build_persona_filter(
+        persona_id: int | None,
+    ) -> str:
+        """Builds the YQL fragment restricting results to one persona.
+
+        Returns an empty string when persona_id is None or cannot be converted
+        to an int (the latter is logged as a warning); otherwise returns a
+        `contains` clause on the personas field followed by a trailing "and ".
+        """
+        if persona_id is not None:
+            try:
+                normalized_id = int(persona_id)
+            except Exception:
+                logger.warning(f"Invalid persona ID: {persona_id}")
+            else:
+                # The integer is quoted inside the contains clause, matching
+                # how the user project filter in this function is written.
+                return f'({PERSONAS} contains "{normalized_id}") and '
+        return ""
+
    # Start building the filter string
    filter_str = f"!({HIDDEN}=true) and " if not include_hidden else ""

@@ -192,6 +205,9 @@ def build_vespa_filters(
    # User project filter (array<int> attribute membership)
    filter_str += _build_user_project_filter(filters.project_id)

+    # Persona filter (array<int> attribute membership)
+    filter_str += _build_persona_filter(filters.persona_id)
+
    # Time filter
    filter_str += _build_time_filter(filters.time_cutoff)

--- a/backend/onyx/document_index/vespa/vespa_document_index.py
+++ b/backend/onyx/document_index/vespa/vespa_document_index.py
@@ -183,6 +183,10 @@ def _update_single_chunk(
        model_config = {"frozen": True}
        assign: list[int]

+    # Frozen payload model for the personas field of an update request. The
+    # `assign` key carries the list of persona IDs (presumably Vespa's "assign"
+    # update operation -- confirm against the Vespa document API).
+    class _Personas(BaseModel):
+        model_config = {"frozen": True}
+        assign: list[int]
+
    class _VespaPutFields(BaseModel):
        model_config = {"frozen": True}
        # The names of these fields are based on the Vespa schema. Changes to the
@@ -193,6 +197,7 @@ def _update_single_chunk(
        access_control_list: _AccessControl | None = None
        hidden: _Hidden | None = None
        user_project: _UserProjects | None = None
+        personas: _Personas | None = None

    class _VespaPutRequest(BaseModel):
        model_config = {"frozen": True}
@@ -227,6 +232,11 @@ def _update_single_chunk(
        if update_request.project_ids is not None
        else None
    )
+    personas_update: _Personas | None = (
+        _Personas(assign=list(update_request.persona_ids))
+        if update_request.persona_ids is not None
+        else None
+    )

    vespa_put_fields = _VespaPutFields(
        boost=boost_update,
@@ -234,6 +244,7 @@ def _update_single_chunk(
        access_control_list=access_update,
        hidden=hidden_update,
        user_project=user_projects_update,
+        personas=personas_update,
    )

    vespa_put_request = _VespaPutRequest(
--- a/backend/onyx/document_index/vespa_constants.py
+++ b/backend/onyx/document_index/vespa_constants.py
@@ -58,6 +58,7 @@ DOCUMENT_SETS = "document_sets"
 USER_FILE = "user_file"
 USER_FOLDER = "user_folder"
 USER_PROJECT = "user_project"
+PERSONAS = "personas"
 LARGE_CHUNK_REFERENCE_IDS = "large_chunk_reference_ids"
 METADATA = "metadata"
 METADATA_LIST = "metadata_list"
--- a/backend/onyx/indexing/adapters/document_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/document_indexing_adapter.py
@@ -146,6 +146,7 @@ class DocumentIndexingBatchAdapter:
                    doc_id_to_document_set.get(chunk.source_document.id, [])
                ),
                user_project=[],
+                personas=[],
                boost=(
                    context.id_to_boost_map[chunk.source_document.id]
                    if chunk.source_document.id in context.id_to_boost_map
--- a/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
@@ -20,6 +20,7 @@ from onyx.db.models import Persona
 from onyx.db.models import UserFile
 from onyx.db.notification import create_notification
 from onyx.db.user_file import fetch_chunk_counts_for_user_files
+from onyx.db.user_file import fetch_persona_ids_for_user_files
 from onyx.db.user_file import fetch_user_project_ids_for_user_files
 from onyx.file_store.utils import store_user_file_plaintext
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
@@ -119,6 +120,10 @@ class UserFileIndexingAdapter:
            user_file_ids=updatable_ids,
            db_session=self.db_session,
        )
+        user_file_id_to_persona_ids = fetch_persona_ids_for_user_files(
+            user_file_ids=updatable_ids,
+            db_session=self.db_session,
+        )
        user_file_id_to_access: dict[str, DocumentAccess] = get_access_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
@@ -182,7 +187,7 @@ class UserFileIndexingAdapter:
                user_project=user_file_id_to_project_ids.get(
                    chunk.source_document.id, []
                ),
-                # we are going to index userfiles only once, so we just set the boost to the default
+                personas=user_file_id_to_persona_ids.get(chunk.source_document.id, []),
                boost=DEFAULT_BOOST,
                tenant_id=tenant_id,
                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
--- a/backend/onyx/indexing/models.py
+++ b/backend/onyx/indexing/models.py
@@ -112,6 +112,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
    access: "DocumentAccess"
    document_sets: set[str]
    user_project: list[int]
+    personas: list[int]
    boost: int
    aggregated_chunk_boost_factor: float
    # Full ancestor path from root hierarchy node to document's parent.
@@ -126,6 +127,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
        access: "DocumentAccess",
        document_sets: set[str],
        user_project: list[int],
+        personas: list[int],
        boost: int,
        aggregated_chunk_boost_factor: float,
        tenant_id: str,
@@ -137,6 +139,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
            access=access,
            document_sets=document_sets,
            user_project=user_project,
+            personas=personas,
            boost=boost,
            aggregated_chunk_boost_factor=aggregated_chunk_boost_factor,
            tenant_id=tenant_id,
--- a/backend/onyx/onyxbot/slack/blocks.py
+++ b/backend/onyx/onyxbot/slack/blocks.py
@@ -592,11 +592,8 @@ def build_slack_response_blocks(
        )

    citations_blocks = []
-    document_blocks = []
    if answer.citation_info:
        citations_blocks = _build_citations_blocks(answer)
-    else:
-        document_blocks = _priority_ordered_documents_blocks(answer)

    citations_divider = [DividerBlock()] if citations_blocks else []
    buttons_divider = [DividerBlock()] if web_follow_up_block or follow_up_block else []
@@ -608,7 +605,6 @@ def build_slack_response_blocks(
        + ai_feedback_block
        + citations_divider
        + citations_blocks
-        + document_blocks
        + buttons_divider
        + web_follow_up_block
        + follow_up_block
--- a/backend/onyx/onyxbot/slack/formatting.py
+++ b/backend/onyx/onyxbot/slack/formatting.py
@@ -1,20 +1,149 @@
+import re
+from collections.abc import Callable
 from typing import Any

 from mistune import create_markdown
 from mistune import HTMLRenderer

+# Tags that should be replaced with a newline (line-break and block-level elements)
+_HTML_NEWLINE_TAG_PATTERN = re.compile(
+    r"<br\s*/?>|</(?:p|div|li|h[1-6]|tr|blockquote|section|article)>",
+    re.IGNORECASE,
+)
+
+# Strips HTML tags but excludes autolinks like <https://...> and <mailto:...>
+_HTML_TAG_PATTERN = re.compile(
+    r"<(?!https?://|mailto:)/?[a-zA-Z][^>]*>",
+)
+
+# Matches fenced code blocks (``` ... ```) so we can skip sanitization inside them
+_FENCED_CODE_BLOCK_PATTERN = re.compile(r"```[\s\S]*?```")
+
+# Matches the start of any markdown link: [text]( or [[n]](
+# The inner group handles nested brackets for citation links like [[1]](.
+_MARKDOWN_LINK_PATTERN = re.compile(r"\[(?:[^\[\]]|\[[^\]]*\])*\]\(")
+
+# Matches Slack-style links <url|text> that LLMs sometimes output directly.
+# Mistune doesn't recognise this syntax, so text() would escape the angle
+# brackets and Slack would render them as literal text instead of links.
+_SLACK_LINK_PATTERN = re.compile(r"<(https?://[^|>]+)\|([^>]+)>")
+
+
+def _sanitize_html(text: str) -> str:
+    """Strip HTML tags from a text fragment.
+
+    Block-level closing tags and <br> are converted to newlines.
+    Other tags are removed; autolinks (<https://...>, <mailto:...>) are kept.
+    """
+    text = _HTML_NEWLINE_TAG_PATTERN.sub("\n", text)  # must precede the generic strip
+    text = _HTML_TAG_PATTERN.sub("", text)
+    return text
+
+
+def _transform_outside_code_blocks(
+    message: str, transform: Callable[[str], str]
+) -> str:
+    """Apply *transform* only to text outside fenced code blocks."""
+    parts = _FENCED_CODE_BLOCK_PATTERN.split(message)  # text around the blocks
+    code_blocks = _FENCED_CODE_BLOCK_PATTERN.findall(message)  # blocks, in order
+
+    result: list[str] = []
+    for i, part in enumerate(parts):
+        result.append(transform(part))
+        if i < len(code_blocks):  # the last part has no code block after it
+            result.append(code_blocks[i])
+
+    return "".join(result)
+
+
+def _extract_link_destination(message: str, start_idx: int) -> tuple[str, int | None]:
+    """Extract markdown link destination, allowing nested parentheses in the URL."""
+    depth = 0  # number of currently open "(" inside the destination
+    i = start_idx
+
+    while i < len(message):
+        curr = message[i]
+        if curr == "\\":
+            i += 2  # skip the backslash and the character it escapes
+            continue
+
+        if curr == "(":
+            depth += 1
+        elif curr == ")":
+            if depth == 0:  # unmatched ")" is the one that closes the link
+                return message[start_idx:i], i
+            depth -= 1
+        i += 1
+
+    return message[start_idx:], None  # ran off the end: link is unterminated
+
+
+def _normalize_link_destinations(message: str) -> str:
+    """Wrap markdown link URLs in angle brackets so the parser handles special chars safely.
+
+    Markdown link syntax [text](url) breaks when the URL contains unescaped
+    parentheses, spaces, or other special characters. Wrapping the URL in angle
+    brackets — [text](<url>) — tells the parser to treat everything inside as
+    a literal URL. This applies to all links, not just citations.
+    """
+    if "](" not in message:  # cheap pre-check: no link can be present
+        return message
+
+    normalized_parts: list[str] = []
+    cursor = 0
+
+    while match := _MARKDOWN_LINK_PATTERN.search(message, cursor):
+        normalized_parts.append(message[cursor : match.end()])  # up to and incl. "[text]("
+        destination_start = match.end()
+        destination, end_idx = _extract_link_destination(message, destination_start)
+        if end_idx is None:  # no closing ")": keep the rest untouched
+            normalized_parts.append(message[destination_start:])
+            return "".join(normalized_parts)
+
+        already_wrapped = destination.startswith("<") and destination.endswith(">")
+        if destination and not already_wrapped:  # empty destinations stay empty
+            destination = f"<{destination}>"  # NOTE(review): wraps a "title" part too
+
+        normalized_parts.append(destination)
+        normalized_parts.append(")")
+        cursor = end_idx + 1  # resume after the closing ")"
+
+    normalized_parts.append(message[cursor:])
+    return "".join(normalized_parts)
+
+
+def _convert_slack_links_to_markdown(message: str) -> str:
+    """Convert Slack-style <url|text> links to standard markdown [text](url).
+
+    LLMs sometimes emit Slack mrkdwn link syntax directly. Mistune doesn't
+    recognise it, so the angle brackets would be escaped by text() and Slack
+    would render the link as literal text instead of a clickable link.
+    """
+    return _transform_outside_code_blocks(  # fenced code blocks stay untouched
+        message, lambda text: _SLACK_LINK_PATTERN.sub(r"[\2](\1)", text)
+    )
+

 def format_slack_message(message: str | None) -> str:
    if message is None:
        return ""
+    message = _transform_outside_code_blocks(message, _sanitize_html)  # strip raw HTML
+    message = _convert_slack_links_to_markdown(message)  # <url|text> -> [text](url)
+    normalized_message = _transform_outside_code_blocks(message, _normalize_link_destinations)  # keep fenced code verbatim
    md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough"])
-    result = md(message)
+    result = md(normalized_message)
    # With HTMLRenderer, result is always str (not AST list)
    assert isinstance(result, str)
-    return result
+    return result.rstrip("\n")  # block renderers end with "\n\n"; drop the trailing newlines


 class SlackRenderer(HTMLRenderer):
+    """Renders markdown as Slack mrkdwn format instead of HTML.
+
+    Overrides all HTMLRenderer methods that produce HTML tags to ensure
+    no raw HTML ever appears in Slack messages.
+    """
+
    SPECIALS: dict[str, str] = {"&": "&amp;", "<": "&lt;", ">": "&gt;"}

    def escape_special(self, text: str) -> str:
@@ -23,7 +152,7 @@ class SlackRenderer(HTMLRenderer):
        return text

    def heading(self, text: str, level: int, **attrs: Any) -> str:  # noqa: ARG002
-        return f"*{text}*\n"
+        return f"*{text}*\n\n"

    def emphasis(self, text: str) -> str:
        return f"_{text}_"
@@ -42,7 +171,7 @@ class SlackRenderer(HTMLRenderer):
                count += 1
                prefix = f"{count}. " if ordered else "• "
                lines[i] = f"{prefix}{line[4:]}"
-        return "\n".join(lines)
+        return "\n".join(lines) + "\n"

    def list_item(self, text: str) -> str:
        return f"li: {text}\n"
@@ -64,7 +193,30 @@ class SlackRenderer(HTMLRenderer):
        return f"`{text}`"

    def block_code(self, code: str, info: str | None = None) -> str:  # noqa: ARG002
-        return f"```\n{code}\n```\n"
+        return f"```\n{code.rstrip(chr(10))}\n```\n\n"
+
+    def linebreak(self) -> str:
+        return "\n"
+
+    def thematic_break(self) -> str:
+        return "---\n\n"
+
+    def block_quote(self, text: str) -> str:
+        lines = text.strip().split("\n")
+        quoted = "\n".join(f">{line}" for line in lines)
+        return quoted + "\n\n"
+
+    def block_html(self, html: str) -> str:
+        return _sanitize_html(html) + "\n\n"
+
+    def block_error(self, text: str) -> str:
+        return f"```\n{text}\n```\n\n"
+
+    def text(self, text: str) -> str:
+        # Only escape the three entities Slack recognizes: & < >
+        # HTMLRenderer.text() also escapes " to &quot; which Slack renders
+        # as literal &quot; text since Slack doesn't recognize that entity.
+        return self.escape_special(text)

    def paragraph(self, text: str) -> str:
-        return f"{text}\n"
+        return f"{text}\n\n"
--- a/backend/onyx/server/features/build/sandbox/kubernetes/docker/demo_data.zip
+++ b/backend/onyx/server/features/build/sandbox/kubernetes/docker/demo_data.zip
--- a/backend/onyx/server/features/build/sandbox/kubernetes/docker/generate_agents_md.py
+++ b/backend/onyx/server/features/build/sandbox/kubernetes/docker/generate_agents_md.py
@@ -1,15 +1,19 @@
 #!/usr/bin/env python3
 """Generate AGENTS.md by scanning the files directory and populating the template.

-This script runs at container startup, AFTER the init container has synced files
-from S3. It scans the /workspace/files directory to discover what knowledge sources
-are available and generates appropriate documentation.
+This script runs during session setup, AFTER files have been synced from S3
+and the files symlink has been created. It reads an existing AGENTS.md (which
+contains the {{KNOWLEDGE_SOURCES_SECTION}} placeholder), replaces the
+placeholder by scanning the knowledge source directory, and writes it back.

-Environment variables:
- AGENT_INSTRUCTIONS: The template content with placeholders to replace
+Usage:
+    python3 generate_agents_md.py <agents_md_path> <files_path>
+
+Arguments:
+    agents_md_path: Path to the AGENTS.md file to update in place
+    files_path: Path to the files directory to scan for knowledge sources
 """

-import os
 import sys
 from pathlib import Path

@@ -189,49 +193,39 @@ def build_knowledge_sources_section(files_path: Path) -> str:
 def main() -> None:
    """Main entry point for container startup script.

-    Is called by the container startup script to scan /workspace/files and populate
-    the knowledge sources section.
+    Reads an existing AGENTS.md, replaces the {{KNOWLEDGE_SOURCES_SECTION}}
+    placeholder by scanning the files directory, and writes it back.
+
+    Usage:
+        python3 generate_agents_md.py <agents_md_path> <files_path>
    """
-    # Read template from environment variable
-    template = os.environ.get("AGENT_INSTRUCTIONS", "")
-    if not template:
-        print("Warning: No AGENT_INSTRUCTIONS template provided", file=sys.stderr)
-        template = "# Agent Instructions\n\nNo instructions provided."
+    if len(sys.argv) != 3:  # script name plus exactly two arguments
+        print(
+            f"Usage: {sys.argv[0]} <agents_md_path> <files_path>",
+            file=sys.stderr,
+        )
+        sys.exit(1)

-    # Scan files directory - check /workspace/files first, then /workspace/demo_data
-    files_path = Path("/workspace/files")
-    demo_data_path = Path("/workspace/demo_data")
+    agents_md_path = Path(sys.argv[1])
+    files_path = Path(sys.argv[2])

-    # Use demo_data if files doesn't exist or is empty
-    if not files_path.exists() or not any(files_path.iterdir()):
-        if demo_data_path.exists():
-            files_path = demo_data_path
+    if not agents_md_path.exists():
+        print(f"Error: {agents_md_path} not found", file=sys.stderr)
+        sys.exit(1)

-    knowledge_sources_section = build_knowledge_sources_section(files_path)
+    template = agents_md_path.read_text()

-    # Replace placeholders
-    content = template
-    content = content.replace(
+    # Resolve symlinks in the path itself (final component or any parent directory)
+    resolved_files_path = files_path.resolve()
+
+    knowledge_sources_section = build_knowledge_sources_section(resolved_files_path)
+
+    # Replace placeholder and write back
+    content = template.replace(
        "{{KNOWLEDGE_SOURCES_SECTION}}", knowledge_sources_section
    )
-
-    # Write AGENTS.md
-    output_path = Path("/workspace/AGENTS.md")
-    output_path.write_text(content)
-
-    # Log result
-    source_count = 0
-    if files_path.exists():
-        source_count = len(
-            [
-                d
-                for d in files_path.iterdir()
-                if d.is_dir() and not d.name.startswith(".")
-            ]
-        )
-    print(
-        f"Generated AGENTS.md with {source_count} knowledge sources from {files_path}"
-    )
+    agents_md_path.write_text(content)  # overwrites the input file in place
+    print(f"Populated knowledge sources in {agents_md_path}")


 if __name__ == "__main__":
--- a/backend/onyx/server/features/build/sandbox/kubernetes/kubernetes_sandbox_manager.py
+++ b/backend/onyx/server/features/build/sandbox/kubernetes/kubernetes_sandbox_manager.py
@@ -1352,6 +1352,9 @@ fi
 echo "Writing AGENTS.md"
 printf '%s' '{agent_instructions_escaped}' > {session_path}/AGENTS.md

+# Populate knowledge sources by scanning the files directory
+python3 /usr/local/bin/generate_agents_md.py {session_path}/AGENTS.md {session_path}/files || true
+
 # Write opencode config
 echo "Writing opencode.json"
 printf '%s' '{opencode_json_escaped}' > {session_path}/opencode.json
@@ -1780,6 +1783,9 @@ ln -sf {symlink_target} {session_path}/files
 echo "Writing AGENTS.md"
 printf '%s' '{agent_instructions_escaped}' > {session_path}/AGENTS.md

+# Populate knowledge sources by scanning the files directory
+python3 /usr/local/bin/generate_agents_md.py {session_path}/AGENTS.md {session_path}/files || true
+
 # Write opencode config
 echo "Writing opencode.json"
 printf '%s' '{opencode_json_escaped}' > {session_path}/opencode.json
--- a/backend/onyx/server/features/persona/api.py
+++ b/backend/onyx/server/features/persona/api.py
@@ -405,6 +405,7 @@ class PersonaShareRequest(BaseModel):
    user_ids: list[UUID] | None = None
    group_ids: list[int] | None = None
    is_public: bool | None = None
+    label_ids: list[int] | None = None


 # We notify each user when a user is shared with them
@@ -415,14 +416,22 @@ def share_persona(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
 ) -> None:
-    update_persona_shared(
-        persona_id=persona_id,
-        user=user,
-        db_session=db_session,
-        user_ids=persona_share_request.user_ids,
-        group_ids=persona_share_request.group_ids,
-        is_public=persona_share_request.is_public,
-    )
+    try:
+        update_persona_shared(
+            persona_id=persona_id,
+            user=user,
+            db_session=db_session,
+            user_ids=persona_share_request.user_ids,
+            group_ids=persona_share_request.group_ids,
+            is_public=persona_share_request.is_public,
+            label_ids=persona_share_request.label_ids,
+        )
+    except PermissionError as e:
+        logger.exception("Failed to share persona")
+        raise HTTPException(status_code=403, detail=str(e))
+    except ValueError as e:
+        logger.exception("Failed to share persona")
+        raise HTTPException(status_code=400, detail=str(e))


@basic_router.delete("/{persona_id}", tags=PUBLIC_API_TAGS)
--- a/backend/onyx/server/manage/code_interpreter/api.py
+++ b/backend/onyx/server/manage/code_interpreter/api.py
@@ -3,11 +3,12 @@ from fastapi import Depends
 from sqlalchemy.orm import Session

 from onyx.auth.users import current_admin_user
+from onyx.db.code_interpreter import fetch_code_interpreter_server
 from onyx.db.code_interpreter import update_code_interpreter_server_enabled
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import User
+from onyx.server.manage.code_interpreter.models import CodeInterpreterServer
 from onyx.server.manage.code_interpreter.models import CodeInterpreterServerHealth
-from onyx.server.manage.code_interpreter.models import CodeInterpreterServerUpdate
 from onyx.tools.tool_implementations.python.code_interpreter_client import (
    CodeInterpreterClient,
 )
@@ -26,9 +27,17 @@ def get_code_interpreter_health(
        return CodeInterpreterServerHealth(healthy=False)


+@admin_router.get("")
+def get_code_interpreter(
+    _: User = Depends(current_admin_user), db_session: Session = Depends(get_session)
+) -> CodeInterpreterServer:
+    ci_server = fetch_code_interpreter_server(db_session)  # NOTE(review): confirm never None
+    return CodeInterpreterServer(enabled=ci_server.server_enabled)
+
+
@admin_router.put("")
 def update_code_interpreter(
-    update: CodeInterpreterServerUpdate,
+    update: CodeInterpreterServer,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
 ) -> None:
--- a/backend/onyx/server/manage/code_interpreter/models.py
+++ b/backend/onyx/server/manage/code_interpreter/models.py
@@ -1,7 +1,7 @@
 from pydantic import BaseModel


-class CodeInterpreterServerUpdate(BaseModel):
+class CodeInterpreterServer(BaseModel):
    enabled: bool


--- a/backend/onyx/server/manage/llm/models.py
+++ b/backend/onyx/server/manage/llm/models.py
@@ -105,7 +105,9 @@ class LLMProviderDescriptor(BaseModel):
        is_default_provider = bool(default_model_name)
        is_default_vision_provider = default_vision_model is not None

-        default_model_name = default_model_name or llm_provider_model.default_model_name
+        default_model_name = (
+            default_model_name or llm_provider_model.default_model_name or ""
+        )

        return cls(
            name=llm_provider_model.name,
@@ -184,7 +186,9 @@ class LLMProviderView(LLMProvider):
        is_default_provider = bool(default_model_name)
        is_default_vision_provider = default_vision_model is not None

-        default_model_name = default_model_name or llm_provider_model.default_model_name
+        default_model_name = (
+            default_model_name or llm_provider_model.default_model_name or ""
+        )

        return cls(
            id=llm_provider_model.id,
--- a/backend/onyx/server/manage/models.py
+++ b/backend/onyx/server/manage/models.py
@@ -35,6 +35,18 @@ if TYPE_CHECKING:
    pass


+class EmailInviteStatus(str, Enum):
+    SENT = "SENT"  # every invite email was sent without raising
+    NOT_CONFIGURED = "NOT_CONFIGURED"  # invites enabled, but EMAIL_CONFIGURED is falsy
+    SEND_FAILED = "SEND_FAILED"  # an exception was raised while sending
+    DISABLED = "DISABLED"  # ENABLE_EMAIL_INVITES is falsy
+
+
+class BulkInviteResponse(BaseModel):
+    invited_count: int  # value returned by write_invited_users(...)
+    email_invite_status: EmailInviteStatus  # outcome of the invite-email step
+
+
 class VersionResponse(BaseModel):
    backend_version: str

--- a/backend/onyx/server/manage/users.py
+++ b/backend/onyx/server/manage/users.py
@@ -36,6 +36,7 @@ from onyx.configs.app_configs import AUTH_BACKEND
 from onyx.configs.app_configs import AUTH_TYPE
 from onyx.configs.app_configs import AuthBackend
 from onyx.configs.app_configs import DEV_MODE
+from onyx.configs.app_configs import EMAIL_CONFIGURED
 from onyx.configs.app_configs import ENABLE_EMAIL_INVITES
 from onyx.configs.app_configs import NUM_FREE_TRIAL_USER_INVITES
 from onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX
@@ -78,8 +79,10 @@ from onyx.server.documents.models import PaginatedReturn
 from onyx.server.features.projects.models import UserFileSnapshot
 from onyx.server.manage.models import AllUsersResponse
 from onyx.server.manage.models import AutoScrollRequest
+from onyx.server.manage.models import BulkInviteResponse
 from onyx.server.manage.models import ChatBackgroundRequest
 from onyx.server.manage.models import DefaultAppModeRequest
+from onyx.server.manage.models import EmailInviteStatus
 from onyx.server.manage.models import MemoryItem
 from onyx.server.manage.models import PersonalizationUpdateRequest
 from onyx.server.manage.models import TenantInfo
@@ -368,7 +371,7 @@ def bulk_invite_users(
    emails: list[str] = Body(..., embed=True),
    current_user: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
-) -> int:
+) -> BulkInviteResponse:
    """emails are string validated. If any email fails validation, no emails are
    invited and an exception is raised."""
    tenant_id = get_current_tenant_id()
@@ -427,34 +430,41 @@ def bulk_invite_users(
    number_of_invited_users = write_invited_users(all_emails)

    # send out email invitations only to new users (not already invited or existing)
-    if ENABLE_EMAIL_INVITES:
+    if not ENABLE_EMAIL_INVITES:
+        email_invite_status = EmailInviteStatus.DISABLED
+    elif not EMAIL_CONFIGURED:
+        email_invite_status = EmailInviteStatus.NOT_CONFIGURED
+    else:
        try:
            for email in emails_needing_seats:
                send_user_email_invite(email, current_user, AUTH_TYPE)
+            email_invite_status = EmailInviteStatus.SENT
        except Exception as e:
            logger.error(f"Error sending email invite to invited users: {e}")
+            email_invite_status = EmailInviteStatus.SEND_FAILED

-    if not MULTI_TENANT or DEV_MODE:
-        return number_of_invited_users
+    if MULTI_TENANT and not DEV_MODE:
+        # for billing purposes, write to the control plane about the number of new users
+        try:
+            logger.info("Registering tenant users")
+            fetch_ee_implementation_or_noop(
+                "onyx.server.tenants.billing", "register_tenant_users", None
+            )(tenant_id, get_live_users_count(db_session))
+        except Exception as e:
+            logger.error(f"Failed to register tenant users: {str(e)}")
+            logger.info(
+                "Reverting changes: removing users from tenant and resetting invited users"
+            )
+            write_invited_users(initial_invited_users)  # Reset to original state
+            fetch_ee_implementation_or_noop(
+                "onyx.server.tenants.user_mapping", "remove_users_from_tenant", None
+            )(new_invited_emails, tenant_id)
+            raise e

-    # for billing purposes, write to the control plane about the number of new users
-    try:
-        logger.info("Registering tenant users")
-        fetch_ee_implementation_or_noop(
-            "onyx.server.tenants.billing", "register_tenant_users", None
-        )(tenant_id, get_live_users_count(db_session))
-
-        return number_of_invited_users
-    except Exception as e:
-        logger.error(f"Failed to register tenant users: {str(e)}")
-        logger.info(
-            "Reverting changes: removing users from tenant and resetting invited users"
-        )
-        write_invited_users(initial_invited_users)  # Reset to original state
-        fetch_ee_implementation_or_noop(
-            "onyx.server.tenants.user_mapping", "remove_users_from_tenant", None
-        )(new_invited_emails, tenant_id)
-        raise e
+    return BulkInviteResponse(
+        invited_count=number_of_invited_users,
+        email_invite_status=email_invite_status,
+    )


@router.patch("/manage/admin/remove-invited-user", tags=PUBLIC_API_TAGS)
--- a/backend/onyx/server/metrics/per_tenant.py
+++ b/backend/onyx/server/metrics/per_tenant.py
@@ -0,0 +1,27 @@
+"""Per-tenant request counter metric.
+
+Increments a counter on every request, labelled by tenant, so Grafana can
+answer "which tenant is generating the most traffic?"
+"""
+
+from prometheus_client import Counter
+from prometheus_fastapi_instrumentator.metrics import Info
+
+from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+
+_requests_by_tenant = Counter(
+    "onyx_api_requests_by_tenant_total",
+    "Total API requests by tenant",
+    ["tenant_id", "method", "handler", "status"],
+)
+
+
+def per_tenant_request_callback(info: Info) -> None:
+    """Increment per-tenant request counter for every request."""
+    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or "unknown"  # falsy value -> "unknown"
+    _requests_by_tenant.labels(  # NOTE(review): one series per label combination
+        tenant_id=tenant_id,
+        method=info.method,
+        handler=info.modified_handler,
+        status=info.modified_status,
+    ).inc()
--- a/backend/onyx/server/metrics/postgres_connection_pool.py
+++ b/backend/onyx/server/metrics/postgres_connection_pool.py
@@ -32,6 +32,7 @@ from sqlalchemy.pool import QueuePool

 from onyx.utils.logger import setup_logger
 from shared_configs.contextvars import CURRENT_ENDPOINT_CONTEXTVAR
+from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

 logger = setup_logger()

@@ -72,7 +73,7 @@ _checkout_timeout_total = Counter(
 _connections_held = Gauge(
    "onyx_db_connections_held_by_endpoint",
    "Number of DB connections currently held, by endpoint and engine",
-    ["handler", "engine"],
+    ["handler", "engine", "tenant_id"],
 )

 _hold_seconds = Histogram(
@@ -163,10 +164,14 @@ def _register_pool_events(engine: Engine, label: str) -> None:
        conn_proxy: PoolProxiedConnection,  # noqa: ARG001
    ) -> None:
        handler = CURRENT_ENDPOINT_CONTEXTVAR.get() or "unknown"
+        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or "unknown"
        conn_record.info["_metrics_endpoint"] = handler
+        conn_record.info["_metrics_tenant_id"] = tenant_id
        conn_record.info["_metrics_checkout_time"] = time.monotonic()
        _checkout_total.labels(engine=label).inc()
-        _connections_held.labels(handler=handler, engine=label).inc()
+        _connections_held.labels(
+            handler=handler, engine=label, tenant_id=tenant_id
+        ).inc()

    @event.listens_for(engine, "checkin")
    def on_checkin(
@@ -174,9 +179,12 @@ def _register_pool_events(engine: Engine, label: str) -> None:
        conn_record: ConnectionPoolEntry,
    ) -> None:
        handler = conn_record.info.pop("_metrics_endpoint", "unknown")
+        tenant_id = conn_record.info.pop("_metrics_tenant_id", "unknown")
        start = conn_record.info.pop("_metrics_checkout_time", None)
        _checkin_total.labels(engine=label).inc()
-        _connections_held.labels(handler=handler, engine=label).dec()
+        _connections_held.labels(
+            handler=handler, engine=label, tenant_id=tenant_id
+        ).dec()
        if start is not None:
            _hold_seconds.labels(handler=handler, engine=label).observe(
                time.monotonic() - start
@@ -199,9 +207,12 @@ def _register_pool_events(engine: Engine, label: str) -> None:
        # Defensively clean up the held-connections gauge in case checkin
        # doesn't fire after invalidation (e.g. hard pool shutdown).
        handler = conn_record.info.pop("_metrics_endpoint", None)
+        tenant_id = conn_record.info.pop("_metrics_tenant_id", "unknown")
        start = conn_record.info.pop("_metrics_checkout_time", None)
        if handler:
-            _connections_held.labels(handler=handler, engine=label).dec()
+            _connections_held.labels(
+                handler=handler, engine=label, tenant_id=tenant_id
+            ).dec()
        if start is not None:
            _hold_seconds.labels(handler=handler or "unknown", engine=label).observe(
                time.monotonic() - start
--- a/backend/onyx/server/metrics/prometheus_setup.py
+++ b/backend/onyx/server/metrics/prometheus_setup.py
@@ -11,9 +11,11 @@ SQLAlchemy connection pool metrics are registered separately via
 """

 from prometheus_fastapi_instrumentator import Instrumentator
+from prometheus_fastapi_instrumentator.metrics import default as default_metrics
 from sqlalchemy.exc import TimeoutError as SATimeoutError
 from starlette.applications import Starlette

+from onyx.server.metrics.per_tenant import per_tenant_request_callback
 from onyx.server.metrics.postgres_connection_pool import pool_timeout_handler
 from onyx.server.metrics.slow_requests import slow_request_callback

@@ -59,6 +61,15 @@ def setup_prometheus_metrics(app: Starlette) -> None:
        excluded_handlers=_EXCLUDED_HANDLERS,
    )

+    # Explicitly create the default metrics (http_requests_total,
+    # http_request_duration_seconds, etc.) and add them first.  The library
+    # skips creating defaults when ANY custom instrumentations are registered
+    # via .add(), so we must include them ourselves.
+    default_callback = default_metrics(latency_lowr_buckets=_LATENCY_BUCKETS)
+    if default_callback:
+        instrumentator.add(default_callback)
+
    instrumentator.add(slow_request_callback)
+    instrumentator.add(per_tenant_request_callback)

    instrumentator.instrument(app, latency_lowr_buckets=_LATENCY_BUCKETS).expose(app)
--- a/backend/onyx/setup.py
+++ b/backend/onyx/setup.py
@@ -4,6 +4,7 @@ from sqlalchemy.orm import Session

 from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
 from onyx.configs.app_configs import DISABLE_VECTOR_DB
+from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
 from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
 from onyx.configs.app_configs import MANAGED_VESPA
 from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
@@ -32,6 +33,9 @@ from onyx.db.search_settings import update_current_search_settings
 from onyx.db.swap_index import check_and_perform_index_swap
 from onyx.document_index.factory import get_all_document_indices
 from onyx.document_index.interfaces import DocumentIndex
+from onyx.document_index.opensearch.client import OpenSearchClient
+from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
+from onyx.document_index.opensearch.opensearch_document_index import set_cluster_state
 from onyx.document_index.vespa.index import VespaIndex
 from onyx.indexing.models import IndexingSetting
 from onyx.key_value_store.factory import get_kv_store
@@ -311,7 +315,14 @@ def setup_multitenant_onyx() -> None:
        logger.notice("DISABLE_VECTOR_DB is set — skipping multitenant Vespa setup.")
        return

+    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
+        opensearch_client = OpenSearchClient()
+        if not wait_for_opensearch_with_timeout(client=opensearch_client):
+            raise RuntimeError("Failed to connect to OpenSearch.")
+        set_cluster_state(opensearch_client)
+
    # For Managed Vespa, the schema is sent over via the Vespa Console manually.
+    # NOTE: Pretty sure this code is never hit in any production environment.
    if not MANAGED_VESPA:
        setup_vespa_multitenant(SUPPORTED_EMBEDDING_MODELS)

--- a/backend/onyx/tools/fake_tools/research_agent.py
+++ b/backend/onyx/tools/fake_tools/research_agent.py
@@ -120,7 +120,7 @@ def generate_intermediate_report(
            custom_agent_prompt=None,
            simple_chat_history=history,
            reminder_message=reminder_message,
-            project_files=None,
+            context_files=None,
            available_tokens=llm.config.max_input_tokens,
        )

@@ -325,7 +325,7 @@ def run_research_agent_call(
                    custom_agent_prompt=None,
                    simple_chat_history=msg_history,
                    reminder_message=reminder_message,
-                    project_files=None,
+                    context_files=None,
                    available_tokens=llm.config.max_input_tokens,
                )

--- a/backend/onyx/tools/tool_constructor.py
+++ b/backend/onyx/tools/tool_constructor.py
@@ -54,6 +54,7 @@ logger = setup_logger()
 class SearchToolConfig(BaseModel):
    user_selected_filters: BaseFilters | None = None
    project_id: int | None = None
+    persona_id: int | None = None
    bypass_acl: bool = False
    additional_context: str | None = None
    slack_context: SlackContext | None = None
@@ -180,6 +181,7 @@ def construct_tools(
                    document_index=document_index,
                    user_selected_filters=search_tool_config.user_selected_filters,
                    project_id=search_tool_config.project_id,
+                    persona_id=search_tool_config.persona_id,
                    bypass_acl=search_tool_config.bypass_acl,
                    slack_context=search_tool_config.slack_context,
                    enable_slack_search=search_tool_config.enable_slack_search,
@@ -427,6 +429,7 @@ def construct_tools(
            document_index=document_index,
            user_selected_filters=search_tool_config.user_selected_filters,
            project_id=search_tool_config.project_id,
+            persona_id=search_tool_config.persona_id,
            bypass_acl=search_tool_config.bypass_acl,
            slack_context=search_tool_config.slack_context,
            enable_slack_search=search_tool_config.enable_slack_search,
--- a/backend/onyx/tools/tool_implementations/python/python_tool.py
+++ b/backend/onyx/tools/tool_implementations/python/python_tool.py
@@ -12,6 +12,7 @@ from onyx.configs.app_configs import CODE_INTERPRETER_BASE_URL
 from onyx.configs.app_configs import CODE_INTERPRETER_DEFAULT_TIMEOUT_MS
 from onyx.configs.app_configs import CODE_INTERPRETER_MAX_OUTPUT_LENGTH
 from onyx.configs.constants import FileOrigin
+from onyx.db.code_interpreter import fetch_code_interpreter_server
 from onyx.file_store.utils import build_full_frontend_file_url
 from onyx.file_store.utils import get_default_file_store
 from onyx.server.query_and_chat.placement import Placement
@@ -103,8 +104,10 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
    @override
    @classmethod
    def is_available(cls, db_session: Session) -> bool:
-        is_available = bool(CODE_INTERPRETER_BASE_URL)
-        return is_available
+        if not CODE_INTERPRETER_BASE_URL:
+            return False  # no interpreter URL configured
+        server = fetch_code_interpreter_server(db_session)
+        return server.server_enabled  # NOTE(review): assumes server is never None

    def tool_definition(self) -> dict:
        return {
--- a/backend/onyx/tools/tool_implementations/search/search_tool.py
+++ b/backend/onyx/tools/tool_implementations/search/search_tool.py
@@ -247,6 +247,8 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
        user_selected_filters: BaseFilters | None,
        # If the chat is part of a project
        project_id: int | None,
+        # If set, search scopes to files attached to this persona
+        persona_id: int | None = None,
        bypass_acl: bool = False,
        # Slack context for federated Slack search (tokens fetched internally)
        slack_context: SlackContext | None = None,
@@ -261,6 +263,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
        self.document_index = document_index
        self.user_selected_filters = user_selected_filters
        self.project_id = project_id
+        self.persona_id = persona_id
        self.bypass_acl = bypass_acl
        self.slack_context = slack_context
        self.enable_slack_search = enable_slack_search
@@ -456,6 +459,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
                limit=num_hits,
            ),
            project_id=self.project_id,
+            persona_id=self.persona_id,
            document_index=self.document_index,
            user=self.user,
            persona=self.persona,
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -809,7 +809,7 @@ pypandoc-binary==1.16.2
    # via onyx
 pyparsing==3.2.5
    # via httplib2
-pypdf==6.6.2
+pypdf==6.7.3
    # via
    #   onyx
    #   unstructured-client
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@@ -317,7 +317,7 @@ oauthlib==3.2.2
    # via
    #   kubernetes
    #   requests-oauthlib
-onyx-devtools==0.6.0
+onyx-devtools==0.6.2
    # via onyx
 openai==2.14.0
    # via
--- a/backend/scripts/query_time_check/seed_dummy_docs.py
+++ b/backend/scripts/query_time_check/seed_dummy_docs.py
@@ -95,6 +95,7 @@ def generate_dummy_chunk(
    return DocMetadataAwareIndexChunk.from_index_chunk(
        index_chunk=chunk,
        user_project=[],
+        personas=[],
        access=DocumentAccess.build(
            user_emails=user_emails,
            user_groups=user_groups,
--- a/backend/tests/daily/connectors/sharepoint/test_sharepoint_connector.py
+++ b/backend/tests/daily/connectors/sharepoint/test_sharepoint_connector.py
@@ -12,6 +12,7 @@ from onyx.configs.constants import DocumentSource
 from onyx.connectors.models import Document
 from onyx.connectors.models import HierarchyNode
 from onyx.connectors.models import ImageSection
+from onyx.connectors.sharepoint.connector import SharepointAuthMethod
 from onyx.connectors.sharepoint.connector import SharepointConnector
 from onyx.db.enums import HierarchyNodeType
 from tests.daily.connectors.utils import load_all_from_connector
@@ -521,3 +522,46 @@ def test_sharepoint_connector_hierarchy_nodes(
                f"Document {doc.semantic_identifier} should have "
                "parent_hierarchy_raw_node_id set"
            )
+
+
+@pytest.fixture
+def sharepoint_cert_credentials() -> dict[str, str]:
+    """Certificate-auth credentials built from PERM_SYNC_SHAREPOINT_* env vars."""
+    env = os.environ
+    return {
+        "authentication_method": SharepointAuthMethod.CERTIFICATE.value,
+        "sp_client_id": env["PERM_SYNC_SHAREPOINT_CLIENT_ID"],
+        "sp_private_key": env["PERM_SYNC_SHAREPOINT_PRIVATE_KEY"],
+        "sp_certificate_password": env["PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD"],
+        "sp_directory_id": env["PERM_SYNC_SHAREPOINT_DIRECTORY_ID"],
+    }
+
+
+def test_resolve_tenant_domain_from_site_urls(
+    sharepoint_cert_credentials: dict[str, str],
+) -> None:
+    """Verify that certificate auth resolves the tenant domain from site URLs
+    without calling the /organization endpoint."""
+    from urllib.parse import urlsplit
+
+    site_url = os.environ["SHAREPOINT_SITE"]
+    connector = SharepointConnector(sites=[site_url])
+    connector.load_credentials(sharepoint_cert_credentials)
+
+    # Expected value is the first hostname label; a scheme-less URL has no hostname
+    hostname = urlsplit(site_url).hostname
+    assert hostname, f"SHAREPOINT_SITE has no hostname: {site_url!r}"
+    assert connector.sp_tenant_domain  # neither None nor empty
+    assert connector.sp_tenant_domain == hostname.split(".")[0]
+
+
+def test_resolve_tenant_domain_from_root_site(
+    sharepoint_cert_credentials: dict[str, str],
+) -> None:
+    """With no site URLs configured, certificate auth must still resolve a
+    non-empty tenant domain (expected to come from the root site endpoint)."""
+    connector = SharepointConnector(sites=[])
+    connector.load_credentials(sharepoint_cert_credentials)
+    tenant_domain = connector.sp_tenant_domain
+    assert tenant_domain is not None
+    assert len(tenant_domain) > 0
--- a/backend/tests/external_dependency_unit/celery/test_persona_file_sync.py
+++ b/backend/tests/external_dependency_unit/celery/test_persona_file_sync.py
@@ -0,0 +1,544 @@
+"""
+External dependency unit tests for persona file sync.
+
+Validates that:
+
+1. The check_for_user_file_project_sync beat task picks up UserFiles with
+   needs_persona_sync=True (not just needs_project_sync).
+
+2. The process_single_user_file_project_sync worker task reads persona
+   associations from the DB, passes persona_ids to the document index via
+   VespaDocumentUserFields, and clears needs_persona_sync afterwards.
+
+3. upsert_persona correctly marks affected UserFiles with
+   needs_persona_sync=True when file associations change.
+
+Uses real Redis and PostgreSQL.  Document index (Vespa) calls are mocked
+since we only need to verify the arguments passed to update_single.
+"""
+
+from collections.abc import Generator
+from contextlib import contextmanager
+from typing import Any
+from unittest.mock import MagicMock
+from unittest.mock import patch
+from unittest.mock import PropertyMock
+from uuid import uuid4
+
+from sqlalchemy.orm import Session
+
+from onyx.background.celery.tasks.user_file_processing.tasks import (
+    check_for_user_file_project_sync,
+)
+from onyx.background.celery.tasks.user_file_processing.tasks import (
+    process_single_user_file_project_sync,
+)
+from onyx.background.celery.tasks.user_file_processing.tasks import (
+    user_file_project_sync_lock_key,
+)
+from onyx.context.search.enums import RecencyBiasSetting
+from onyx.db.enums import UserFileStatus
+from onyx.db.models import Persona
+from onyx.db.models import Persona__UserFile
+from onyx.db.models import User
+from onyx.db.models import UserFile
+from onyx.db.persona import upsert_persona
+from onyx.document_index.interfaces import VespaDocumentUserFields
+from onyx.redis.redis_pool import get_redis_client
+from tests.external_dependency_unit.conftest import create_test_user
+from tests.external_dependency_unit.constants import TEST_TENANT_ID
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _create_completed_user_file(
+    db_session: Session,
+    user: User,
+    needs_persona_sync: bool = False,
+    needs_project_sync: bool = False,
+) -> UserFile:
+    """Persist a COMPLETED UserFile for ``user`` carrying the given sync flags."""
+    user_file = UserFile(
+        id=uuid4(),
+        user_id=user.id,
+        status=UserFileStatus.COMPLETED,
+        file_type="text/plain",
+        file_id=f"test_file_{uuid4().hex[:8]}",
+        name=f"test_{uuid4().hex[:8]}.txt",
+        chunk_count=5,
+        needs_project_sync=needs_project_sync,
+        needs_persona_sync=needs_persona_sync,
+    )
+    db_session.add(user_file)
+    db_session.commit()
+    db_session.refresh(user_file)
+    return user_file
+
+
+def _create_test_persona(
+    db_session: Session,
+    user: User,
+    user_files: list[UserFile] | None = None,
+) -> Persona:
+    """Insert a bare-bones public Persona owned by ``user`` (no upsert_persona)."""
+    new_persona = Persona(
+        name=f"Test Persona {uuid4().hex[:8]}",
+        description="Test persona",
+        user_id=user.id,
+        users=[user],
+        groups=[],
+        tools=[],
+        document_sets=[],
+        user_files=user_files or [],
+        system_prompt="You are a test assistant",
+        task_prompt="Answer the question",
+        starter_messages=None,
+        num_chunks=10.0,
+        chunks_above=0,
+        chunks_below=0,
+        recency_bias=RecencyBiasSetting.NO_DECAY,
+        llm_relevance_filter=False,
+        llm_filter_extraction=False,
+        is_visible=True,
+        is_public=True,
+        display_priority=None,
+        deleted=False,
+    )
+    db_session.add(new_persona)
+    db_session.commit()
+    db_session.refresh(new_persona)
+    return new_persona
+
+
+def _link_file_to_persona(
+    db_session: Session, persona: Persona, user_file: UserFile
+) -> None:
+    """Insert the Persona__UserFile association row and commit it."""
+    association = Persona__UserFile(user_file_id=user_file.id, persona_id=persona.id)
+    db_session.add(association)
+    db_session.commit()
+
+
+_PATCH_QUEUE_DEPTH = (
+    "onyx.background.celery.tasks.user_file_processing.tasks"
+    ".get_user_file_project_sync_queue_depth"
+)
+
+
+@contextmanager
+def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:
+    """Make a bound Celery task's ``app`` property return ``mock_app``.
+
+    The queue-depth helper named by _PATCH_QUEUE_DEPTH is also stubbed to 0.
+    """
+    # ``app`` is patched on the class of the bound instance, not the instance
+    task_cls = type(task.run.__self__)
+    app_patch = patch.object(
+        task_cls, "app", new_callable=PropertyMock, return_value=mock_app
+    )
+    depth_patch = patch(_PATCH_QUEUE_DEPTH, return_value=0)
+    with app_patch, depth_patch:
+        yield
+
+
+# ---------------------------------------------------------------------------
+# Test: check_for_user_file_project_sync picks up persona sync
+# ---------------------------------------------------------------------------
+
+
+class TestCheckSweepIncludesPersonaSync:
+    """The beat sweep must enqueue persona-sync files, not only project-sync ones."""
+
+    def test_persona_sync_flag_enqueues_task(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """A COMPLETED file flagged only with needs_persona_sync is enqueued."""
+        user = create_test_user(db_session, "persona_sweep")
+        target = _create_completed_user_file(db_session, user, needs_persona_sync=True)
+
+        fake_app = MagicMock()
+
+        with _patch_task_app(check_for_user_file_project_sync, fake_app):
+            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)
+
+        # Collect the user_file_id of every task the sweep sent
+        sent_ids: set[str] = set()
+        for sent in fake_app.send_task.call_args_list:
+            sent_ids.add(sent.kwargs["kwargs"]["user_file_id"])
+        assert str(target.id) in sent_ids
+
+    def test_neither_flag_does_not_enqueue(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """With both sync flags False the file is not enqueued."""
+        user = create_test_user(db_session, "no_sync")
+        target = _create_completed_user_file(db_session, user)
+
+        fake_app = MagicMock()
+
+        with _patch_task_app(check_for_user_file_project_sync, fake_app):
+            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)
+
+        # Only this file's absence is asserted; other enqueued IDs are ignored
+        sent_ids: set[str] = set()
+        for sent in fake_app.send_task.call_args_list:
+            sent_ids.add(sent.kwargs["kwargs"]["user_file_id"])
+        assert str(target.id) not in sent_ids
+
+    def test_both_flags_enqueues_once(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """A file carrying BOTH sync flags is enqueued exactly once."""
+        user = create_test_user(db_session, "both_flags")
+        target = _create_completed_user_file(
+            db_session, user, needs_persona_sync=True, needs_project_sync=True
+        )
+
+        fake_app = MagicMock()
+
+        with _patch_task_app(check_for_user_file_project_sync, fake_app):
+            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)
+
+        sent_ids = [
+            sent.kwargs["kwargs"]["user_file_id"]
+            for sent in fake_app.send_task.call_args_list
+        ]
+        # count() rather than membership: a duplicate enqueue must fail the test
+        assert sent_ids.count(str(target.id)) == 1
+
+
+# ---------------------------------------------------------------------------
+# Test: process_single_user_file_project_sync passes persona_ids to index
+# ---------------------------------------------------------------------------
+
+_PATCH_GET_SETTINGS = (
+    "onyx.background.celery.tasks.user_file_processing.tasks.get_active_search_settings"
+)
+_PATCH_GET_INDICES = (
+    "onyx.background.celery.tasks.user_file_processing.tasks.get_all_document_indices"
+)
+_PATCH_HTTPX_INIT = (
+    "onyx.background.celery.tasks.user_file_processing.tasks.httpx_init_vespa_pool"
+)
+_PATCH_DISABLE_VDB = (
+    "onyx.background.celery.tasks.user_file_processing.tasks.DISABLE_VECTOR_DB"
+)
+
+
+class TestSyncTaskWritesPersonaIds:
+    """The sync task reads persona associations and sends them to the index."""
+
+    def test_passes_persona_ids_to_update_single(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """After linking a file to a persona, sync sends the persona ID."""
+        user = create_test_user(db_session, "sync_persona")
+        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)
+        persona = _create_test_persona(db_session, user)
+        _link_file_to_persona(db_session, persona, uf)
+
+        mock_doc_index = MagicMock()
+        mock_search_settings = MagicMock()
+        mock_search_settings.primary = MagicMock()
+        mock_search_settings.secondary = None
+
+        # Clear any stale per-file lock (presumably the task skips locked files)
+        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
+        redis_client.delete(user_file_project_sync_lock_key(str(uf.id)))
+
+        with (
+            patch(_PATCH_DISABLE_VDB, False),
+            patch(_PATCH_HTTPX_INIT),
+            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),
+            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),
+        ):
+            process_single_user_file_project_sync.run(
+                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
+            )
+
+        mock_doc_index.update_single.assert_called_once()
+        call_args = mock_doc_index.update_single.call_args
+        user_fields: VespaDocumentUserFields = call_args.kwargs["user_fields"]
+        assert user_fields.personas is not None
+        assert persona.id in user_fields.personas
+        assert call_args.args[0] == str(uf.id)
+
+    def test_clears_persona_sync_flag(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """After a successful sync the needs_persona_sync flag is cleared."""
+        user = create_test_user(db_session, "sync_clear")
+        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)
+
+        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
+        redis_client.delete(user_file_project_sync_lock_key(str(uf.id)))
+
+        # DISABLE_VECTOR_DB is patched to True, so no document-index mocks are set up
+        with patch(_PATCH_DISABLE_VDB, True):
+            process_single_user_file_project_sync.run(
+                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
+            )
+
+        db_session.refresh(uf)
+        assert uf.needs_persona_sync is False
+
+    def test_passes_both_project_and_persona_ids(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """A file linked to both a project and a persona gets both IDs."""
+        from onyx.db.models import Project__UserFile
+        from onyx.db.models import UserProject
+
+        user = create_test_user(db_session, "sync_both")
+        uf = _create_completed_user_file(
+            db_session, user, needs_persona_sync=True, needs_project_sync=True
+        )
+        persona = _create_test_persona(db_session, user)
+        _link_file_to_persona(db_session, persona, uf)
+
+        project = UserProject(user_id=user.id, name="test-project", instructions="")
+        db_session.add(project)
+        db_session.commit()
+        db_session.refresh(project)
+
+        link = Project__UserFile(project_id=project.id, user_file_id=uf.id)
+        db_session.add(link)
+        db_session.commit()
+
+        mock_doc_index = MagicMock()
+        mock_search_settings = MagicMock()
+        mock_search_settings.primary = MagicMock()
+        mock_search_settings.secondary = None
+
+        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
+        redis_client.delete(user_file_project_sync_lock_key(str(uf.id)))
+
+        with (
+            patch(_PATCH_DISABLE_VDB, False),
+            patch(_PATCH_HTTPX_INIT),
+            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),
+            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),
+        ):
+            process_single_user_file_project_sync.run(
+                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
+            )
+
+        mock_doc_index.update_single.assert_called_once()  # else call_args is None
+        call_kwargs = mock_doc_index.update_single.call_args.kwargs
+        user_fields: VespaDocumentUserFields = call_kwargs["user_fields"]
+        assert user_fields.personas is not None
+        assert user_fields.user_projects is not None
+        assert persona.id in user_fields.personas
+        assert project.id in user_fields.user_projects
+
+        # Both flags should be cleared
+        db_session.refresh(uf)
+        assert uf.needs_persona_sync is False
+        assert uf.needs_project_sync is False
+
+    def test_deleted_persona_excluded_from_ids(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """A soft-deleted persona should NOT appear in the persona_ids sent to Vespa."""
+        user = create_test_user(db_session, "sync_deleted")
+        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)
+        persona = _create_test_persona(db_session, user)
+        _link_file_to_persona(db_session, persona, uf)
+
+        persona.deleted = True
+        db_session.commit()
+
+        mock_doc_index = MagicMock()
+        mock_search_settings = MagicMock()
+        mock_search_settings.primary = MagicMock()
+        mock_search_settings.secondary = None
+
+        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
+        redis_client.delete(user_file_project_sync_lock_key(str(uf.id)))
+
+        with (
+            patch(_PATCH_DISABLE_VDB, False),
+            patch(_PATCH_HTTPX_INIT),
+            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),
+            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),
+        ):
+            process_single_user_file_project_sync.run(
+                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
+            )
+
+        mock_doc_index.update_single.assert_called_once()  # else call_args is None
+        call_kwargs = mock_doc_index.update_single.call_args.kwargs
+        user_fields: VespaDocumentUserFields = call_kwargs["user_fields"]
+        assert user_fields.personas is not None
+        assert persona.id not in user_fields.personas
+
+
+# ---------------------------------------------------------------------------
+# Test: upsert_persona marks files for persona sync
+# ---------------------------------------------------------------------------
+
+
+class TestUpsertPersonaMarksSyncFlag:
+    """upsert_persona has to flag every affected UserFile with needs_persona_sync."""
+
+    def test_creating_persona_with_files_marks_sync(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """Creating a persona with an attached file flags that file for sync."""
+        owner = create_test_user(db_session, "upsert_create")
+        attached = _create_completed_user_file(db_session, owner)
+        assert attached.needs_persona_sync is False
+        upsert_persona(
+            db_session=db_session,
+            user=owner,
+            name=f"persona-{uuid4().hex[:8]}",
+            description="test",
+            system_prompt="test",
+            task_prompt="test",
+            starter_messages=None,
+            num_chunks=10.0,
+            recency_bias=RecencyBiasSetting.NO_DECAY,
+            llm_relevance_filter=False,
+            llm_filter_extraction=False,
+            llm_model_provider_override=None,
+            llm_model_version_override=None,
+            datetime_aware=None,
+            is_public=True,
+            user_file_ids=[attached.id],
+        )
+
+        db_session.refresh(attached)
+        assert attached.needs_persona_sync is True
+
+    def test_updating_persona_files_marks_both_old_and_new(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """Swapping a persona's file flags both the dropped and the added file."""
+        owner = create_test_user(db_session, "upsert_update")
+        dropped = _create_completed_user_file(db_session, owner)
+        added = _create_completed_user_file(db_session, owner)
+
+        created = upsert_persona(
+            db_session=db_session,
+            user=owner,
+            name=f"persona-{uuid4().hex[:8]}",
+            description="test",
+            system_prompt="test",
+            task_prompt="test",
+            starter_messages=None,
+            num_chunks=10.0,
+            recency_bias=RecencyBiasSetting.NO_DECAY,
+            llm_relevance_filter=False,
+            llm_filter_extraction=False,
+            llm_model_provider_override=None,
+            llm_model_version_override=None,
+            datetime_aware=None,
+            is_public=True,
+            user_file_ids=[dropped.id],
+        )
+
+        # Reset the flag set at creation so only the update's effect is observed
+        dropped.needs_persona_sync = False
+        db_session.commit()
+
+        assert created.num_chunks is not None
+        # Second upsert targets the same persona but attaches only the new file
+        upsert_persona(
+            db_session=db_session,
+            user=owner,
+            persona_id=created.id,
+            name=created.name,
+            description=created.description,
+            system_prompt=created.system_prompt,
+            task_prompt=created.task_prompt,
+            starter_messages=None,
+            num_chunks=created.num_chunks,
+            recency_bias=created.recency_bias,
+            llm_relevance_filter=created.llm_relevance_filter,
+            llm_filter_extraction=created.llm_filter_extraction,
+            llm_model_provider_override=None,
+            llm_model_version_override=None,
+            datetime_aware=None,
+            is_public=created.is_public,
+            user_file_ids=[added.id],
+        )
+
+        db_session.refresh(dropped)
+        db_session.refresh(added)
+        assert dropped.needs_persona_sync is True, "Removed file should be flagged"
+        assert added.needs_persona_sync is True, "Added file should be flagged"
+
+    def test_removing_all_files_marks_old_files(
+        self,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """Emptying a persona's file list flags the file that used to be attached."""
+        owner = create_test_user(db_session, "upsert_remove")
+        attached = _create_completed_user_file(db_session, owner)
+
+        created = upsert_persona(
+            db_session=db_session,
+            user=owner,
+            name=f"persona-{uuid4().hex[:8]}",
+            description="test",
+            system_prompt="test",
+            task_prompt="test",
+            starter_messages=None,
+            num_chunks=10.0,
+            recency_bias=RecencyBiasSetting.NO_DECAY,
+            llm_relevance_filter=False,
+            llm_filter_extraction=False,
+            llm_model_provider_override=None,
+            llm_model_version_override=None,
+            datetime_aware=None,
+            is_public=True,
+            user_file_ids=[attached.id],
+        )
+
+        attached.needs_persona_sync = False
+        db_session.commit()
+
+        assert created.num_chunks is not None
+        upsert_persona(
+            db_session=db_session,
+            user=owner,
+            persona_id=created.id,
+            name=created.name,
+            description=created.description,
+            system_prompt=created.system_prompt,
+            task_prompt=created.task_prompt,
+            starter_messages=None,
+            num_chunks=created.num_chunks,
+            recency_bias=created.recency_bias,
+            llm_relevance_filter=created.llm_relevance_filter,
+            llm_filter_extraction=created.llm_filter_extraction,
+            llm_model_provider_override=None,
+            llm_model_version_override=None,
+            datetime_aware=None,
+            is_public=created.is_public,
+            user_file_ids=[],
+        )
+
+        db_session.refresh(attached)
+        assert attached.needs_persona_sync is True
--- a/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
@@ -0,0 +1,318 @@
+"""
+External dependency unit tests for UserFileIndexingAdapter metadata writing.
+
+Validates that build_metadata_aware_chunks produces DocMetadataAwareIndexChunk
+objects with both `user_project` and `personas` fields populated correctly
+based on actual DB associations.
+
+Uses real PostgreSQL for UserFile/Persona/UserProject rows.
+Mocks the LLM tokenizer and file store since they are not relevant here.
+"""
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+from uuid import uuid4
+
+from sqlalchemy.orm import Session
+
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.models import Document
+from onyx.connectors.models import TextSection
+from onyx.context.search.enums import RecencyBiasSetting
+from onyx.db.enums import UserFileStatus
+from onyx.db.models import Persona
+from onyx.db.models import Persona__UserFile
+from onyx.db.models import Project__UserFile
+from onyx.db.models import User
+from onyx.db.models import UserFile
+from onyx.db.models import UserProject
+from onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter
+from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
+from onyx.indexing.models import ChunkEmbedding
+from onyx.indexing.models import IndexChunk
+from tests.external_dependency_unit.conftest import create_test_user
+from tests.external_dependency_unit.constants import TEST_TENANT_ID
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _create_user_file(db_session: Session, user: User) -> UserFile:
+    user_file = UserFile(  # stored as COMPLETED with a single chunk
+        id=uuid4(),
+        user_id=user.id,
+        status=UserFileStatus.COMPLETED,
+        file_type="text/plain",
+        file_id=f"test_file_{uuid4().hex[:8]}",
+        name=f"test_{uuid4().hex[:8]}.txt",
+        chunk_count=1,
+    )
+    db_session.add(user_file)
+    db_session.commit()
+    db_session.refresh(user_file)
+    return user_file
+
+
+def _create_persona(db_session: Session, user: User) -> Persona:
+    new_persona = Persona(  # minimal public, non-deleted persona owned by ``user``
+        name=f"Test Persona {uuid4().hex[:8]}",
+        description="Test persona",
+        user_id=user.id,
+        users=[user],
+        groups=[],
+        tools=[],
+        document_sets=[],
+        system_prompt="test",
+        task_prompt="test",
+        starter_messages=None,
+        num_chunks=10.0,
+        chunks_above=0,
+        chunks_below=0,
+        recency_bias=RecencyBiasSetting.NO_DECAY,
+        llm_relevance_filter=False,
+        llm_filter_extraction=False,
+        is_visible=True,
+        is_public=True,
+        display_priority=None,
+        deleted=False,
+    )
+    db_session.add(new_persona)
+    db_session.commit()
+    db_session.refresh(new_persona)
+    return new_persona
+
+
+def _create_project(db_session: Session, user: User) -> UserProject:
+    """Persist a UserProject with a random name and empty instructions."""
+    project_row = UserProject(
+        instructions="", name=f"project-{uuid4().hex[:8]}", user_id=user.id
+    )
+    db_session.add(project_row)
+    db_session.commit()
+    # Reload the committed row so its attributes (e.g. id) are populated
+    db_session.refresh(project_row)
+    return project_row
+
+
+def _make_index_chunk(user_file: UserFile) -> IndexChunk:
+    """Return a one-chunk IndexChunk whose source document ID is the UserFile's ID."""
+    # All-zero 768-dim vector: a placeholder embedding, no mini-chunk embeddings
+    embedding = ChunkEmbedding(full_embedding=[0.0] * 768, mini_chunk_embeddings=[])
+    source_doc = Document(
+        id=str(user_file.id),
+        source=DocumentSource.USER_FILE,
+        semantic_identifier=user_file.name,
+        sections=[TextSection(text="test chunk content", link=None)],
+        metadata={},
+    )
+
+    return IndexChunk(
+        source_document=source_doc,
+        chunk_id=0,
+        blurb="test chunk",
+        content="test chunk content",
+        source_links={0: ""},
+        image_file_id=None,
+        section_continuation=False,
+        title_prefix="",
+        metadata_suffix_semantic="",
+        metadata_suffix_keyword="",
+        contextual_rag_reserved_tokens=0,
+        doc_summary="",
+        chunk_context="",
+        mini_chunk_texts=None,
+        large_chunk_id=None,
+        embeddings=embedding,
+        title_embedding=None,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestAdapterWritesBothMetadataFields:
+    """build_metadata_aware_chunks must populate user_project AND personas."""
+
+    @patch(
+        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
+        side_effect=Exception("no LLM in test"),
+    )
+    def test_file_linked_to_persona_gets_persona_id(
+        self,
+        _mock_llm: MagicMock,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """A persona-only file yields chunks with that persona ID and no projects."""
+        owner = create_test_user(db_session, "adapter_persona")
+        uf = _create_user_file(db_session, owner)
+        persona = _create_persona(db_session, owner)
+        db_session.add(Persona__UserFile(persona_id=persona.id, user_file_id=uf.id))
+        db_session.commit()
+
+        adapter = UserFileIndexingAdapter(
+            tenant_id=TEST_TENANT_ID, db_session=db_session
+        )
+        index_chunk = _make_index_chunk(uf)
+        prepare_ctx = DocumentBatchPrepareContext(
+            updatable_docs=[index_chunk.source_document], id_to_boost_map={}
+        )
+        built = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[index_chunk],
+            chunk_content_scores=[1.0],
+            tenant_id=TEST_TENANT_ID,
+            context=prepare_ctx,
+        )
+
+        assert len(built.chunks) == 1
+        aware_chunk = built.chunks[0]
+        assert persona.id in aware_chunk.personas
+        assert aware_chunk.user_project == []
+
+    @patch(
+        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
+        side_effect=Exception("no LLM in test"),
+    )
+    def test_file_linked_to_project_gets_project_id(
+        self,
+        _mock_llm: MagicMock,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """A project-only file yields chunks with that project ID and no personas."""
+        owner = create_test_user(db_session, "adapter_project")
+        uf = _create_user_file(db_session, owner)
+        project = _create_project(db_session, owner)
+
+        db_session.add(Project__UserFile(project_id=project.id, user_file_id=uf.id))
+        db_session.commit()
+
+        adapter = UserFileIndexingAdapter(
+            tenant_id=TEST_TENANT_ID, db_session=db_session
+        )
+        index_chunk = _make_index_chunk(uf)
+        doc = index_chunk.source_document
+        ctx = DocumentBatchPrepareContext(updatable_docs=[doc], id_to_boost_map={})
+
+        built = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[index_chunk],
+            chunk_content_scores=[1.0],
+            tenant_id=TEST_TENANT_ID,
+            context=ctx,
+        )
+
+        assert len(built.chunks) == 1
+        aware_chunk = built.chunks[0]
+        assert project.id in aware_chunk.user_project
+        assert aware_chunk.personas == []
+
+    @patch(
+        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
+        side_effect=Exception("no LLM in test"),
+    )
+    def test_file_linked_to_both_gets_both_ids(
+        self,
+        _mock_llm: MagicMock,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        user = create_test_user(db_session, "adapter_both")
+        uf = _create_user_file(db_session, user)
+        persona = _create_persona(db_session, user)
+        project = _create_project(db_session, user)
+
+        db_session.add(Persona__UserFile(persona_id=persona.id, user_file_id=uf.id))
+        db_session.add(Project__UserFile(project_id=project.id, user_file_id=uf.id))
+        db_session.commit()
+
+        adapter = UserFileIndexingAdapter(
+            tenant_id=TEST_TENANT_ID, db_session=db_session
+        )
+        chunk = _make_index_chunk(uf)
+        context = DocumentBatchPrepareContext(
+            updatable_docs=[chunk.source_document], id_to_boost_map={}
+        )
+
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
+            tenant_id=TEST_TENANT_ID,
+            context=context,
+        )
+
+        aware_chunk = result.chunks[0]
+        assert persona.id in aware_chunk.personas
+        assert project.id in aware_chunk.user_project
+
+    @patch(
+        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
+        side_effect=Exception("no LLM in test"),
+    )
+    def test_file_with_no_associations_gets_empty_lists(
+        self,
+        _mock_llm: MagicMock,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        user = create_test_user(db_session, "adapter_empty")
+        uf = _create_user_file(db_session, user)
+
+        adapter = UserFileIndexingAdapter(
+            tenant_id=TEST_TENANT_ID, db_session=db_session
+        )
+        chunk = _make_index_chunk(uf)
+        context = DocumentBatchPrepareContext(
+            updatable_docs=[chunk.source_document], id_to_boost_map={}
+        )
+
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
+            tenant_id=TEST_TENANT_ID,
+            context=context,
+        )
+
+        aware_chunk = result.chunks[0]
+        assert aware_chunk.personas == []
+        assert aware_chunk.user_project == []
+
+    @patch(
+        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
+        side_effect=Exception("no LLM in test"),
+    )
+    def test_multiple_personas_all_appear(
+        self,
+        _mock_llm: MagicMock,
+        db_session: Session,
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """A file linked to multiple personas should have all their IDs."""
+        user = create_test_user(db_session, "adapter_multi")
+        uf = _create_user_file(db_session, user)
+        persona_a = _create_persona(db_session, user)
+        persona_b = _create_persona(db_session, user)
+
+        db_session.add(Persona__UserFile(persona_id=persona_a.id, user_file_id=uf.id))
+        db_session.add(Persona__UserFile(persona_id=persona_b.id, user_file_id=uf.id))
+        db_session.commit()
+
+        adapter = UserFileIndexingAdapter(
+            tenant_id=TEST_TENANT_ID, db_session=db_session
+        )
+        chunk = _make_index_chunk(uf)
+        context = DocumentBatchPrepareContext(
+            updatable_docs=[chunk.source_document], id_to_boost_map={}
+        )
+
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
+            tenant_id=TEST_TENANT_ID,
+            context=context,
+        )
+
+        aware_chunk = result.chunks[0]
+        assert set(aware_chunk.personas) == {persona_a.id, persona_b.id}
--- a/backend/tests/external_dependency_unit/mock_search_pipeline.py
+++ b/backend/tests/external_dependency_unit/mock_search_pipeline.py
@@ -144,7 +144,8 @@ def use_mock_search_pipeline(
        auto_detect_filters: bool = False,  # noqa: ARG001
        llm: LLM | None = None,  # noqa: ARG001
        project_id: int | None = None,  # noqa: ARG001
-        # Pre-fetched data (used by SearchTool to avoid DB access in parallel)
+        persona_id: int | None = None,  # noqa: ARG001
+        # Pre-fetched data (used by SearchTool to avoid DB access in parallel calls)
        acl_filters: list[str] | None = None,  # noqa: ARG001
        embedding_model: EmbeddingModel | None = None,  # noqa: ARG001
        prefetched_federated_retrieval_infos: (  # noqa: ARG001
--- a/backend/tests/external_dependency_unit/opensearch/test_assistant_knowledge_filter.py
+++ b/backend/tests/external_dependency_unit/opensearch/test_assistant_knowledge_filter.py
@@ -38,6 +38,7 @@ def _get_search_filters(
        tags=[],
        document_sets=[],
        project_id=None,
+        persona_id=None,
        time_cutoff=None,
        min_chunk_index=None,
        max_chunk_index=None,
--- a/backend/tests/external_dependency_unit/opensearch/test_opensearch_client.py
+++ b/backend/tests/external_dependency_unit/opensearch/test_opensearch_client.py
@@ -1,4 +1,4 @@
-"""External dependency unit tests for OpenSearchClient.
+"""External dependency unit tests for OpenSearchIndexClient.

 These tests assume OpenSearch is running and test all implemented methods
 using real schemas, pipelines, and search queries from the codebase.
@@ -19,7 +19,7 @@ from onyx.access.utils import prefix_user_email
 from onyx.configs.constants import DocumentSource
 from onyx.context.search.models import IndexFilters
 from onyx.document_index.interfaces_new import TenantState
-from onyx.document_index.opensearch.client import OpenSearchClient
+from onyx.document_index.opensearch.client import OpenSearchIndexClient
 from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
 from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
 from onyx.document_index.opensearch.opensearch_document_index import (
@@ -125,10 +125,10 @@ def opensearch_available() -> None:
@pytest.fixture(scope="function")
 def test_client(
    opensearch_available: None,  # noqa: ARG001
-) -> Generator[OpenSearchClient, None, None]:
+) -> Generator[OpenSearchIndexClient, None, None]:
    """Creates an OpenSearch client for testing with automatic cleanup."""
    test_index_name = f"test_index_{uuid.uuid4().hex[:8]}"
-    client = OpenSearchClient(index_name=test_index_name)
+    client = OpenSearchIndexClient(index_name=test_index_name)

    yield client  # Test runs here.

@@ -142,7 +142,7 @@ def test_client(


@pytest.fixture(scope="function")
-def search_pipeline(test_client: OpenSearchClient) -> Generator[None, None, None]:
+def search_pipeline(test_client: OpenSearchIndexClient) -> Generator[None, None, None]:
    """Creates a search pipeline for testing with automatic cleanup."""
    test_client.create_search_pipeline(
        pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
@@ -158,9 +158,9 @@ def search_pipeline(test_client: OpenSearchClient) -> Generator[None, None, None


 class TestOpenSearchClient:
-    """Tests for OpenSearchClient."""
+    """Tests for OpenSearchIndexClient."""

-    def test_create_index(self, test_client: OpenSearchClient) -> None:
+    def test_create_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests creating an index with a real schema."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
@@ -176,7 +176,7 @@ class TestOpenSearchClient:
        # Verify index exists.
        assert test_client.validate_index(expected_mappings=mappings) is True

-    def test_delete_existing_index(self, test_client: OpenSearchClient) -> None:
+    def test_delete_existing_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests deleting an existing index returns True."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
@@ -193,7 +193,7 @@ class TestOpenSearchClient:
        assert result is True
        assert test_client.validate_index(expected_mappings=mappings) is False

-    def test_delete_nonexistent_index(self, test_client: OpenSearchClient) -> None:
+    def test_delete_nonexistent_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests deleting a nonexistent index returns False."""
        # Under test.
        # Don't create index, just try to delete.
@@ -202,7 +202,7 @@ class TestOpenSearchClient:
        # Postcondition.
        assert result is False

-    def test_index_exists(self, test_client: OpenSearchClient) -> None:
+    def test_index_exists(self, test_client: OpenSearchIndexClient) -> None:
        """Tests checking if an index exists."""
        # Precondition.
        # Index should not exist before creation.
@@ -219,7 +219,7 @@ class TestOpenSearchClient:
        # Index should exist after creation.
        assert test_client.index_exists() is True

-    def test_validate_index(self, test_client: OpenSearchClient) -> None:
+    def test_validate_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests validating an index."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
@@ -239,7 +239,120 @@ class TestOpenSearchClient:
        # Should return True after creation.
        assert test_client.validate_index(expected_mappings=mappings) is True

-    def test_create_duplicate_index(self, test_client: OpenSearchClient) -> None:
+    def test_put_mapping_idempotent(self, test_client: OpenSearchIndexClient) -> None:
+        """Re-applying an index's own mappings through put_mapping must succeed."""
+        # Precondition.
+        # An index already exists with the full document schema.
+        schema_mappings = DocumentSchema.get_document_schema(
+            vector_dimension=128, multitenant=True
+        )
+        test_client.create_index(
+            mappings=schema_mappings, settings=DocumentSchema.get_index_settings()
+        )
+
+        # Under test.
+        # A second application of the identical mappings should not raise.
+        test_client.put_mapping(schema_mappings)
+
+        # Postcondition.
+        # The index must still validate against those same mappings.
+        still_valid = test_client.validate_index(expected_mappings=schema_mappings)
+        assert still_valid
+
+    def test_put_mapping_adds_new_field(
+        self, test_client: OpenSearchIndexClient
+    ) -> None:
+        """put_mapping can extend an existing index with a brand-new field."""
+
+        def base_properties() -> dict[str, dict[str, object]]:
+            # Built fresh on every call so the initial and updated mapping
+            # bodies never share nested dict objects.
+            return {
+                "document_id": {"type": "keyword"},
+                "chunk_index": {"type": "integer"},
+                "content": {"type": "text"},
+                "content_vector": {
+                    "type": "knn_vector",
+                    "dimension": 128,
+                    "method": {
+                        "name": "hnsw",
+                        "space_type": "cosinesimil",
+                        "engine": "lucene",
+                        "parameters": {"ef_construction": 512, "m": 16},
+                    },
+                },
+            }
+
+        # Precondition.
+        # Index is created with a minimal, strict schema.
+        initial_mappings = {"dynamic": "strict", "properties": base_properties()}
+        test_client.create_index(
+            mappings=initial_mappings, settings=DocumentSchema.get_index_settings()
+        )
+
+        # Under test.
+        # Same properties as before plus one extra keyword field, appended last.
+        extended_properties = base_properties()
+        extended_properties["new_test_field"] = {"type": "keyword"}
+        updated_mappings = {"properties": extended_properties}
+        # Should not raise.
+        test_client.put_mapping(updated_mappings)
+
+        # Postcondition.
+        # The index must validate against the schema that includes the new field.
+        includes_new_field = test_client.validate_index(
+            expected_mappings=updated_mappings
+        )
+        assert includes_new_field
+
+    def test_put_mapping_fails_on_type_change(
+        self, test_client: OpenSearchIndexClient
+    ) -> None:
+        """put_mapping must reject changing the type of an existing field."""
+        # Precondition.
+        # The index starts out with test_field mapped as a keyword.
+        keyword_mappings = {
+            "dynamic": "strict",
+            "properties": {
+                "document_id": {"type": "keyword"},
+                "test_field": {"type": "keyword"},
+            },
+        }
+        test_client.create_index(
+            mappings=keyword_mappings, settings=DocumentSchema.get_index_settings()
+        )
+
+        # Under test and postcondition.
+        # Re-declaring test_field as text conflicts with the existing keyword
+        # mapping, so the call is expected to raise.
+        text_mappings = {
+            "properties": {
+                "document_id": {"type": "keyword"},
+                "test_field": {"type": "text"},
+            },
+        }
+        expected_error = "mapper|illegal_argument_exception"
+        with pytest.raises(Exception, match=expected_error):
+            test_client.put_mapping(text_mappings)
+
+    def test_put_mapping_on_nonexistent_index(
+        self, test_client: OpenSearchIndexClient
+    ) -> None:
+        """put_mapping against an index that was never created must raise."""
+        # Precondition.
+        # No create_index call is made, so the index is absent.
+        schema_mappings = DocumentSchema.get_document_schema(
+            vector_dimension=128, multitenant=True
+        )
+
+        # Under test and postcondition.
+        missing_index_error = "index_not_found_exception|404"
+        with pytest.raises(Exception, match=missing_index_error):
+            test_client.put_mapping(schema_mappings)
+
+    def test_create_duplicate_index(self, test_client: OpenSearchIndexClient) -> None:
        """Tests creating an index twice raises an error."""
        # Precondition.
        mappings = DocumentSchema.get_document_schema(
@@ -254,14 +367,14 @@ class TestOpenSearchClient:
        with pytest.raises(Exception, match="already exists"):
            test_client.create_index(mappings=mappings, settings=settings)

-    def test_update_settings(self, test_client: OpenSearchClient) -> None:
+    def test_update_settings(self, test_client: OpenSearchIndexClient) -> None:
        """Tests that update_settings raises NotImplementedError."""
        # Under test and postcondition.
        with pytest.raises(NotImplementedError):
            test_client.update_settings(settings={})

    def test_create_and_delete_search_pipeline(
-        self, test_client: OpenSearchClient
+        self, test_client: OpenSearchIndexClient
    ) -> None:
        """Tests creating and deleting a search pipeline."""
        # Under test and postcondition.
@@ -278,7 +391,7 @@ class TestOpenSearchClient:
        )

    def test_index_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests indexing a document."""
        # Precondition.
@@ -306,7 +419,7 @@ class TestOpenSearchClient:
        )

    def test_bulk_index_documents(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests bulk indexing documents."""
        # Precondition.
@@ -337,7 +450,7 @@ class TestOpenSearchClient:
        )

    def test_index_duplicate_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests indexing a duplicate document raises an error."""
        # Precondition.
@@ -365,7 +478,7 @@ class TestOpenSearchClient:
            test_client.index_document(document=doc, tenant_state=tenant_state)

    def test_get_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests getting a document."""
        # Precondition.
@@ -401,7 +514,7 @@ class TestOpenSearchClient:
        assert retrieved_doc == original_doc

    def test_get_nonexistent_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests getting a nonexistent document raises an error."""
        # Precondition.
@@ -419,7 +532,7 @@ class TestOpenSearchClient:
            )

    def test_delete_existing_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests deleting an existing document returns True."""
        # Precondition.
@@ -455,7 +568,7 @@ class TestOpenSearchClient:
            test_client.get_document(document_chunk_id=doc_chunk_id)

    def test_delete_nonexistent_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests deleting a nonexistent document returns False."""
        # Precondition.
@@ -476,7 +589,7 @@ class TestOpenSearchClient:
        assert result is False

    def test_delete_by_query(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests deleting documents by query."""
        # Precondition.
@@ -552,7 +665,7 @@ class TestOpenSearchClient:
        assert len(keep_ids) == 1

    def test_update_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests updating a document's properties."""
        # Precondition.
@@ -601,7 +714,7 @@ class TestOpenSearchClient:
        assert updated_doc.public == doc.public

    def test_update_nonexistent_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests updating a nonexistent document raises an error."""
        # Precondition.
@@ -623,7 +736,7 @@ class TestOpenSearchClient:

    def test_hybrid_search_with_pipeline(
        self,
-        test_client: OpenSearchClient,
+        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
@@ -704,7 +817,7 @@ class TestOpenSearchClient:

    def test_search_empty_index(
        self,
-        test_client: OpenSearchClient,
+        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
@@ -743,7 +856,7 @@ class TestOpenSearchClient:

    def test_hybrid_search_with_pipeline_and_filters(
        self,
-        test_client: OpenSearchClient,
+        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
@@ -863,7 +976,7 @@ class TestOpenSearchClient:

    def test_hybrid_search_with_pipeline_and_filters_returns_chunks_with_related_content_first(
        self,
-        test_client: OpenSearchClient,
+        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
@@ -993,7 +1106,7 @@ class TestOpenSearchClient:
            previous_score = current_score

    def test_delete_by_query_multitenant_isolation(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """
        Tests delete_by_query respects tenant boundaries in multi-tenant mode.
@@ -1087,7 +1200,7 @@ class TestOpenSearchClient:
        assert set(remaining_y_ids) == expected_y_ids

    def test_delete_by_query_nonexistent_document(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """
        Tests delete_by_query for non-existent document returns 0 deleted.
@@ -1116,7 +1229,7 @@ class TestOpenSearchClient:
        assert num_deleted == 0

    def test_search_for_document_ids(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests search_for_document_ids method returns correct chunk IDs."""
        # Precondition.
@@ -1181,7 +1294,7 @@ class TestOpenSearchClient:
        assert set(chunk_ids) == expected_ids

    def test_search_with_no_document_access_can_retrieve_all_documents(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """
        Tests search with no document access can retrieve all documents, even
@@ -1259,7 +1372,7 @@ class TestOpenSearchClient:

    def test_time_cutoff_filter(
        self,
-        test_client: OpenSearchClient,
+        test_client: OpenSearchIndexClient,
        search_pipeline: None,  # noqa: ARG002
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
@@ -1352,7 +1465,7 @@ class TestOpenSearchClient:
        )

    def test_random_search(
-        self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
+        self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Tests the random search query works."""
        # Precondition.
--- a/backend/tests/external_dependency_unit/opensearch_migration/test_opensearch_migration_tasks.py
+++ b/backend/tests/external_dependency_unit/opensearch_migration/test_opensearch_migration_tasks.py
@@ -37,6 +37,7 @@ from onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapp
 from onyx.db.search_settings import get_active_search_settings
 from onyx.document_index.interfaces_new import TenantState
 from onyx.document_index.opensearch.client import OpenSearchClient
+from onyx.document_index.opensearch.client import OpenSearchIndexClient
 from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
 from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
 from onyx.document_index.opensearch.schema import DocumentChunk
@@ -74,7 +75,7 @@ CHUNK_COUNT = 5


 def _get_document_chunks_from_opensearch(
-    opensearch_client: OpenSearchClient, document_id: str, current_tenant_id: str
+    opensearch_client: OpenSearchIndexClient, document_id: str, current_tenant_id: str
 ) -> list[DocumentChunk]:
    opensearch_client.refresh_index()
    filters = IndexFilters(access_control_list=None, tenant_id=current_tenant_id)
@@ -95,7 +96,7 @@ def _get_document_chunks_from_opensearch(


 def _delete_document_chunks_from_opensearch(
-    opensearch_client: OpenSearchClient, document_id: str, current_tenant_id: str
+    opensearch_client: OpenSearchIndexClient, document_id: str, current_tenant_id: str
 ) -> None:
    opensearch_client.refresh_index()
    query_body = DocumentQuery.delete_from_document_id_query(
@@ -283,10 +284,10 @@ def vespa_document_index(
 def opensearch_client(
    db_session: Session,
    full_deployment_setup: None,  # noqa: ARG001
-) -> Generator[OpenSearchClient, None, None]:
+) -> Generator[OpenSearchIndexClient, None, None]:
    """Creates an OpenSearch client for the test tenant."""
    active = get_active_search_settings(db_session)
-    yield OpenSearchClient(index_name=active.primary.index_name)  # Test runs here.
+    yield OpenSearchIndexClient(index_name=active.primary.index_name)  # Test runs here.


@pytest.fixture(scope="module")
@@ -330,7 +331,7 @@ def patch_get_vespa_chunks_page_size() -> Generator[int, None, None]:
 def test_documents(
    db_session: Session,
    vespa_document_index: VespaDocumentIndex,
-    opensearch_client: OpenSearchClient,
+    opensearch_client: OpenSearchIndexClient,
    patch_get_vespa_chunks_page_size: int,
 ) -> Generator[list[Document], None, None]:
    """
@@ -411,7 +412,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
-        opensearch_client: OpenSearchClient,
+        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
@@ -480,7 +481,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
-        opensearch_client: OpenSearchClient,
+        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
@@ -618,7 +619,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
-        opensearch_client: OpenSearchClient,
+        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
@@ -712,7 +713,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
        db_session: Session,
        test_documents: list[Document],
        vespa_document_index: VespaDocumentIndex,
-        opensearch_client: OpenSearchClient,
+        opensearch_client: OpenSearchIndexClient,
        test_embedding_dimension: int,
        clean_migration_tables: None,  # noqa: ARG002
        enable_opensearch_indexing_for_onyx: None,  # noqa: ARG002
--- a/backend/tests/external_dependency_unit/tools/test_oauth_token_manager.py
+++ b/backend/tests/external_dependency_unit/tools/test_oauth_token_manager.py
@@ -20,6 +20,7 @@ from onyx.auth.oauth_token_manager import OAuthTokenManager
 from onyx.db.models import OAuthConfig
 from onyx.db.oauth_config import create_oauth_config
 from onyx.db.oauth_config import upsert_user_oauth_token
+from onyx.utils.sensitive import SensitiveValue
 from tests.external_dependency_unit.conftest import create_test_user


@@ -491,3 +492,19 @@ class TestOAuthTokenManagerURLBuilding:
        # Should use & instead of ? since URL already has query params
        assert "foo=bar&" in url or "?foo=bar" in url
        assert "client_id=custom_client_id" in url
+
+
+class TestUnwrapSensitiveStr:
+    """Covers OAuthTokenManager._unwrap_sensitive_str."""
+
+    def test_unwrap_sensitive_str(self) -> None:
+        """SensitiveValue inputs are unwrapped; plain strings pass through."""
+        # Wrapped input: the stored bytes are plain UTF-8 and the decrypt
+        # function simply decodes them.
+        wrapped = SensitiveValue[str](
+            encrypted_bytes=b"test_client_id",
+            decrypt_fn=lambda raw: raw.decode(),
+        )
+        unwrapped = OAuthTokenManager._unwrap_sensitive_str(wrapped)
+        assert unwrapped == "test_client_id"
+
+        # Plain str input comes back unchanged.
+        passthrough = OAuthTokenManager._unwrap_sensitive_str("plain_string")
+        assert passthrough == "plain_string"
--- a/backend/tests/external_dependency_unit/tools/test_python_tool_server_enabled.py
+++ b/backend/tests/external_dependency_unit/tools/test_python_tool_server_enabled.py
@@ -0,0 +1,53 @@
+"""Tests that PythonTool.is_available() respects the server_enabled DB flag.
+
+Uses a real DB session with CODE_INTERPRETER_BASE_URL mocked so the
+environment-variable check passes and the DB flag is the deciding factor.
+"""
+
+from unittest.mock import patch
+
+from sqlalchemy.orm import Session
+
+from onyx.db.code_interpreter import fetch_code_interpreter_server
+from onyx.db.code_interpreter import update_code_interpreter_server_enabled
+from onyx.tools.tool_implementations.python.python_tool import PythonTool
+
+
+def test_python_tool_unavailable_when_server_disabled(
+    db_session: Session,
+) -> None:
+    """PythonTool.is_available is False while server_enabled is False.
+
+    CODE_INTERPRETER_BASE_URL is patched to a non-empty URL so that the DB
+    flag is the deciding factor. The flag's original value is written back
+    afterwards, whether or not the assertion passes.
+    """
+    original_flag = fetch_code_interpreter_server(db_session).server_enabled
+    base_url_target = (
+        "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL"
+    )
+
+    try:
+        update_code_interpreter_server_enabled(db_session, enabled=False)
+        with patch(base_url_target, "http://fake:8888"):
+            available = PythonTool.is_available(db_session)
+        assert available is False
+    finally:
+        update_code_interpreter_server_enabled(db_session, enabled=original_flag)
+
+def test_python_tool_available_when_server_enabled(
+    db_session: Session,
+) -> None:
+    """PythonTool.is_available is True while server_enabled is True.
+
+    CODE_INTERPRETER_BASE_URL is patched to a non-empty URL so that the DB
+    flag is the deciding factor. The flag's original value is written back
+    afterwards, whether or not the assertion passes.
+    """
+    original_flag = fetch_code_interpreter_server(db_session).server_enabled
+    base_url_target = (
+        "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL"
+    )
+
+    try:
+        update_code_interpreter_server_enabled(db_session, enabled=True)
+        with patch(base_url_target, "http://fake:8888"):
+            available = PythonTool.is_available(db_session)
+        assert available is True
+    finally:
+        update_code_interpreter_server_enabled(db_session, enabled=original_flag)
--- a/backend/tests/integration/Dockerfile
+++ b/backend/tests/integration/Dockerfile
@@ -38,5 +38,5 @@ COPY --from=openapi-client /local/onyx_openapi_client /app/generated/onyx_openap

 ENV PYTHONPATH=/app

-ENTRYPOINT ["pytest", "-s"]
+ENTRYPOINT ["pytest", "-s", "-rs"]
 CMD ["/app/tests/integration", "--ignore=/app/tests/integration/multitenant_tests"]
--- a/backend/tests/integration/common_utils/managers/chat.py
+++ b/backend/tests/integration/common_utils/managers/chat.py
@@ -76,9 +76,12 @@ class ChatSessionManager:
        user_performing_action: DATestUser,
        persona_id: int = 0,
        description: str = "Test chat session",
+        project_id: int | None = None,
    ) -> DATestChatSession:
        chat_session_creation_req = ChatSessionCreationRequest(
-            persona_id=persona_id, description=description
+            persona_id=persona_id,
+            description=description,
+            project_id=project_id,
        )
        response = requests.post(
            f"{API_SERVER_URL}/chat/create-chat-session",
--- a/backend/tests/integration/common_utils/managers/query_history.py
+++ b/backend/tests/integration/common_utils/managers/query_history.py
@@ -1,3 +1,4 @@
+import time
 from datetime import datetime
 from urllib.parse import urlencode
 from uuid import UUID
@@ -8,8 +9,10 @@ from requests.models import CaseInsensitiveDict
 from ee.onyx.server.query_history.models import ChatSessionMinimal
 from ee.onyx.server.query_history.models import ChatSessionSnapshot
 from onyx.configs.constants import QAFeedbackType
+from onyx.db.enums import TaskStatus
 from onyx.server.documents.models import PaginatedReturn
 from tests.integration.common_utils.constants import API_SERVER_URL
+from tests.integration.common_utils.constants import MAX_DELAY
 from tests.integration.common_utils.test_models import DATestUser


@@ -69,9 +72,42 @@ class QueryHistoryManager:
        if end_time:
            query_params["end"] = end_time.isoformat()

-        response = requests.get(
-            url=f"{API_SERVER_URL}/admin/query-history-csv?{urlencode(query_params, doseq=True)}",
+        start_response = requests.post(
+            url=f"{API_SERVER_URL}/admin/query-history/start-export?{urlencode(query_params, doseq=True)}",
            headers=user_performing_action.headers,
        )
-        response.raise_for_status()
-        return response.headers, response.content.decode()
+        start_response.raise_for_status()
+        request_id = start_response.json()["request_id"]
+
+        deadline = time.time() + MAX_DELAY
+        while time.time() < deadline:
+            status_response = requests.get(
+                url=f"{API_SERVER_URL}/admin/query-history/export-status",
+                params={"request_id": request_id},
+                headers=user_performing_action.headers,
+            )
+            status_response.raise_for_status()
+            status = status_response.json()["status"]
+            if status == TaskStatus.SUCCESS:
+                break
+            if status == TaskStatus.FAILURE:
+                raise RuntimeError("Query history export task failed")
+            time.sleep(2)
+        else:
+            raise TimeoutError(
+                f"Query history export not completed within {MAX_DELAY} seconds"
+            )
+
+        download_response = requests.get(
+            url=f"{API_SERVER_URL}/admin/query-history/download",
+            params={"request_id": request_id},
+            headers=user_performing_action.headers,
+        )
+        download_response.raise_for_status()
+
+        if not download_response.content:
+            raise RuntimeError(
+                "Query history CSV download returned zero-length content"
+            )
+
+        return download_response.headers, download_response.content.decode()
--- a/backend/tests/integration/common_utils/managers/scim_token.py
+++ b/backend/tests/integration/common_utils/managers/scim_token.py
@@ -0,0 +1,79 @@
+import requests
+
+from tests.integration.common_utils.constants import API_SERVER_URL
+from tests.integration.common_utils.constants import GENERAL_HEADERS
+from tests.integration.common_utils.test_models import DATestScimToken
+from tests.integration.common_utils.test_models import DATestUser
+
+
+class ScimTokenManager:
+    """Integration-test helpers for the SCIM token admin API and /scim/v2 routes."""
+
+    @staticmethod
+    def _to_model(payload: dict, with_raw_token: bool = False) -> DATestScimToken:
+        """Copy the token fields of a JSON payload into a DATestScimToken."""
+        fields = {
+            "id": payload["id"],
+            "name": payload["name"],
+            "token_display": payload["token_display"],
+            "is_active": payload["is_active"],
+            "created_at": payload["created_at"],
+            "last_used_at": payload.get("last_used_at"),
+        }
+        # Read "raw_token" only on request; a missing key then raises KeyError.
+        if with_raw_token:
+            fields["raw_token"] = payload["raw_token"]
+        return DATestScimToken(**fields)
+
+    @staticmethod
+    def _scim_url(path: str) -> str:
+        """Prefix `path` with the API server's /scim/v2 base."""
+        return f"{API_SERVER_URL}/scim/v2{path}"
+
+    @staticmethod
+    def create(name: str, user_performing_action: DATestUser) -> DATestScimToken:
+        """POST a new SCIM token; the returned model includes `raw_token`."""
+        token_endpoint = f"{API_SERVER_URL}/admin/enterprise-settings/scim/token"
+        resp = requests.post(
+            token_endpoint,
+            json={"name": name},
+            headers=user_performing_action.headers,
+            timeout=60,
+        )
+        resp.raise_for_status()
+        return ScimTokenManager._to_model(resp.json(), with_raw_token=True)
+
+    @staticmethod
+    def get_active(user_performing_action: DATestUser) -> DATestScimToken | None:
+        """GET the token endpoint; return None on 404, else the parsed token."""
+        token_endpoint = f"{API_SERVER_URL}/admin/enterprise-settings/scim/token"
+        resp = requests.get(
+            token_endpoint,
+            headers=user_performing_action.headers,
+            timeout=60,
+        )
+        # A 404 is reported as None instead of raising.
+        if resp.status_code == 404:
+            return None
+        resp.raise_for_status()
+        return ScimTokenManager._to_model(resp.json())
+
+    @staticmethod
+    def get_scim_headers(raw_token: str) -> dict[str, str]:
+        """Return a copy of GENERAL_HEADERS with a Bearer Authorization entry."""
+        scim_headers = dict(GENERAL_HEADERS)
+        scim_headers["Authorization"] = f"Bearer {raw_token}"
+        return scim_headers
+
+    @staticmethod
+    def scim_get(path: str, raw_token: str) -> requests.Response:
+        """GET a /scim/v2 path using `raw_token` as the bearer credential."""
+        endpoint = ScimTokenManager._scim_url(path)
+        bearer_headers = ScimTokenManager.get_scim_headers(raw_token)
+        return requests.get(endpoint, headers=bearer_headers, timeout=60)
+
+    @staticmethod
+    def scim_get_no_auth(path: str) -> requests.Response:
+        """GET a /scim/v2 path with GENERAL_HEADERS only (no bearer token added)."""
+        endpoint = ScimTokenManager._scim_url(path)
+        return requests.get(endpoint, headers=GENERAL_HEADERS, timeout=60)
--- a/backend/tests/integration/common_utils/test_models.py
+++ b/backend/tests/integration/common_utils/test_models.py
@@ -42,6 +42,18 @@ class DATestPAT(BaseModel):
    last_used_at: str | None = None


+class DATestScimToken(BaseModel):
+    """SCIM bearer token record used by the integration tests."""
+
+    id: int
+    name: str
+    raw_token: str | None = None  # Only present on initial creation
+    token_display: str  # presumably a masked/shortened form of the token — confirm
+    is_active: bool
+    created_at: str  # typed as a plain string; no datetime parsing is declared here
+    last_used_at: str | None = None  # optional; defaults to None when not supplied
+
+
 class DATestAPIKey(BaseModel):
    api_key_id: int
    api_key_display: str
--- a/backend/tests/integration/connector_job_tests/slack/conftest.py
+++ b/backend/tests/integration/connector_job_tests/slack/conftest.py
@@ -6,16 +6,26 @@ import pytest
 from onyx.connectors.slack.models import ChannelType
 from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager

-# from tests.load_env_vars import load_env_vars
-
-# load_env_vars()
+SLACK_ADMIN_EMAIL = os.environ.get("SLACK_ADMIN_EMAIL", "evan@onyx.app")
+SLACK_TEST_USER_1_EMAIL = os.environ.get("SLACK_TEST_USER_1_EMAIL", "evan+1@onyx.app")
+SLACK_TEST_USER_2_EMAIL = os.environ.get("SLACK_TEST_USER_2_EMAIL", "justin@onyx.app")


-@pytest.fixture()
-def slack_test_setup() -> Generator[tuple[ChannelType, ChannelType], None, None]:
-    slack_client = SlackManager.get_slack_client(os.environ["SLACK_BOT_TOKEN"])
+def _provision_slack_channels(
+    bot_token: str,
+) -> Generator[tuple[ChannelType, ChannelType], None, None]:
+    slack_client = SlackManager.get_slack_client(bot_token)
+
+    auth_info = slack_client.auth_test()
+    print(f"\nSlack workspace: {auth_info.get('team')} ({auth_info.get('url')})")
+
    user_map = SlackManager.build_slack_user_email_id_map(slack_client)
-    admin_user_id = user_map["admin@example.com"]
+    if SLACK_ADMIN_EMAIL not in user_map:
+        raise KeyError(
+            f"'{SLACK_ADMIN_EMAIL}' not found in Slack workspace. "
+            f"Available emails: {sorted(user_map.keys())}"
+        )
+    admin_user_id = user_map[SLACK_ADMIN_EMAIL]

    (
        public_channel,
@@ -27,5 +37,16 @@ def slack_test_setup() -> Generator[tuple[ChannelType, ChannelType], None, None]

    yield public_channel, private_channel

-    # This part will always run after the test, even if it fails
    SlackManager.cleanup_after_test(slack_client=slack_client, test_id=run_id)
+
+
+@pytest.fixture()
+def slack_test_setup() -> Generator[tuple[ChannelType, ChannelType], None, None]:
+    """Provision Slack channels using the bot token from SLACK_BOT_TOKEN."""
+    bot_token = os.environ["SLACK_BOT_TOKEN"]
+    yield from _provision_slack_channels(bot_token)
+
+
+@pytest.fixture()
+def slack_perm_sync_test_setup() -> (
+    Generator[tuple[ChannelType, ChannelType], None, None]
+):
+    """Provision Slack channels using the token from SLACK_BOT_TOKEN_TEST_SPACE."""
+    test_space_token = os.environ["SLACK_BOT_TOKEN_TEST_SPACE"]
+    yield from _provision_slack_channels(test_space_token)
--- a/backend/tests/integration/connector_job_tests/slack/slack_api_utils.py
+++ b/backend/tests/integration/connector_job_tests/slack/slack_api_utils.py
@@ -16,7 +16,6 @@ from uuid import uuid4
 from slack_sdk import WebClient
 from slack_sdk.errors import SlackApiError

-from onyx.connectors.slack.connector import default_msg_filter
 from onyx.connectors.slack.connector import get_channel_messages
 from onyx.connectors.slack.models import ChannelType
 from onyx.connectors.slack.utils import make_paginated_slack_api_call
@@ -113,9 +112,6 @@ def _delete_slack_conversation_messages(
    channel_id = _get_slack_channel_id(channel)
    for message_batch in get_channel_messages(slack_client, channel):
        for message in message_batch:
-            if default_msg_filter(message):
-                continue
-
            if message_to_delete and message.get("text") != message_to_delete:
                continue
            print(" removing message: ", message.get("text"))
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Dane Urban	2036847a2d	Lookup by name	2026-02-27 12:40:18 -08:00
Dane Urban	9ff1ac862e	.	2026-02-27 12:35:38 -08:00
Dane Urban	e4179629ae	Test	2026-02-27 10:29:10 -08:00
Dane Urban	4077c20def	Add python tool to default persona	2026-02-27 10:14:53 -08:00
Yuhong Sun	4d256c5666	chore: remove instance of Assistant from frontend (#8848 ) Co-authored-by: Nik <nikolas.garza5@gmail.com>	2026-02-27 04:22:28 +00:00
Danelegend	2e53496f46	feat: Code interpreter admin page visuals (#8729 )	2026-02-27 04:01:02 +00:00
acaprau	63a206706a	docs(best practices): Add comment about import-time side effects and main.py files (#8820 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2026-02-27 01:29:56 +00:00
Nikolas Garza	28427b3e5f	fix(metrics): restore default HTTP request counter and histogram metrics (#8842 )	2026-02-27 00:53:22 +00:00
Justin Tahara	3cafcd8a5e	chore(llm): add OpenRouter nightly tests (#8818 )	2026-02-26 23:54:25 +00:00
Justin Tahara	f2c50b7bb5	chore(llm): add Ollama nightly tests (#8817 )	2026-02-26 23:28:40 +00:00
Jamison Lahman	6b28c6bbfc	fix(fe): Search Actions popover has consistent hover states (#8826 )	2026-02-26 23:16:09 +00:00
Justin Tahara	226e801665	chore(llm): add Azure nightly tests (#8816 )	2026-02-26 23:05:03 +00:00
Justin Tahara	be13aa1310	chore(llm): add Vertex AI nightly tests (#8813 )	2026-02-26 22:38:05 +00:00
Nikolas Garza	45d38c4906	feat(metrics): add per-tenant Prometheus metrics (#8822 )	2026-02-26 22:37:35 +00:00
Danelegend	8aab518532	fix: Admin page modal centering excludes sidebar (#8823 )	2026-02-26 22:27:58 +00:00
Nikolas Garza	da6ce10e86	test(scim): add integration tests for SCIM token management (#8819 )	2026-02-26 22:22:16 +00:00
Nikolas Garza	aaf8253520	fix(ee): show subscription text on expired access page for cloud users (#8804 )	2026-02-26 22:15:44 +00:00
Jamison Lahman	7c7f81b164	chore(fe): add feature agent to editor page (#8814 )	2026-02-26 22:12:20 +00:00
Justin Tahara	2d4a3c72e9	chore(llm): Nightly Bedrock Tests (#8812 )	2026-02-26 22:10:31 +00:00
acaprau	7c51712018	fix(db ssl): Remove import-time side effect of creating SSL context if IAM enabled (#8811 )	2026-02-26 21:37:13 +00:00
Evan Lohn	aa5614695d	feat: sharepoint tenant avoid org get (#8802 )	2026-02-26 21:28:56 +00:00
Jamison Lahman	8d7255d3c4	chore(fe): support featured agents w/o being public (#8809 )	2026-02-26 21:16:23 +00:00
Evan Lohn	d403498f48	feat: context injection unification (#8687 )	2026-02-26 21:11:19 +00:00
dependabot[bot]	9ef3095c17	chore(deps): bump pypdf from 6.6.2 to 6.7.3 (#8808 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-02-26 20:42:01 +00:00
Justin Tahara	a39e93a0cb	chore(llm): LLM Integration Tests Generic Setup (#8803 )	2026-02-26 19:59:19 +00:00
Jamison Lahman	46d73cdfee	fix(docker): prefer user runtime docker socket (#8799 )	2026-02-26 10:55:44 -08:00
Raunak Bhagat	1e04ce78e0	feat(opal): add Hoverable compound component (#8798 )	2026-02-26 17:08:53 +00:00
Jamison Lahman	f9b81c1725	feat(agents): share agents with labels or featured (#8742 ) Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>	2026-02-26 16:21:05 +00:00
SubashMohan	3bc1b89fee	fix(memory): timeline UI alignment issues and highlighting issue (#8753 )	2026-02-26 08:46:43 +00:00
Nikolas Garza	01743d99d4	fix(billing): handle manual license users without Stripe subscription (#8787 )	2026-02-26 08:07:14 +00:00
acaprau	092c1db7e0	chore(opensearch): Allow programatic schema updates (#8794 )	2026-02-26 07:49:56 +00:00
acaprau	40ac0d859a	chore(opensearch): OpenSearchClient implements context manager, also closes on del (#8781 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2026-02-26 07:38:16 +00:00
SubashMohan	929e58361f	fix: resolve OAuth token manager using masked secrets (#8673 )	2026-02-26 07:06:51 +00:00
SubashMohan	6d472df7c5	fix(timeline): Fix double-collapse and improve tool status messages (#8751 )	2026-02-26 07:05:48 +00:00
acaprau	cfa7acd904	chore(opensearch): MT cloud should verify index on document index init, and do cluster setup once at start (#8776 )	2026-02-26 06:42:06 +00:00
Danelegend	5c5a6f943b	chore: deprecate llm provider fields (#8783 )	2026-02-26 05:27:28 +00:00
Evan Lohn	d04128b8b1	fix: sharepoint unquote (#8786 )	2026-02-26 03:38:46 +00:00
Nikolas Garza	bbebdf8f78	feat(scim): Entra ID enterprise extension support [3/3] (#8747 )	2026-02-26 02:32:04 +00:00
Nikolas Garza	161279a2d5	feat(scim): field round-tripping for IdP attribute preservation [2/3] (#8746 )	2026-02-26 02:01:13 +00:00
Jamison Lahman	e5ebb45a20	chore(devtools): upgrade `ods`: v0.6.1->v0.6.2 (#8773 )	2026-02-26 01:57:25 +00:00
Evan Lohn	320ba9cb1b	refactor: filter by persona id during search (#8683 )	2026-02-26 01:51:00 +00:00
Nikolas Garza	f2e8cb3114	fix(slack): sanitize HTML tags and broken citation links in bot responses (#8767 )	2026-02-26 01:47:44 +00:00
Nikolas Garza	43054a28ec	feat(scim): SCIM 2.0 protocol compliance fixes [1/3] (#8745 )	2026-02-26 01:33:08 +00:00
Justin Tahara	dc74aa7b1f	chore(llm): Add OpenAI Integration Tests (#8711 )	2026-02-26 00:58:28 +00:00
Raunak Bhagat	bd773191c2	feat(opal): add more icons (#8778 )	2026-02-26 00:38:54 +00:00
Evan Lohn	66dbff41e6	refactor: extend sync mechanism to persona files (#8682 )	2026-02-26 00:32:30 +00:00
roshan	1dcffe38bc	fix: Invoke generate_agents_md.py in K8s to populate knowledge sources (#8768 ) Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>	2026-02-26 00:04:10 +00:00
Evan Lohn	c35e883564	refactor: persona id in vector db by indexing (#8681 )	2026-02-25 22:51:57 +00:00
Jamison Lahman	fefcd58481	chore(devtools): `ods web` to run web/package.json scripts (#8766 ) Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>	2026-02-25 14:05:29 -08:00
Jamison Lahman	bdc89d9e3f	chore(fe): opal button implements responsiveHideText (#8764 )	2026-02-25 21:05:08 +00:00
Evan Lohn	f4d777b80d	refactor: persona id in vector db (#8680 )	2026-02-25 20:42:38 +00:00
acaprau	da4d57b5e3	chore(devtools): Make AGENTS.md reference contributing_guides/best_practices.md (#8760 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2026-02-25 20:27:12 +00:00
Evan Lohn	dcdcd067bd	fix: drive 403 rate limits (#8762 )	2026-02-25 20:12:36 +00:00
Evan Lohn	8b15a29723	feat: slab connector validation (#8758 )	2026-02-25 20:00:42 +00:00
Danelegend	763853674f	feat(ci): Add preview modal for data types (#8752 )	2026-02-25 19:52:19 +00:00
Jamison Lahman	429b6f3465	fix(fe): modal aligning with detached element after navigation (#8676 )	2026-02-25 19:33:07 +00:00
Danelegend	37d5be1b40	feat: python tool not added when no code interpretter server (#8749 )	2026-02-25 19:17:42 +00:00
Jamison Lahman	8ab99dbb06	chore(fe): add hover style to AgentCard (#8689 )	2026-02-25 19:08:00 +00:00
Jamison Lahman	52799e9c7a	fix(fe): middle align human chat message text (#8756 )	2026-02-25 19:00:01 +00:00
Jamison Lahman	aef009cc97	chore(fe): foldable buttons display text via tooltip when disabled (#8735 )	2026-02-25 18:39:53 +00:00
Evan Lohn	18d1ea1770	fix: sharepoint driveItem perm sync (#8698 )	2026-02-25 18:29:26 +00:00
Bo-Onyx	f336ad00f4	fix(user invitation): failed but no warning. (#8731 ) Co-authored-by: Bo Yang <boyang@Bos-MacBook-Pro.local>	2026-02-25 17:23:39 +00:00
SubashMohan	0558e687d9	fix: persist onboarding dismissal in localStorage with user-specific keys (#8674 )	2026-02-25 06:22:17 +00:00
roshan	784a99e24a	updated demo data (#8748 )	2026-02-24 19:59:46 -08:00
Justin Tahara	da1f5a11f4	chore(cherry-pick): Alerting on Failed Cherry-Picks (#8744 )	2026-02-25 02:09:19 +00:00
Justin Tahara	5633805890	chore(devtools): Upgrade ods from 0.6.0 -> 0.6.1 (#8743 )	2026-02-25 02:01:20 +00:00
Danelegend	0817b45ae1	feat: Get code interpreter config route (#8739 )	2026-02-25 01:49:30 +00:00
Justin Tahara	af0e4bdebc	fix(slack): Cleaning up URL Links (#8569 )	2026-02-25 01:42:12 +00:00