Compare commits

..

60 Commits

Author SHA1 Message Date
Dane Urban 908d360011 . 2026-02-06 17:52:23 -08:00
Dane Urban 30578bdf9a n 2026-02-06 17:38:36 -08:00
Dane Urban aebde89432 nits 2026-02-06 16:25:00 -08:00
Dane Urban 4a4b4bb378 t 2026-02-06 13:39:05 -08:00
Dane Urban a8d231976a nit 2026-02-06 09:56:16 -08:00
Dane Urban 9c8ae5bb4b nit 2026-02-05 17:07:24 -08:00
Dane Urban 0fc1fa3d36 nits 2026-02-05 10:28:59 -08:00
Dane Urban 94633698c3 nit 2026-02-03 00:42:20 -08:00
Dane Urban 6ae15589cd nits 2026-02-02 18:56:22 -08:00
Dane Urban c24a8bb228 Add change 2026-02-02 18:55:38 -08:00
Dane Urban 01945abd86 fix test 2026-02-02 16:49:31 -08:00
Dane Urban 658632195f nit 2026-02-02 16:47:21 -08:00
Dane Urban ec6fd01ba4 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-02 15:02:12 -08:00
Dane Urban 148e6fb97d nit 2026-02-02 15:01:57 -08:00
Dane Urban 6598c1a48d nit 2026-02-02 14:59:42 -08:00
Dane Urban 497ce43bd8 Fix some tests 2026-02-02 13:36:42 -08:00
Dane Urban 8634cb0446 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-02 13:28:29 -08:00
Dane Urban 8d56fd3dc6 . 2026-02-02 13:27:08 -08:00
Dane Urban a7579a99d0 Resolve merge conflicts 2026-02-02 12:01:44 -08:00
Dane Urban 3533c10da4 n 2026-02-02 11:48:28 -08:00
Dane Urban 7b0414bf0d fix migration 2026-02-02 11:48:08 -08:00
Dane Urban b500ea537a nits 2026-02-02 11:46:52 -08:00
Dane Urban abd6d55add Merge branch 'flow_mapping_table' into llm_provider_refactor_1 2026-02-02 11:44:27 -08:00
Dane Urban f15b6b8034 Merge branch 'main' into llm_provider_refactor_1 2026-02-02 11:44:17 -08:00
Dane Urban fb40485f25 Update this 2026-02-02 11:43:58 -08:00
Dane Urban 22e85f1f28 Merge branch 'main' into flow_mapping_table 2026-02-02 11:43:24 -08:00
Dane Urban 2ef7c3e6f3 rename 2026-02-02 11:40:21 -08:00
Dane Urban 92a471ed2b . 2026-02-02 11:35:09 -08:00
Dane Urban d1b7e529a4 nit 2026-02-02 11:32:33 -08:00
Dane Urban 95c3579264 nits 2026-02-02 11:19:51 -08:00
Dane Urban 8802e5cad3 nit 2026-02-02 11:02:58 -08:00
Dane Urban a41b4bbc82 fix tests 2026-02-01 22:59:15 -08:00
Dane Urban c026c077b5 nit 2026-02-01 22:53:38 -08:00
Dane Urban 3eee539a86 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-01 22:13:54 -08:00
Dane Urban 143e7a0d72 nits 2026-02-01 22:13:21 -08:00
Dane Urban 4572358038 nits 2026-02-01 22:10:37 -08:00
Dane Urban 1753f94c11 start fixes 2026-02-01 21:51:02 -08:00
Dane Urban 120ddf2ef6 Merge branch 'llm_provider_refactor_1' into llm_provider_refactor_2 2026-02-01 21:42:40 -08:00
Dane Urban 2cce5bc58f Merge branch 'main' into flow_mapping_table 2026-02-01 21:38:54 -08:00
Dane Urban 383a6001d2 nit 2026-02-01 21:37:35 -08:00
Dane Urban 3a6f45bfca Merge branch 'main' into llm_provider_refactor_1 2026-02-01 19:36:43 -08:00
Dane Urban e06b5ef202 Merge branch 'flow_mapping_table' into llm_provider_refactor_1 2026-02-01 15:23:59 -08:00
Dane Urban c13ce816fa fix revision id 2026-02-01 13:55:01 -08:00
Dane Urban 39f3e872ec Merge branch 'main' into flow_mapping_table 2026-02-01 13:53:53 -08:00
Dane Urban b033c00217 . 2026-02-01 13:52:58 -08:00
Dane Urban 6d47c5f21a nit 2026-02-01 13:51:54 -08:00
Dane Urban 0645540e24 . 2026-01-31 23:44:17 -08:00
Dane Urban a2c0fc4df0 . 2026-01-31 19:23:46 -08:00
Dane Urban 7dccc88b35 . 2026-01-31 18:24:42 -08:00
Dane Urban ac617a51ce nits 2026-01-31 17:30:49 -08:00
Dane Urban 339a111a8f . 2026-01-30 18:19:03 -08:00
Dane Urban 09b7e6fc9b fix revision id 2026-01-30 17:39:02 -08:00
Dane Urban 135238014f Merge branch 'main' into flow_mapping_table 2026-01-30 17:38:20 -08:00
Dane Urban 303e37bf53 migrate 2026-01-30 17:38:15 -08:00
Dane Urban 6a888e9900 nit 2026-01-30 17:01:22 -08:00
Dane Urban e90a7767c6 nit 2026-01-30 15:35:31 -08:00
Dane Urban 1ded3af63c nit 2026-01-30 14:22:27 -08:00
Dane Urban c53546c000 nit 2026-01-30 13:03:05 -08:00
Dane Urban 9afa12edda nit 2026-01-30 13:02:48 -08:00
Dane Urban 32046de962 nit 2026-01-30 13:01:36 -08:00
774 changed files with 10181 additions and 30064 deletions

4
.github/CODEOWNERS vendored
View File

@@ -6,5 +6,5 @@
/web/STANDARDS.md @raunakab @Weves
# Agent context files
/CLAUDE.md @Weves
/AGENTS.md @Weves
/CLAUDE.md.template @Weves
/AGENTS.md.template @Weves

View File

@@ -8,5 +8,5 @@
## Additional Options
- [ ] [Optional] Please cherry-pick this PR to the latest release version.
- [ ] [Required] I have considered whether this PR needs to be cherry-picked to the latest beta branch.
- [ ] [Optional] Override Linear Check

View File

@@ -82,7 +82,7 @@ jobs:
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
IS_STABLE=true
fi
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta(\.[0-9]+)?$ ]]; then
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$ ]]; then
IS_BETA=true
fi
@@ -174,10 +174,23 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: "• check-version-tag"
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
@@ -249,7 +262,7 @@ jobs:
xdg-utils
- name: setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v6.2.0
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6.1.0
with:
node-version: 24
package-manager-cache: false
@@ -409,7 +422,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -482,7 +495,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -542,7 +555,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -620,7 +633,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -701,7 +714,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -769,7 +782,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -844,7 +857,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -916,7 +929,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -975,7 +988,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1053,7 +1066,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1126,7 +1139,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1187,7 +1200,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1267,7 +1280,7 @@ jobs:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1346,7 +1359,7 @@ jobs:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1409,7 +1422,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
@@ -1696,6 +1709,19 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Determine failed jobs
id: failed-jobs
shell: bash
@@ -1761,7 +1787,7 @@ jobs:
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}

View File

@@ -24,7 +24,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -24,7 +24,7 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -0,0 +1,151 @@
# Scan for problematic software licenses
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
name: 'Nightly - Scan licenses'
on:
# schedule:
# - cron: '0 14 * * *' # Runs every day at 6 AM PST / 7 AM PDT / 2 PM UTC
workflow_dispatch: # Allows manual triggering
permissions:
actions: read
contents: read
jobs:
scan-licenses:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]
timeout-minutes: 45
permissions:
actions: read
contents: read
security-events: write
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- name: Get explicit and transitive dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
pip freeze > requirements-all.txt
- name: Check python
id: license_check_report
uses: pilosus/action-pip-license-checker@e909b0226ff49d3235c99c4585bc617f49fff16a # ratchet:pilosus/action-pip-license-checker@v3
with:
requirements: 'requirements-all.txt'
fail: 'Copyleft'
exclude: '(?i)^(pylint|aio[-_]*).*'
- name: Print report
if: always()
env:
REPORT: ${{ steps.license_check_report.outputs.report }}
run: echo "$REPORT"
- name: Install npm dependencies
working-directory: ./web
run: npm ci
# be careful enabling the sarif and upload as it may spam the security tab
# with a huge amount of items. Work out the issues before enabling upload.
# - name: Run Trivy vulnerability scanner in repo mode
# if: always()
# uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
# with:
# scan-type: fs
# scan-ref: .
# scanners: license
# format: table
# severity: HIGH,CRITICAL
# # format: sarif
# # output: trivy-results.sarif
#
# # - name: Upload Trivy scan results to GitHub Security tab
# # uses: github/codeql-action/upload-sarif@v3
# # with:
# # sarif_file: trivy-results.sarif
scan-trivy:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# Backend
- name: Pull backend docker image
run: docker pull onyxdotapp/onyx-backend:latest
- name: Run Trivy vulnerability scanner on backend
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-backend:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0 # Set to 1 if we want a failed scan to fail the workflow
# Web server
- name: Pull web server docker image
run: docker pull onyxdotapp/onyx-web-server:latest
- name: Run Trivy vulnerability scanner on web server
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-web-server:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0
# Model server
- name: Pull model server docker image
run: docker pull onyxdotapp/onyx-model-server:latest
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # ratchet:aquasecurity/trivy-action@0.33.1
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-model-server:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0

View File

@@ -1,79 +0,0 @@
name: Post-Merge Beta Cherry-Pick
on:
push:
branches:
- main
permissions:
contents: write
pull-requests: write
jobs:
cherry-pick-to-latest-release:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Resolve merged PR and checkbox state
id: gate
env:
GH_TOKEN: ${{ github.token }}
run: |
# For the commit that triggered this workflow (HEAD on main), fetch all
# associated PRs and keep only the PR that was actually merged into main
# with this exact merge commit SHA.
pr_numbers="$(gh api "repos/${GITHUB_REPOSITORY}/commits/${GITHUB_SHA}/pulls" | jq -r --arg sha "${GITHUB_SHA}" '.[] | select(.merged_at != null and .base.ref == "main" and .merge_commit_sha == $sha) | .number')"
match_count="$(printf '%s\n' "$pr_numbers" | sed '/^[[:space:]]*$/d' | wc -l | tr -d ' ')"
pr_number="$(printf '%s\n' "$pr_numbers" | sed '/^[[:space:]]*$/d' | head -n 1)"
if [ "${match_count}" -gt 1 ]; then
echo "::warning::Multiple merged PRs matched commit ${GITHUB_SHA}. Using PR #${pr_number}."
fi
if [ -z "$pr_number" ]; then
echo "No merged PR associated with commit ${GITHUB_SHA}; skipping."
echo "should_cherrypick=false" >> "$GITHUB_OUTPUT"
exit 0
fi
# Read the PR body and check whether the helper checkbox is checked.
pr_body="$(gh api "repos/${GITHUB_REPOSITORY}/pulls/${pr_number}" --jq '.body // ""')"
echo "pr_number=$pr_number" >> "$GITHUB_OUTPUT"
if echo "$pr_body" | grep -qiE "\\[x\\][[:space:]]*(\\[[^]]+\\][[:space:]]*)?Please cherry-pick this PR to the latest release version"; then
echo "should_cherrypick=true" >> "$GITHUB_OUTPUT"
echo "Cherry-pick checkbox checked for PR #${pr_number}."
exit 0
fi
echo "should_cherrypick=false" >> "$GITHUB_OUTPUT"
echo "Cherry-pick checkbox not checked for PR #${pr_number}. Skipping."
- name: Checkout repository
if: steps.gate.outputs.should_cherrypick == 'true'
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: true
ref: main
- name: Install the latest version of uv
if: steps.gate.outputs.should_cherrypick == 'true'
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- name: Configure git identity
if: steps.gate.outputs.should_cherrypick == 'true'
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
- name: Create cherry-pick PR to latest release
if: steps.gate.outputs.should_cherrypick == 'true'
env:
GH_TOKEN: ${{ github.token }}
GITHUB_TOKEN: ${{ github.token }}
run: |
uv run --no-sync --with onyx-devtools ods cherry-pick "${GITHUB_SHA}" --yes --no-verify

View File

@@ -0,0 +1,28 @@
name: Require beta cherry-pick consideration
concurrency:
group: Require-Beta-Cherrypick-Consideration-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
pull_request:
types: [opened, edited, reopened, synchronize]
permissions:
contents: read
jobs:
beta-cherrypick-check:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Check PR body for beta cherry-pick consideration
env:
PR_BODY: ${{ github.event.pull_request.body }}
run: |
if echo "$PR_BODY" | grep -qiE "\\[x\\][[:space:]]*\\[Required\\][[:space:]]*I have considered whether this PR needs to be cherry[- ]picked to the latest beta branch"; then
echo "Cherry-pick consideration box is checked. Check passed."
exit 0
fi
echo "::error::Please check the 'I have considered whether this PR needs to be cherry-picked to the latest beta branch' box in the PR description."
exit 1

View File

@@ -40,16 +40,13 @@ jobs:
- name: Generate OpenAPI schema and Python client
shell: bash
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
env:
LICENSE_ENFORCEMENT_ENABLED: "false"
run: |
ods openapi all
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -45,12 +45,12 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020
with:
node-version: 24
cache: "npm" # zizmor: ignore[cache-poisoning]
@@ -63,7 +63,7 @@ jobs:
targets: ${{ matrix.target }}
- name: Cache Cargo registry and build
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # zizmor: ignore[cache-poisoning]
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # zizmor: ignore[cache-poisoning]
with:
path: |
~/.cargo/bin/
@@ -105,7 +105,7 @@ jobs:
- name: Upload build artifacts
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
path: |

View File

@@ -110,7 +110,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -118,7 +118,6 @@ jobs:
- name: Create .env file for Docker Compose
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
CODE_INTERPRETER_BETA_ENABLED=true
DISABLE_TELEMETRY=true
EOF

View File

@@ -41,7 +41,8 @@ jobs:
version: v3.19.0
- name: Set up chart-testing
uses: helm/chart-testing-action@b5eebdd9998021f29756c53432f48dab66394810
# NOTE: This is Jamison's patch from https://github.com/helm/chart-testing-action/pull/194
uses: helm/chart-testing-action@8958a6ac472cbd8ee9a8fbb6f1acbc1b0e966e44 # zizmor: ignore[impostor-commit]
with:
uv_version: "0.9.9"

View File

@@ -109,7 +109,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -169,7 +169,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -214,7 +214,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -287,7 +287,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -300,10 +300,7 @@ jobs:
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
@@ -468,7 +465,7 @@ jobs:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -480,7 +477,6 @@ jobs:
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
LICENSE_ENFORCEMENT_ENABLED=false \
MULTI_TENANT=true \
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \

View File

@@ -28,7 +28,7 @@ jobs:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"

View File

@@ -101,7 +101,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -161,7 +161,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -220,7 +220,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -279,7 +279,7 @@ jobs:
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -292,7 +292,6 @@ jobs:
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true

View File

@@ -22,9 +22,6 @@ env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
FIRECRAWL_API_KEY: ${{ secrets.FIRECRAWL_API_KEY }}
GOOGLE_PSE_API_KEY: ${{ secrets.GOOGLE_PSE_API_KEY }}
GOOGLE_PSE_SEARCH_ENGINE_ID: ${{ secrets.GOOGLE_PSE_SEARCH_ENGINE_ID }}
# for federated slack tests
SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
@@ -93,7 +90,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -154,7 +151,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -215,7 +212,7 @@ jobs:
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -252,7 +249,7 @@ jobs:
strategy:
fail-fast: false
matrix:
project: [admin, exclusive]
project: [admin, no-auth, exclusive]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -262,7 +259,7 @@ jobs:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
@@ -292,10 +289,7 @@ jobs:
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
COMPOSE_PROFILES=s3-filestore
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED=false
AUTH_TYPE=basic
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
EXA_API_KEY=${EXA_API_KEY_VALUE}
@@ -305,12 +299,15 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
EOF
if [ "${{ matrix.project }}" = "no-auth" ]; then
echo "PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true" >> deployment/docker_compose/.env
fi
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
@@ -433,6 +430,9 @@ jobs:
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
if [ "${PROJECT}" = "no-auth" ]; then
export PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true
fi
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
@@ -493,7 +493,7 @@ jobs:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
# uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
# with:
# node-version: 22

View File

@@ -42,9 +42,6 @@ jobs:
- name: Generate OpenAPI schema and Python client
shell: bash
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
env:
LICENSE_ENFORCEMENT_ENABLED: "false"
run: |
ods openapi all

View File

@@ -64,7 +64,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Login to Docker Hub
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}

View File

@@ -27,8 +27,6 @@ jobs:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
DISABLE_TELEMETRY: "true"
# TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
LICENSE_ENFORCEMENT_ENABLED: "false"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2

View File

@@ -24,13 +24,13 @@ jobs:
with:
fetch-depth: 0
persist-credentials: false
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # ratchet:actions/setup-python@v6
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- name: Setup node
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v6
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6
with: # zizmor: ignore[cache-poisoning]
node-version: 22
cache: "npm"

4
.gitignore vendored
View File

@@ -40,6 +40,10 @@ settings.json
/backend/tests/regression/answer_quality/search_test_config.yaml
*.egg-info
# Claude
AGENTS.md
CLAUDE.md
# Local .terraform directories
**/.terraform/*

599
AGENTS.md.template Normal file
View File

@@ -0,0 +1,599 @@
# AGENTS.md
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
#### Worker Types
1. **Primary Worker** (`celery_app.py`)
- Coordinates core background tasks and system-wide operations
- Handles connector management, document sync, pruning, and periodic checks
- Runs with 4 threads concurrency
- Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
2. **Docfetching Worker** (`docfetching`)
- Fetches documents from external data sources (connectors)
- Spawns docprocessing tasks for each document batch
- Implements watchdog monitoring for stuck connectors
- Configurable concurrency (default from env)
3. **Docprocessing Worker** (`docprocessing`)
- Processes fetched documents through the indexing pipeline:
- Upserts documents to PostgreSQL
- Chunks documents and adds contextual information
- Embeds chunks via model server
- Writes chunks to Vespa vector database
- Updates document metadata
- Configurable concurrency (default from env)
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)
- Handles Knowledge Graph processing and clustering
- Builds relationships between documents
- Runs clustering algorithms
- Configurable concurrency
7. **Monitoring Worker** (`monitoring`)
- System health monitoring and metrics collection
- Monitors Celery queues, process memory, and system status
- Single thread (monitoring doesn't need parallelism)
- Cloud-specific monitoring tasks
8. **User File Processing Worker** (`user_file_processing`)
- Processes user-uploaded files
- Handles user file indexing and project synchronization
- Configurable concurrency
9. **Beat Worker** (`beat`)
- Celery's scheduler for periodic tasks
- Uses DynamicTenantScheduler for multi-tenant support
- Schedules tasks like:
- Indexing checks (every 15 seconds)
- Connector deletion checks (every 20 seconds)
- Vespa sync checks (every 20 seconds)
- Pruning checks (every 20 seconds)
- KG processing (every 60 seconds)
- Monitoring tasks (every 5 minutes)
- Cleanup tasks (hourly)
#### Worker Deployment Modes
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Pruning operations (from `heavy` worker)
- Knowledge graph processing (from `kg_processing` worker)
- Monitoring tasks (from `monitoring` worker)
- User file processing (from `user_file_processing` worker)
- Lower resource footprint (single worker process)
- Suitable for smaller deployments or development environments
- Default concurrency: 6 threads
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
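As a minimal sketch (the exact script location and invocation are assumptions), switching between the two modes in a dev environment is just a matter of the environment variable:
```bash
# Lightweight mode (default): one consolidated `background` worker
USE_LIGHTWEIGHT_BACKGROUND_WORKER=true python dev_run_background_jobs.py
# Standard mode: separate specialized workers (light, heavy, kg_processing, ...)
USE_LIGHTWEIGHT_BACKGROUND_WORKER=false python dev_run_background_jobs.py
```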
#### Key Features
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
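A minimal sketch of a conforming task definition (the module path and task name are illustrative, not real Onyx code):
```python
# backend/onyx/background/celery/tasks/example/tasks.py (illustrative path)
from celery import shared_task
# ✅ Good - @shared_task binds to whichever Celery app imports it
@shared_task(bind=True)
def cleanup_stale_documents_task(self, cc_pair_id: int) -> None:
    # ... task body ...
    pass
# ❌ Bad - binding directly to a specific Celery app instance
# @celery_app.task
# def cleanup_stale_documents_task(cc_pair_id: int) -> None: ...
```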
**Defining APIs**:
When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
function.
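For example, a sketch of the preferred style (the route and model are hypothetical):
```python
from fastapi import APIRouter
from pydantic import BaseModel

router = APIRouter()

class PersonaSnapshot(BaseModel):
    id: int
    name: str

# ✅ Good - the return type annotation documents the response shape
@router.get("/persona/{persona_id}")
def get_persona(persona_id: int) -> PersonaSnapshot:
    return PersonaSnapshot(id=persona_id, name="example")

# ❌ Bad - @router.get("/persona/{persona_id}", response_model=PersonaSnapshot)
```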
**Testing Updates**:
If you make any updates to a celery worker and you want to test these changes, you will need
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
pre-commit run --all-files
```
NOTE: Always make sure everything is strictly typed (both in Python and Typescript).
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
- **Search**: Vespa vector database
- **Auth**: OAuth2, SAML, multi-provider support
- **AI/ML**: LangChain, LiteLLM, multiple embedding models
### Directory Structure
```
backend/
├── onyx/
│ ├── auth/ # Authentication & authorization
│ ├── chat/ # Chat functionality & LLM interactions
│ ├── connectors/ # Data source connectors
│ ├── db/ # Database models & operations
│ ├── document_index/ # Vespa integration
│ ├── federated_connectors/ # External search connectors
│ ├── llm/ # LLM provider integrations
│ └── server/ # API endpoints & routers
├── ee/ # Enterprise Edition features
├── alembic/ # Database migrations
└── tests/ # Test suites
web/
├── src/app/ # Next.js app router pages
├── src/components/ # Reusable React components
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate usage to paddings instead of margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `web/tailwind-themes/tailwind.config.js` / `web/src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn` calls are easier to read. They also handle more complex inputs (e.g., string arrays, whose elements get flattened) and filter out falsy values, so conditionals such as `myCondition && "some-tailwind-class"` (which evaluates to `false` when `myCondition` is `false`) are dropped automatically.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
// The `text03` flag colours the rendered text in the 3rd-scale grey
text03
// The `mainAction` flag applies the "main-action" font, line-height, and weight described in the Figma
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
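A minimal sketch of the pattern (the endpoint and component names are illustrative):
```typescript
import useSWR from "swr";
import { Text } from "@/refresh-components/texts/Text";

const fetcher = (url: string) => fetch(url).then((res) => res.json());

interface Persona {
  id: number;
  name: string;
}

function PersonaList() {
  // Fetch on the client; show a placeholder until the data arrives
  const { data, error, isLoading } = useSWR<Persona[]>("/api/persona", fetcher);

  if (isLoading) return <Text text03>Loading...</Text>;
  if (error) return <Text text03>Failed to load personas</Text>;

  return (
    <div className="p-4">
      {data?.map((persona) => (
        <Text key={persona.id} mainAction>
          {persona.name}
        </Text>
      ))}
    </div>
  );
}
```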
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
# Multi-tenant (Enterprise)
alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
# Multi-tenant migration
alembic -n schema_private revision -m "description"
```
Write the migration manually and place it in the file that alembic creates when running the above command.
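A minimal sketch of a hand-written migration body (revision IDs, table, and column are hypothetical):
```python
"""add example flag to persona

Revision ID: abc123def456
Revises: 0123456789ab
"""
import sqlalchemy as sa
from alembic import op

revision = "abc123def456"
down_revision = "0123456789ab"

def upgrade() -> None:
    op.add_column(
        "persona",
        sa.Column("example_flag", sa.Boolean(), nullable=False, server_default=sa.false()),
    )

def downgrade() -> None:
    op.drop_column("persona", "example_flag")
```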
## Testing Strategy
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
```
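A self-contained sketch of the mocking style (the function under test is a toy stand-in, not real Onyx code):
```python
from unittest.mock import MagicMock

def build_citation(fetch_title) -> str:
    """Toy function under test: formats a citation via an external lookup."""
    return f"[1] {fetch_title()}"

def test_build_citation_mocks_external_call() -> None:
    # The outside-world call is replaced by a mock rather than a real service
    fake_fetch = MagicMock(return_value="Onyx Docs")
    assert build_citation(fake_fetch) == "[1] Onyx Docs"
    fake_fetch.assert_called_once()
```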
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures and the `backend/tests/integration/common_utils` directory for utilities. Prefer calling the appropriate Manager
class in the utils (if one exists) over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`; instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
To run them:
```bash
python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
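As a rough sketch of the preferred shape (the `admin_user` fixture is real per the note above, but the Manager class and its signature here are assumptions, not the actual API):
```python
# Hypothetical test body - relies on the `admin_user` fixture from the root conftest.py
def test_admin_can_create_persona(admin_user) -> None:
    # Prefer a Manager utility from common_utils over raw `requests` calls
    persona = PersonaManager.create(user_performing_action=admin_user)  # illustrative
    assert persona.id is not None
```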
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. They run all Onyx services, *including* the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
To run them:
```bash
npx playwright test <TEST_NAME>
```
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright), you can access
logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
tail their logs to this file.
## Security Considerations
- Never commit API keys or secrets to repository
- Use encrypted credential storage for connector credentials
- Follow RBAC patterns for new features
- Implement proper input validation with Pydantic models
- Use parameterized queries to prevent SQL injection
## AI/LLM Integration
- Multiple LLM providers supported via LiteLLM
- Configurable models per feature (chat, search, embeddings)
- Streaming support for real-time responses
- Token management and rate limiting
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
What the change is meant to do.
**Important Notes**
Things you come across in your research that are important to the implementation.
**Implementation Strategy**
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: *Timeline*, *Rollback plan*
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.

View File

@@ -1 +0,0 @@
AGENTS.md

View File

@@ -1,25 +1,26 @@
# PROJECT KNOWLEDGE BASE
# CLAUDE.md
This file provides guidance to AI agents when working with code in this repository.
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
outside of those directories.
## Project Overview
**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
### Background Workers (Celery)
Onyx uses Celery for asynchronous task processing with multiple specialized workers:
@@ -91,7 +92,6 @@ Onyx uses Celery for asynchronous task processing with multiple specialized work
Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
- Runs a single consolidated `background` worker that handles all background tasks:
- Light worker tasks (Vespa operations, permissions sync, deletion)
- Document processing (indexing pipeline)
@@ -105,14 +105,12 @@ Onyx supports two deployment modes for background workers, controlled by the `US
- Default concurrency: 20 threads (increased to handle combined workload)
**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
- Runs separate specialized workers as documented above (light, docprocessing, docfetching, heavy, kg_processing, monitoring, user_file_processing)
- Better isolation and scalability
- Can scale individual workers independently based on workload
- Suitable for production deployments with higher load
The deployment mode affects:
- **Backend**: Worker processes spawned by supervisord or dev scripts
- **Helm**: Which Kubernetes deployments are created
- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
@@ -121,18 +119,18 @@ The deployment mode affects:
- **Thread-based Workers**: All workers use thread pools (not processes) for stability
- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a
middleware layer that automatically finds the appropriate tenant ID when sending tasks
via Celery Beat.
- **Task Prioritization**: High, Medium, Low priority queues
- **Monitoring**: Built-in heartbeat and liveness checking
- **Failure Handling**: Automatic retry and failure recovery mechanisms
- **Redis Coordination**: Inter-process communication via Redis
- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
#### Important Notes
**Defining Tasks**:
- Always use `@shared_task` rather than `@celery_app`
- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
@@ -145,7 +143,6 @@ If you make any updates to a celery worker and you want to test these changes, y
to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
### Code Quality
```bash
# Install and run pre-commit hooks
pre-commit install
@@ -157,7 +154,6 @@ NOTE: Always make sure everything is strictly typed (both in Python and Typescri
## Architecture Overview
### Technology Stack
- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
- **Database**: PostgreSQL with Redis caching
@@ -439,7 +435,6 @@ function ContactForm() {
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
@@ -472,7 +467,6 @@ function ContactForm() {
## Database & Migrations
### Running Migrations
```bash
# Standard migrations
alembic upgrade head
@@ -482,7 +476,6 @@ alembic -n schema_private upgrade head
```
### Creating Migrations
```bash
# Create migration
alembic revision -m "description"
@@ -495,14 +488,13 @@ Write the migration manually and place it in the file that alembic creates when
## Testing Strategy
First, you must activate the virtual environment with `source .venv/bin/activate`.
There are 4 main types of tests within Onyx:
### Unit Tests
These should not assume any Onyx/external services are available to be called.
Interactions with the outside world should be mocked using `unittest.mock`. Generally, only
write these for complex, isolated modules e.g. `citation_processing.py`.
To run them:
@@ -512,14 +504,13 @@ pytest -xv backend/tests/unit
```
### External Dependency Unit Tests
These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis,
MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
However, the actual Onyx containers are not running and with these tests we call the function to test directly.
We can also mock components/calls at will.
The goal with these tests are to minimize mocking while giving some flexibility to mock things that are flakey,
need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
with certain args, something that would be impossible with proper integration tests).
@@ -532,16 +523,15 @@ python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency
```
### Integration Tests
Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot
mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal
verification is necessary) over any other type of test.
Tests are parallelized at a directory level.
When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. Prefer (if one exists), calling the appropriate Manager
class in the utils over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
calling the utilities directly (e.g. do NOT create admin users with
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
@@ -553,9 +543,8 @@ python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
```
### Playwright (E2E) Tests
These tests are an even more complete version of the Integration Tests mentioned above. Has all services of Onyx
running, _including_ the Web Server.
These tests are an even more complete version of the Integration Tests mentioned above. Has all services of Onyx
running, *including* the Web Server.
Use these tests for anything that requires significant frontend <-> backend coordination.
@@ -567,11 +556,13 @@ To run them:
npx playwright test <TEST_NAME>
```
## Logs
When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
will be tailing their logs to this file.
## Security Considerations
@@ -590,7 +581,6 @@ will be tailing their logs to this file.
- Custom prompts and agent actions
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:
**Issues to Address**
@@ -603,10 +593,10 @@ Things you come across in your research that are important to the implementation
How you are going to make the changes happen. High level approach.
**Tests**
What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to
verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
Do NOT include these: _Timeline_, _Rollback plan_
Do NOT include these: *Timeline*, *Rollback plan*
This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
Keep it high level. You can reference certain files or functions though.

View File

@@ -2,10 +2,7 @@ Copyright (c) 2023-present DanswerAI, Inc.
Portions of this software are licensed as follows:
- All content that resides under "ee" directories of this repository is licensed under the Onyx Enterprise License. Each ee directory contains an identical copy of this license at its root:
- backend/ee/LICENSE
- web/src/app/ee/LICENSE
- web/src/ee/LICENSE
- All content that resides under "ee" directories of this repository, if that directory exists, is licensed under the license defined in "backend/ee/LICENSE". Specifically all content under "backend/ee" and "web/src/app/ee" is licensed under the license defined in "backend/ee/LICENSE".
- All third party components incorporated into the Onyx Software are licensed under the original license provided by the owner of the applicable component.
- Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.

View File

@@ -134,7 +134,6 @@ COPY --chown=onyx:onyx ./alembic_tenants /app/alembic_tenants
COPY --chown=onyx:onyx ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
COPY --chown=onyx:onyx ./static /app/static
COPY --chown=onyx:onyx ./keys /app/keys
# Escape hatch scripts
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
@@ -150,11 +149,6 @@ RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
ENABLE_CRAFT=true /app/scripts/setup_craft_templates.sh; \
fi
# Set Craft template paths to the in-image locations
# These match the paths where setup_craft_templates.sh creates the templates
ENV OUTPUTS_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs
ENV VENV_TEMPLATE_PATH=/app/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv
# Put logo in assets
COPY --chown=onyx:onyx ./assets /app/assets

View File

@@ -48,7 +48,6 @@ WORKDIR /app
# Utils used by model server
COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
COPY ./onyx/utils/tenant.py /app/onyx/utils/tenant.py
# Place to fetch version information
COPY ./onyx/__init__.py /app/onyx/__init__.py

View File

@@ -57,7 +57,7 @@ if USE_IAM_AUTH:
def include_object(
object: SchemaItem, # noqa: ARG001
object: SchemaItem,
name: str | None,
type_: Literal[
"schema",
@@ -67,8 +67,8 @@ def include_object(
"unique_constraint",
"foreign_key_constraint",
],
reflected: bool, # noqa: ARG001
compare_to: SchemaItem | None, # noqa: ARG001
reflected: bool,
compare_to: SchemaItem | None,
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False
@@ -244,7 +244,7 @@ def do_run_migrations(
def provide_iam_token_for_alembic(
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any # noqa: ARG001
dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
) -> None:
if USE_IAM_AUTH:
# Database connection settings

View File

@@ -1,343 +0,0 @@
#!/usr/bin/env python3
"""Parallel Alembic Migration Runner
Upgrades tenant schemas to head in batched, parallel alembic subprocesses.
Each subprocess handles a batch of schemas (via ``-x schemas=a,b,c``),
reducing per-process overhead compared to one-schema-per-process.
Usage examples::
# defaults: 6 workers, 50 schemas/batch
python alembic/run_multitenant_migrations.py
# custom settings
python alembic/run_multitenant_migrations.py -j 8 -b 100
"""
from __future__ import annotations
import argparse
import subprocess
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, NamedTuple
from alembic.config import Config
from alembic.script import ScriptDirectory
from sqlalchemy import text
from onyx.db.engine.sql_engine import is_valid_schema_name
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from shared_configs.configs import TENANT_ID_PREFIX
# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------
class Args(NamedTuple):
jobs: int
batch_size: int
class BatchResult(NamedTuple):
schemas: list[str]
success: bool
output: str
elapsed_sec: float
# ---------------------------------------------------------------------------
# Core functions
# ---------------------------------------------------------------------------
def run_alembic_for_batch(schemas: list[str]) -> BatchResult:
"""Run ``alembic upgrade head`` for a batch of schemas in one subprocess.
If the batch fails, it is automatically retried with ``-x continue=true``
so that the remaining schemas in the batch still get migrated. The retry
output (which contains alembic's per-schema error messages) is returned
for diagnosis.
"""
csv = ",".join(schemas)
base_cmd = ["alembic", "-x", f"schemas={csv}"]
start = time.monotonic()
result = subprocess.run(
[*base_cmd, "upgrade", "head"],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
)
if result.returncode == 0:
elapsed = time.monotonic() - start
return BatchResult(schemas, True, result.stdout or "", elapsed)
# At least one schema failed. Print the initial error output, then
# re-run with continue=true so the remaining schemas still get migrated.
if result.stdout:
print(f"Initial error output:\n{result.stdout}", file=sys.stderr, flush=True)
print(
f"Batch failed (exit {result.returncode}), retrying with 'continue=true'...",
file=sys.stderr,
flush=True,
)
retry = subprocess.run(
[*base_cmd, "-x", "continue=true", "upgrade", "head"],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
)
elapsed = time.monotonic() - start
return BatchResult(schemas, False, retry.stdout or "", elapsed)
def get_head_revision() -> str | None:
"""Get the head revision from the alembic script directory."""
alembic_cfg = Config("alembic.ini")
script = ScriptDirectory.from_config(alembic_cfg)
return script.get_current_head()
def get_schemas_needing_migration(
tenant_schemas: List[str], head_rev: str
) -> List[str]:
"""Return only schemas whose current alembic version is not at head."""
if not tenant_schemas:
return []
engine = SqlEngine.get_engine()
with engine.connect() as conn:
# Find which schemas actually have an alembic_version table
rows = conn.execute(
text(
"SELECT table_schema FROM information_schema.tables "
"WHERE table_name = 'alembic_version' "
"AND table_schema = ANY(:schemas)"
),
{"schemas": tenant_schemas},
)
schemas_with_table = set(row[0] for row in rows)
# Schemas without the table definitely need migration
needs_migration = [s for s in tenant_schemas if s not in schemas_with_table]
if not schemas_with_table:
return needs_migration
# Validate schema names before interpolating into SQL
for schema in schemas_with_table:
if not is_valid_schema_name(schema):
raise ValueError(f"Invalid schema name: {schema}")
# Single query to get every schema's current revision at once.
# Use integer tags instead of interpolating schema names into
# string literals to avoid quoting issues.
schema_list = list(schemas_with_table)
union_parts = [
f'SELECT {i} AS idx, version_num FROM "{schema}".alembic_version'
for i, schema in enumerate(schema_list)
]
rows = conn.execute(text(" UNION ALL ".join(union_parts)))
version_by_schema = {schema_list[row[0]]: row[1] for row in rows}
needs_migration.extend(
s for s in schemas_with_table if version_by_schema.get(s) != head_rev
)
return needs_migration
def run_migrations_parallel(
schemas: list[str],
max_workers: int,
batch_size: int,
) -> bool:
"""Chunk *schemas* into batches and run them in parallel.
A background monitor thread prints a status line every 60 s listing
which batches are still in-flight, making it easy to spot hung tenants.
"""
batches = [schemas[i : i + batch_size] for i in range(0, len(schemas), batch_size)]
total_batches = len(batches)
print(
f"{len(schemas)} schemas in {total_batches} batch(es) "
f"with {max_workers} workers (batch size: {batch_size})...",
flush=True,
)
all_success = True
# Thread-safe tracking of in-flight batches for the monitor thread.
in_flight: dict[int, list[str]] = {}
prev_in_flight: set[int] = set()
lock = threading.Lock()
stop_event = threading.Event()
def _monitor() -> None:
"""Print a status line every 60 s listing batches still in-flight.
Only prints batches that were also present in the previous tick,
making it easy to spot batches that are stuck.
"""
nonlocal prev_in_flight
while not stop_event.wait(60):
with lock:
if not in_flight:
prev_in_flight = set()
continue
current = set(in_flight)
stuck = current & prev_in_flight
prev_in_flight = current
if not stuck:
continue
schemas = [s for idx in sorted(stuck) for s in in_flight[idx]]
print(
f"⏳ batch(es) still running since last check "
f"({', '.join(str(i + 1) for i in sorted(stuck))}): "
+ ", ".join(schemas),
flush=True,
)
monitor_thread = threading.Thread(target=_monitor, daemon=True)
monitor_thread.start()
try:
with ThreadPoolExecutor(max_workers=max_workers) as executor:
def _run(batch_idx: int, batch: list[str]) -> BatchResult:
with lock:
in_flight[batch_idx] = batch
print(
f"Batch {batch_idx + 1}/{total_batches} started "
f"({len(batch)} schemas): {', '.join(batch)}",
flush=True,
)
result = run_alembic_for_batch(batch)
with lock:
in_flight.pop(batch_idx, None)
return result
future_to_idx = {
executor.submit(_run, i, b): i for i, b in enumerate(batches)
}
for future in as_completed(future_to_idx):
batch_idx = future_to_idx[future]
try:
result = future.result()
status = "" if result.success else ""
print(
f"Batch {batch_idx + 1}/{total_batches} "
f"{status} {len(result.schemas)} schemas "
f"in {result.elapsed_sec:.1f}s",
flush=True,
)
if not result.success:
# Print last 20 lines of retry output for diagnosis
tail = result.output.strip().splitlines()[-20:]
for line in tail:
print(f" {line}", flush=True)
all_success = False
except Exception as e:
print(
f"Batch {batch_idx + 1}/{total_batches} " f"✗ exception: {e}",
flush=True,
)
all_success = False
finally:
stop_event.set()
monitor_thread.join(timeout=2)
return all_success
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def parse_args() -> Args:
parser = argparse.ArgumentParser(
description="Run alembic migrations for all tenant schemas in parallel"
)
parser.add_argument(
"-j",
"--jobs",
type=int,
default=6,
metavar="N",
help="Number of parallel alembic processes (default: 6)",
)
parser.add_argument(
"-b",
"--batch-size",
type=int,
default=50,
metavar="N",
help="Schemas per alembic process (default: 50)",
)
args = parser.parse_args()
if args.jobs < 1:
parser.error("--jobs must be >= 1")
if args.batch_size < 1:
parser.error("--batch-size must be >= 1")
return Args(jobs=args.jobs, batch_size=args.batch_size)
def main() -> int:
args = parse_args()
head_rev = get_head_revision()
if head_rev is None:
print("Could not determine head revision.", file=sys.stderr)
return 1
with SqlEngine.scoped_engine(pool_size=5, max_overflow=2):
tenant_ids = get_all_tenant_ids()
tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]
if not tenant_schemas:
print(
"No tenant schemas found. Is MULTI_TENANT=true set?",
file=sys.stderr,
)
return 1
schemas_to_migrate = get_schemas_needing_migration(tenant_schemas, head_rev)
if not schemas_to_migrate:
print(
f"All {len(tenant_schemas)} tenants are already at head "
f"revision ({head_rev})."
)
return 0
print(
f"{len(schemas_to_migrate)}/{len(tenant_schemas)} tenants need "
f"migration (head: {head_rev})."
)
success = run_migrations_parallel(
schemas_to_migrate,
max_workers=args.jobs,
batch_size=args.batch_size,
)
print(f"\n{'All migrations successful' if success else 'Some migrations failed'}")
return 0 if success else 1
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,58 @@
"""LLMProvider deprecated fields are nullable
Revision ID: 001984c88745
Revises: 01f8e6d95a33
Create Date: 2026-02-01 22:24:34.171100
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "001984c88745"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Make default_model_name nullable (was NOT NULL)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=True,
)
# Remove server_default from is_default_vision_provider (was server_default=false())
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=None,
)
# is_default_provider and default_vision_model are already nullable with no server_default
def downgrade() -> None:
# Restore default_model_name to NOT NULL (set empty string for any NULLs first)
op.execute(
"UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=False,
)
# Restore server_default for is_default_vision_provider
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=sa.false(),
)

View File

@@ -1,7 +1,7 @@
"""Populate flow mapping data
Revision ID: 01f8e6d95a33
Revises: d5c86e2c6dc6
Revises: f220515df7b4
Create Date: 2026-01-31 17:37:10.485558
"""
@@ -11,7 +11,7 @@ from alembic import op
# revision identifiers, used by Alembic.
revision = "01f8e6d95a33"
down_revision = "d5c86e2c6dc6"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None
@@ -23,7 +23,7 @@ def upgrade() -> None:
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'CHAT' AS llm_model_flow_type,
'chat' AS llm_model_flow_type,
COALESCE(
(lp.is_default_provider IS TRUE AND lp.default_model_name = mc.name),
FALSE
@@ -44,7 +44,7 @@ def upgrade() -> None:
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'VISION' AS llm_model_flow_type,
'vision' AS llm_model_flow_type,
COALESCE(
(lp.is_default_vision_provider IS TRUE AND lp.default_vision_model = mc.name),
FALSE
@@ -68,7 +68,7 @@ def downgrade() -> None:
default_vision_model = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'VISION'
WHERE mf.llm_model_flow_type = 'vision'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
@@ -83,7 +83,7 @@ def downgrade() -> None:
default_model_name = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'CHAT'
WHERE mf.llm_model_flow_type = 'chat'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
@@ -100,7 +100,7 @@ def downgrade() -> None:
FROM model_configuration mc
JOIN llm_model_flow mf ON mf.model_configuration_id = mc.id
WHERE mc.llm_provider_id = lp.id
AND mf.llm_model_flow_type = 'CHAT'
AND mf.llm_model_flow_type = 'chat'
ORDER BY mc.is_visible DESC, mc.id ASC
LIMIT 1
)

View File

@@ -1,36 +0,0 @@
"""add_chat_compression_fields
Revision ID: 90b409d06e50
Revises: f220515df7b4
Create Date: 2026-01-26 09:13:09.635427
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "90b409d06e50"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add last_summarized_message_id to chat_message
# This field marks a message as a summary and indicates the last message it covers.
# Summaries are branch-aware via their parent_message_id pointing to the branch.
op.add_column(
"chat_message",
sa.Column(
"last_summarized_message_id",
sa.Integer(),
sa.ForeignKey("chat_message.id", ondelete="SET NULL"),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("chat_message", "last_summarized_message_id")

View File

@@ -1,35 +0,0 @@
"""add_file_content
Revision ID: d56ffa94ca32
Revises: 01f8e6d95a33
Create Date: 2026-02-06 15:29:34.192960
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d56ffa94ca32"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"file_content",
sa.Column(
"file_id",
sa.String(),
sa.ForeignKey("file_record.file_id", ondelete="CASCADE"),
primary_key=True,
),
sa.Column("lobj_oid", sa.BigInteger(), nullable=False),
sa.Column("file_size", sa.BigInteger(), nullable=False, server_default="0"),
)
def downgrade() -> None:
op.drop_table("file_content")

View File

@@ -1,35 +0,0 @@
"""add_cascade_delete_to_search_query_user_id
Revision ID: d5c86e2c6dc6
Revises: 90b409d06e50
Create Date: 2026-02-04 16:05:04.749804
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "d5c86e2c6dc6"
down_revision = "90b409d06e50"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_constraint("search_query_user_id_fkey", "search_query", type_="foreignkey")
op.create_foreign_key(
"search_query_user_id_fkey",
"search_query",
"user",
["user_id"],
["id"],
ondelete="CASCADE",
)
def downgrade() -> None:
op.drop_constraint("search_query_user_id_fkey", "search_query", type_="foreignkey")
op.create_foreign_key(
"search_query_user_id_fkey", "search_query", "user", ["user_id"], ["id"]
)

View File

@@ -39,7 +39,7 @@ EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
def include_object(
object: SchemaItem, # noqa: ARG001
object: SchemaItem,
name: str | None,
type_: Literal[
"schema",
@@ -49,8 +49,8 @@ def include_object(
"unique_constraint",
"foreign_key_constraint",
],
reflected: bool, # noqa: ARG001
compare_to: SchemaItem | None, # noqa: ARG001
reflected: bool,
compare_to: SchemaItem | None,
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False

View File

@@ -1,20 +1,20 @@
The Onyx Enterprise License (the "Enterprise License")
The DanswerAI Enterprise license (the Enterprise License)
Copyright (c) 2023-present DanswerAI, Inc.
With regard to the Onyx Software:
This software and associated documentation files (the "Software") may only be
used in production, if you (and any entity that you represent) have agreed to,
and are in compliance with, the Onyx Subscription Terms of Service, available
at https://www.onyx.app/legal/self-host (the "Enterprise Terms"), or other
and are in compliance with, the DanswerAI Subscription Terms of Service, available
at https://onyx.app/terms (the Enterprise Terms), or other
agreement governing the use of the Software, as agreed by you and DanswerAI,
and otherwise have a valid Onyx Enterprise License for the
and otherwise have a valid Onyx Enterprise license for the
correct number of user seats. Subject to the foregoing sentence, you are free to
modify this Software and publish patches to the Software. You agree that DanswerAI
and/or its licensors (as applicable) retain all right, title and interest in and
to all such modifications and/or patches, and all such modifications and/or
patches may only be used, copied, modified, displayed, distributed, or otherwise
exploited with a valid Onyx Enterprise License for the correct
exploited with a valid Onyx Enterprise license for the correct
number of user seats. Notwithstanding the foregoing, you may copy and modify
the Software for development and testing purposes, without requiring a
subscription. You agree that DanswerAI and/or its licensors (as applicable) retain

View File

@@ -951,7 +951,7 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
return False
def progress(self, tag: str, amount: int) -> None: # noqa: ARG002
def progress(self, tag: str, amount: int) -> None:
try:
self.redis_connector.permissions.set_active()
@@ -982,7 +982,7 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
def monitor_ccpair_permissions_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session # noqa: ARG001
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)

View File

@@ -259,7 +259,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
def try_creating_external_group_sync_task(
app: Celery,
cc_pair_id: int,
r: Redis, # noqa: ARG001
r: Redis,
tenant_id: str,
) -> str | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
@@ -344,7 +344,7 @@ def try_creating_external_group_sync_task(
bind=True,
)
def connector_external_group_sync_generator_task(
self: Task, # noqa: ARG001
self: Task,
cc_pair_id: int,
tenant_id: str,
) -> None:
@@ -590,8 +590,8 @@ def _perform_external_group_sync(
def validate_external_group_sync_fences(
tenant_id: str,
celery_app: Celery, # noqa: ARG001
r: Redis, # noqa: ARG001
celery_app: Celery,
r: Redis,
r_replica: Redis,
r_celery: Redis,
lock_beat: RedisLock,

View File

@@ -40,7 +40,7 @@ def export_query_history_task(
end: datetime,
start_time: datetime,
# Need to include the tenant_id since the TenantAwareTask needs this
tenant_id: str, # noqa: ARG001
tenant_id: str,
) -> None:
if not self.request.id:
raise RuntimeError("No task id defined for this task; cannot identify it")

View File

@@ -43,7 +43,7 @@ _TENANT_PROVISIONING_TIME_LIMIT = 60 * 10 # 10 minutes
trail=False,
bind=True,
)
def check_available_tenants(self: Task) -> None: # noqa: ARG001
def check_available_tenants(self: Task) -> None:
"""
Check if we have enough pre-provisioned tenants available.
If not, trigger the pre-provisioning of new tenants.

View File

@@ -21,9 +21,9 @@ logger = setup_logger()
trail=False,
)
def generate_usage_report_task(
self: Task, # noqa: ARG001
self: Task,
*,
tenant_id: str, # noqa: ARG001
tenant_id: str,
user_id: str | None = None,
period_from: str | None = None,
period_to: str | None = None,

View File

@@ -7,7 +7,7 @@ QUERY_HISTORY_TASK_NAME_PREFIX = OnyxCeleryTask.EXPORT_QUERY_HISTORY_TASK
def name_chat_ttl_task(
retention_limit_days: float, tenant_id: str | None = None # noqa: ARG001
retention_limit_days: float, tenant_id: str | None = None
) -> str:
return f"chat_ttl_{retention_limit_days}_days"

View File

@@ -134,7 +134,7 @@ GATED_TENANTS_KEY = "gated_tenants"
# License enforcement - when True, blocks API access for gated/expired licenses
LICENSE_ENFORCEMENT_ENABLED = (
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "true").lower() == "true"
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "").lower() == "true"
)
# Cloud data plane URL - self-hosted instances call this to reach cloud proxy endpoints

View File

@@ -54,7 +54,7 @@ def delete_document_set_privacy__no_commit(
def fetch_document_sets(
user_id: UUID | None,
db_session: Session,
include_outdated: bool = True, # Parameter only for versioned implementation, unused # noqa: ARG001
include_outdated: bool = True, # Parameter only for versioned implementation, unused
) -> list[tuple[DocumentSet, list[ConnectorCredentialPair]]]:
assert user_id is not None

View File

@@ -5,10 +5,8 @@ It filters hierarchy nodes based on user email and external group membership.
"""
from sqlalchemy import any_
from sqlalchemy import cast
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import String
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql.elements import ColumnElement
@@ -34,7 +32,7 @@ def _build_hierarchy_access_filter(
if external_group_ids:
access_filters.append(
HierarchyNode.external_user_group_ids.overlap(
cast(postgresql.array(external_group_ids), postgresql.ARRAY(String))
postgresql.array(external_group_ids)
)
)
return or_(*access_filters)

View File

@@ -11,7 +11,6 @@ from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.models import License
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
@@ -108,8 +107,7 @@ def get_used_seats(tenant_id: str | None = None) -> int:
Get current seat usage directly from database.
For multi-tenant: counts users in UserTenantMapping for this tenant.
For self-hosted: counts all active users (excludes EXT_PERM_USER role
and the anonymous system user).
For self-hosted: counts all active users (excludes EXT_PERM_USER role).
TODO: Exclude API key dummy users from seat counting. API keys create
users with emails like `__DANSWER_API_KEY_*` that should not count toward
@@ -129,7 +127,6 @@ def get_used_seats(tenant_id: str | None = None) -> int:
.where(
User.is_active == True, # type: ignore # noqa: E712
User.role != UserRole.EXT_PERM_USER,
User.email != ANONYMOUS_USER_EMAIL, # type: ignore
)
)
return result.scalar() or 0
@@ -263,15 +260,9 @@ def refresh_license_cache(
try:
payload = verify_license_signature(license_record.license_data)
# Derive source from payload: manual licenses lack stripe_customer_id
source: LicenseSource = (
LicenseSource.AUTO_FETCH
if payload.stripe_customer_id
else LicenseSource.MANUAL_UPLOAD
)
return update_license_cache(
payload,
source=source,
source=LicenseSource.AUTO_FETCH,
tenant_id=tenant_id,
)
except ValueError as e:

View File

@@ -643,7 +643,7 @@ def add_users_to_user_group(
def update_user_group(
db_session: Session,
user: User, # noqa: ARG001
user: User,
user_group_id: int,
user_group_update: UserGroupUpdate,
) -> UserGroup:

View File

@@ -25,7 +25,7 @@ CONFLUENCE_DOC_SYNC_LABEL = "confluence_doc_sync"
def confluence_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:

View File

@@ -1,8 +1,6 @@
from typing import Any
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
@@ -74,7 +72,6 @@ def get_page_restrictions(
page_id: str,
page_restrictions: dict[str, Any],
ancestors: list[dict[str, Any]],
add_prefix: bool = False,
) -> ExternalAccess | None:
"""
This function gets the restrictions for a page. In Confluence, a child can have
@@ -82,9 +79,6 @@ def get_page_restrictions(
If no restrictions are found anywhere, then return None, indicating that the page
should inherit the space's restrictions.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
found_user_emails: set[str] = set()
found_group_names: set[str] = set()
@@ -98,22 +92,13 @@ def get_page_restrictions(
restrictions=page_restrictions,
)
)
def _maybe_prefix_groups(group_names: set[str]) -> set[str]:
if add_prefix:
return {
build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)
for g in group_names
}
return group_names
# if there are individual page-level restrictions, then this is the accurate
# restriction for the page. You cannot both have page-level restrictions AND
# inherit restrictions from the parent.
if found_any_page_level_restriction:
return ExternalAccess(
external_user_emails=found_user_emails,
external_user_group_ids=_maybe_prefix_groups(found_group_names),
external_user_group_ids=found_group_names,
is_public=False,
)
@@ -140,7 +125,7 @@ def get_page_restrictions(
)
return ExternalAccess(
external_user_emails=ancestor_user_emails,
external_user_group_ids=_maybe_prefix_groups(ancestor_group_names),
external_user_group_ids=ancestor_group_names,
is_public=False,
)

View File

@@ -3,8 +3,6 @@ from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GR
from ee.onyx.external_permissions.confluence.constants import REQUEST_PAGINATION_LIMIT
from ee.onyx.external_permissions.confluence.constants import VIEWSPACE_PERMISSION_TYPE
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
@@ -114,7 +112,6 @@ def get_space_permission(
confluence_client: OnyxConfluence,
space_key: str,
is_cloud: bool,
add_prefix: bool = False,
) -> ExternalAccess:
if is_cloud:
space_permissions = _get_cloud_space_permissions(confluence_client, space_key)
@@ -133,32 +130,13 @@ def get_space_permission(
f"permissions for space '{space_key}'"
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix and space_permissions.external_user_group_ids:
prefixed_groups = {
build_ext_group_name_for_onyx(g, DocumentSource.CONFLUENCE)
for g in space_permissions.external_user_group_ids
}
return ExternalAccess(
external_user_emails=space_permissions.external_user_emails,
external_user_group_ids=prefixed_groups,
is_public=space_permissions.is_public,
)
return space_permissions
def get_all_space_permissions(
confluence_client: OnyxConfluence,
is_cloud: bool,
add_prefix: bool = False,
) -> dict[str, ExternalAccess]:
"""
Get access permissions for all spaces in Confluence.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
logger.debug("Getting space permissions")
# Gets all the spaces in the Confluence instance
all_space_keys = [
@@ -173,9 +151,7 @@ def get_all_space_permissions(
logger.debug(f"Got {len(all_space_keys)} spaces from confluence")
space_permissions_by_space_key: dict[str, ExternalAccess] = {}
for space_key in all_space_keys:
space_permissions = get_space_permission(
confluence_client, space_key, is_cloud, add_prefix
)
space_permissions = get_space_permission(confluence_client, space_key, is_cloud)
# Stores the permissions for each space
space_permissions_by_space_key[space_key] = space_permissions

View File

@@ -34,7 +34,7 @@ GITHUB_DOC_SYNC_LABEL = "github_doc_sync"
def github_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[DocExternalAccess, None, None]:
"""

View File

@@ -12,7 +12,7 @@ logger = setup_logger()
def github_group_sync(
tenant_id: str, # noqa: ARG001
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
github_connector: GithubConnector = GithubConnector(

View File

@@ -91,7 +91,7 @@ class TeamInfo(BaseModel):
def _fetch_organization_members(
github_client: Github, org_name: str, retry_count: int = 0 # noqa: ARG001
github_client: Github, org_name: str, retry_count: int = 0
) -> List[UserInfo]:
"""Fetch all organization members including owners and regular members."""
org_members: List[UserInfo] = []
@@ -124,7 +124,7 @@ def _fetch_organization_members(
def _fetch_repository_teams_detailed(
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
repo: Repository, github_client: Github, retry_count: int = 0
) -> List[TeamInfo]:
"""Fetch teams with access to the repository and their members."""
teams_data: List[TeamInfo] = []
@@ -167,7 +167,7 @@ def _fetch_repository_teams_detailed(
def fetch_repository_team_slugs(
repo: Repository, github_client: Github, retry_count: int = 0 # noqa: ARG001
repo: Repository, github_client: Github, retry_count: int = 0
) -> List[str]:
"""Fetch team slugs with access to the repository."""
logger.info(f"Fetching team slugs for repository {repo.full_name}")

View File

@@ -39,8 +39,8 @@ def _get_slim_doc_generator(
def gmail_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
"""

View File

@@ -13,7 +13,6 @@ from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.models import GoogleDriveFileType
@@ -68,17 +67,11 @@ def get_external_access_for_raw_gdrive_file(
company_domain: str,
retriever_drive_service: GoogleDriveService | None,
admin_drive_service: GoogleDriveService,
add_prefix: bool = False,
) -> ExternalAccess:
"""
Get the external access for a raw Google Drive file.
Assumes the file we retrieved has EITHER `permissions` or `permission_ids`
add_prefix: When this method is called during the initial indexing via the connector,
set add_prefix to True so group IDs are prefixed with the source type.
When invoked from doc_sync (permission sync), use the default (False)
since upsert_document_external_perms handles prefixing.
"""
doc_id = file.get("id")
if not doc_id:
@@ -171,13 +164,6 @@ def get_external_access_for_raw_gdrive_file(
| ({drive_id} if drive_id is not None else set())
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix:
group_ids = {
build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)
for group_id in group_ids
}
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_ids,
@@ -189,7 +175,6 @@ def get_external_access_for_folder(
folder: GoogleDriveFileType,
google_domain: str,
drive_service: GoogleDriveService,
add_prefix: bool = False,
) -> ExternalAccess:
"""
Extract ExternalAccess from a folder's permissions.
@@ -201,8 +186,6 @@ def get_external_access_for_folder(
folder: The folder metadata from Google Drive API (must include permissionIds field)
google_domain: The company's Google Workspace domain (e.g., "company.com")
drive_service: Google Drive service for fetching permission details
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
Returns:
ExternalAccess with extracted permission info
@@ -265,25 +248,17 @@ def get_external_access_for_folder(
# If allowFileDiscovery is False, it's "link only" access
is_public = permission.allow_file_discovery is not False
# Prefix group IDs with source type if requested (for indexing path)
group_ids: set[str] = group_emails
if add_prefix:
group_ids = {
build_ext_group_name_for_onyx(group_id, DocumentSource.GOOGLE_DRIVE)
for group_id in group_emails
}
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_ids,
external_user_group_ids=group_emails,
is_public=is_public,
)
def gdrive_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
"""

View File

@@ -384,7 +384,7 @@ def _build_onyx_groups(
def gdrive_group_sync(
tenant_id: str, # noqa: ARG001
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
# Initialize connector and build credential/service objects

View File

@@ -17,7 +17,7 @@ JIRA_DOC_SYNC_TAG = "jira_doc_sync"
def jira_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:

View File

@@ -102,7 +102,7 @@ def _build_group_member_email_map(
def jira_group_sync(
tenant_id: str, # noqa: ARG001
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""

View File

@@ -8,8 +8,6 @@ from ee.onyx.external_permissions.jira.models import Holder
from ee.onyx.external_permissions.jira.models import Permission
from ee.onyx.external_permissions.jira.models import User
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.utils.logger import setup_logger
HolderMap = dict[str, list[Holder]]
@@ -254,14 +252,7 @@ def _build_external_access_from_holder_map(
def get_project_permissions(
jira_client: JIRA,
jira_project: str,
add_prefix: bool = False,
) -> ExternalAccess | None:
"""
Get project permissions from Jira.
add_prefix: When True, prefix group IDs with source type (for indexing path).
When False (default), leave unprefixed (for permission sync path).
"""
project_permissions: PermissionScheme = jira_client.project_permissionscheme(
project=jira_project
)
@@ -276,20 +267,6 @@ def get_project_permissions(
holder_map = _build_holder_map(permissions=project_permissions.permissions)
external_access = _build_external_access_from_holder_map(
return _build_external_access_from_holder_map(
jira_client=jira_client, jira_project=jira_project, holder_map=holder_map
)
# Prefix group IDs with source type if requested (for indexing path)
if add_prefix and external_access and external_access.external_user_group_ids:
prefixed_groups = {
build_ext_group_name_for_onyx(g, DocumentSource.JIRA)
for g in external_access.external_user_group_ids
}
return ExternalAccess(
external_user_emails=external_access.external_user_emails,
external_user_group_ids=prefixed_groups,
is_public=external_access.is_public,
)
return external_access

View File

@@ -23,7 +23,7 @@ ContentRange = tuple[int, int | None] # (start_index, end_index) None means to
# NOTE: Used for testing timing
def _get_dummy_object_access_map(
object_ids: set[str], user_email: str, chunks: list[InferenceChunk] # noqa: ARG001
object_ids: set[str], user_email: str, chunks: list[InferenceChunk]
) -> dict[str, bool]:
time.sleep(0.15)
# return {object_id: True for object_id in object_ids}

View File

@@ -17,7 +17,7 @@ SHAREPOINT_DOC_SYNC_TAG = "sharepoint_doc_sync"
def sharepoint_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:

View File

@@ -15,7 +15,7 @@ logger = setup_logger()
def sharepoint_group_sync(
tenant_id: str, # noqa: ARG001
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""Sync SharePoint groups and their members"""

View File

@@ -103,7 +103,7 @@ def _fetch_channel_permissions(
def _get_slack_document_access(
slack_connector: SlackConnector,
channel_permissions: dict[str, ExternalAccess], # noqa: ARG001
channel_permissions: dict[str, ExternalAccess],
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
@@ -136,8 +136,8 @@ def _get_slack_document_access(
def slack_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
"""

View File

@@ -72,10 +72,10 @@ class SyncConfig(BaseModel):
# Mock doc sync function for testing (no-op)
def mock_doc_sync(
cc_pair: "ConnectorCredentialPair", # noqa: ARG001
fetch_all_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction, # noqa: ARG001
callback: Optional["IndexingHeartbeatInterface"], # noqa: ARG001
cc_pair: "ConnectorCredentialPair",
fetch_all_docs_fn: FetchAllDocumentsFunction,
fetch_all_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: Optional["IndexingHeartbeatInterface"],
) -> Generator["DocExternalAccess", None, None]:
"""Mock doc sync function for testing - returns empty list since permissions are fetched during indexing"""
yield from []

View File

@@ -18,7 +18,7 @@ TEAMS_DOC_SYNC_LABEL = "teams_doc_sync"
def teams_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction, # noqa: ARG001
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:

View File

@@ -32,7 +32,6 @@ from sqlalchemy.orm import Session
from ee.onyx.auth.users import current_admin_user
from ee.onyx.db.license import get_license
from ee.onyx.db.license import get_used_seats
from ee.onyx.server.billing.models import BillingInformationResponse
from ee.onyx.server.billing.models import CreateCheckoutSessionRequest
from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
@@ -165,16 +164,6 @@ async def create_checkout_session(
seats = request.seats if request else None
email = request.email if request else None
# Validate that requested seats is not less than current used seats
if seats is not None:
used_seats = get_used_seats(tenant_id)
if seats < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot subscribe with fewer seats than current usage. "
f"You have {used_seats} active users/integrations but requested {seats} seats.",
)
# Build redirect URL for after checkout completion
redirect_url = f"{WEB_DOMAIN}/admin/billing?checkout=success"
@@ -276,15 +265,6 @@ async def update_seats(
if not MULTI_TENANT and not license_data:
raise HTTPException(status_code=400, detail="No license found")
# Validate that new seat count is not less than current used seats
used_seats = get_used_seats(tenant_id)
if request.new_seat_count < used_seats:
raise HTTPException(
status_code=400,
detail=f"Cannot reduce seats below current usage. "
f"You have {used_seats} active users/integrations but requested {request.new_seat_count} seats.",
)
try:
result = await update_seat_service(
new_seat_count=request.new_seat_count,

View File

@@ -109,9 +109,7 @@ async def _make_billing_request(
headers = _get_headers(license_data)
try:
async with httpx.AsyncClient(
timeout=_REQUEST_TIMEOUT, follow_redirects=True
) as client:
async with httpx.AsyncClient(timeout=_REQUEST_TIMEOUT) as client:
if method == "GET":
response = await client.get(url, headers=headers, params=params)
else:

View File

@@ -139,7 +139,7 @@ def put_logo(
upload_logo(file=file, is_logotype=is_logotype)
def fetch_logo_helper(db_session: Session) -> Response: # noqa: ARG001
def fetch_logo_helper(db_session: Session) -> Response:
try:
file_store = get_default_file_store()
onyx_file = file_store.get_file_with_mime_type(get_logo_filename())
@@ -155,7 +155,7 @@ def fetch_logo_helper(db_session: Session) -> Response: # noqa: ARG001
return Response(content=onyx_file.data, media_type=onyx_file.mime_type)
def fetch_logotype_helper(db_session: Session) -> Response: # noqa: ARG001
def fetch_logotype_helper(db_session: Session) -> Response:
try:
file_store = get_default_file_store()
onyx_file = file_store.get_file_with_mime_type(get_logotype_filename())

View File

@@ -17,7 +17,7 @@ router = APIRouter(prefix="/evals")
@router.post("/eval_run", response_model=EvalRunAck)
def eval_run(
request: EvalConfigurationOptions,
user: User = Depends(current_cloud_superuser), # noqa: ARG001
user: User = Depends(current_cloud_superuser),
) -> EvalRunAck:
"""
Run an evaluation with the given message and optional dataset.

View File

@@ -42,20 +42,6 @@ logger = setup_logger()
router = APIRouter(prefix="/license")
# PEM-style delimiters used in license file format
_PEM_BEGIN = "-----BEGIN ONYX LICENSE-----"
_PEM_END = "-----END ONYX LICENSE-----"
def _strip_pem_delimiters(content: str) -> str:
"""Strip PEM-style delimiters from license content if present."""
content = content.strip()
if content.startswith(_PEM_BEGIN) and content.endswith(_PEM_END):
# Remove first and last lines (the delimiters)
lines = content.split("\n")
return "\n".join(lines[1:-1]).strip()
return content
@router.get("")
async def get_license_status(
@@ -120,11 +106,6 @@ async def claim_license(
- Updating seats via the billing API
- Returning from the Stripe customer portal
- Any operation that regenerates the license on control plane
Claim a license from the control plane (self-hosted only).
Two modes:
1. With session_id: After Stripe checkout, exchange session_id for license
2. Without session_id: Re-claim using existing license for auth
"""
if MULTI_TENANT:
raise HTTPException(
@@ -229,10 +210,6 @@ async def upload_license(
try:
content = await license_file.read()
license_data = content.decode("utf-8").strip()
# Strip PEM-style delimiters if present (used in .lic file format)
license_data = _strip_pem_delimiters(license_data)
# Remove any stray whitespace/newlines from user input
license_data = license_data.strip()
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid license file format")

View File
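The .lic format uses PEM-style BEGIN/END marker lines around the payload, as in the removed helper above; a self-contained round-trip sketch (wrap_license is an illustrative counterpart, not a function from the codebase):

    _PEM_BEGIN = "-----BEGIN ONYX LICENSE-----"
    _PEM_END = "-----END ONYX LICENSE-----"

    def wrap_license(payload: str) -> str:
        # Produce the .lic file format: delimiter lines around the raw payload.
        return f"{_PEM_BEGIN}\n{payload.strip()}\n{_PEM_END}\n"

    def unwrap_license(content: str) -> str:
        # Accept either a bare payload or a PEM-wrapped one, mirroring the
        # tolerant parsing in the upload endpoint.
        content = content.strip()
        if content.startswith(_PEM_BEGIN) and content.endswith(_PEM_END):
            lines = content.split("\n")
            return "\n".join(lines[1:-1]).strip()
        return content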

@@ -260,7 +260,7 @@ def confluence_oauth_accessible_resources(
credential_id: int,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id), # noqa: ARG001
tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
"""Atlassian's API is weird and does not supply us with enough info to be in a
usable state after authorizing. All APIs require a cloud id. We have to list
@@ -323,7 +323,7 @@ def confluence_oauth_finalize(
cloud_url: str,
user: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id), # noqa: ARG001
tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
"""Saves the info for the selected cloud site to the credential.
This is the final step in the confluence oauth flow where after the traditional

View File

@@ -78,7 +78,7 @@ def fetch_and_process_chat_session_history(
db_session: Session,
start: datetime,
end: datetime,
limit: int | None = 500, # noqa: ARG001
limit: int | None = 500,
) -> Generator[ChatSessionSnapshot]:
PAGE_SIZE = 100

View File

@@ -59,7 +59,7 @@ def generate_report(
def read_usage_report(
report_name: str,
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session), # noqa: ARG001
db_session: Session = Depends(get_session),
) -> Response:
try:
file = get_usage_report_data(report_name)

View File

@@ -123,9 +123,14 @@ def _seed_llms(
upsert_llm_provider(llm_upsert_request, db_session)
for llm_upsert_request in llm_upsert_requests
]
update_default_provider(
provider_id=seeded_providers[0].id, db_session=db_session
)
if len(seeded_providers[0].model_configurations) > 0:
default_model = seeded_providers[0].model_configurations[0].name
update_default_provider(
provider_id=seeded_providers[0].id,
model_name=default_model,
db_session=db_session,
)
def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) -> None:

View File
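update_default_provider now requires an explicit model name, so seeding guards on the provider actually exposing a model configuration. A hedged sketch of that selection with stand-in dataclasses (ModelConfig and SeededProvider are illustrative, not the project's models):

    from dataclasses import dataclass, field

    @dataclass
    class ModelConfig:
        name: str

    @dataclass
    class SeededProvider:
        id: int
        model_configurations: list[ModelConfig] = field(default_factory=list)

    def pick_default_model(provider: SeededProvider) -> str | None:
        # A provider may be seeded with zero model configurations, in which
        # case there is no model name to promote and no default is set.
        if provider.model_configurations:
            return provider.model_configurations[0].name
        return None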

@@ -1,13 +1,9 @@
"""EE Settings API - provides license-aware settings override."""
from redis.exceptions import RedisError
from sqlalchemy.exc import SQLAlchemyError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.db.license import refresh_license_cache
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger
@@ -44,14 +40,6 @@ def check_ee_features_enabled() -> bool:
tenant_id = get_current_tenant_id()
try:
metadata = get_cached_license_metadata(tenant_id)
if not metadata:
# Cache miss — warm from DB so cold-start doesn't block EE features
try:
with get_session_with_current_tenant() as db_session:
metadata = refresh_license_cache(db_session, tenant_id)
except SQLAlchemyError as db_error:
logger.warning(f"Failed to load license from DB: {db_error}")
if metadata and metadata.status != _BLOCKING_STATUS:
# Has a valid license (GRACE_PERIOD/PAYMENT_REMINDER still allow EE features)
return True
@@ -70,58 +58,26 @@ def apply_license_status_to_settings(settings: Settings) -> Settings:
For self-hosted, looks up license metadata and overrides application_status
if the license indicates GATED_ACCESS (fully expired).
Also sets ee_features_enabled based on license status to control
visibility of EE features in the UI.
For multi-tenant (cloud), the settings already have the correct status
from the control plane, so no override is needed.
If LICENSE_ENFORCEMENT_ENABLED is false, ee_features_enabled is set to True
(since EE code was loaded via ENABLE_PAID_ENTERPRISE_EDITION_FEATURES).
If LICENSE_ENFORCEMENT_ENABLED is false, settings are returned unchanged,
allowing the product to function normally without license checks.
"""
if not LICENSE_ENFORCEMENT_ENABLED:
# License enforcement disabled - EE code is loaded via
# ENABLE_PAID_ENTERPRISE_EDITION_FEATURES, so EE features are on
settings.ee_features_enabled = True
return settings
if MULTI_TENANT:
# Cloud mode - EE features always available (gating handled by is_tenant_gated)
settings.ee_features_enabled = True
return settings
tenant_id = get_current_tenant_id()
try:
metadata = get_cached_license_metadata(tenant_id)
if not metadata:
# Cache miss (e.g. after TTL expiry). Fall back to DB so
# the /settings request doesn't falsely return GATED_ACCESS
# while the cache is cold.
try:
with get_session_with_current_tenant() as db_session:
metadata = refresh_license_cache(db_session, tenant_id)
except SQLAlchemyError as db_error:
logger.warning(
f"Failed to load license from DB for settings: {db_error}"
)
if metadata:
if metadata.status == _BLOCKING_STATUS:
settings.application_status = metadata.status
settings.ee_features_enabled = False
else:
# Has a valid license (GRACE_PERIOD/PAYMENT_REMINDER still allow EE features)
settings.ee_features_enabled = True
else:
# No license found in cache or DB.
if ENTERPRISE_EDITION_ENABLED:
# Legacy EE flag is set → prior EE usage (e.g. permission
# syncing) means indexed data may need protection.
settings.application_status = _BLOCKING_STATUS
settings.ee_features_enabled = False
if metadata and metadata.status == _BLOCKING_STATUS:
settings.application_status = metadata.status
# No license = user hasn't purchased yet, allow access for upgrade flow
# GRACE_PERIOD/PAYMENT_REMINDER don't block - they're for notifications
except RedisError as e:
logger.warning(f"Failed to check license metadata for settings: {e}")
# Fail closed - disable EE features if we can't verify license
settings.ee_features_enabled = False
return settings

View File
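The override rule in the hunk above reduces to: only a fully expired license (the blocking status) rewrites application_status; a missing license and reminder-style statuses pass through untouched. A minimal sketch of that rule, with the status string an assumption:

    _BLOCKING_STATUS = "gated_access"  # assumption: illustrative value

    def apply_license_gate(current_status: str, license_status: str | None) -> str:
        # GRACE_PERIOD / PAYMENT_REMINDER style statuses only drive
        # notifications, and no license means the user has not purchased yet,
        # so neither overrides the application status.
        if license_status == _BLOCKING_STATUS:
            return license_status
        return current_status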

@@ -19,7 +19,6 @@ logger = setup_logger()
def fetch_stripe_checkout_session(
tenant_id: str,
billing_period: Literal["monthly", "annual"] = "monthly",
seats: int | None = None,
) -> str:
token = generate_data_plane_token()
headers = {
@@ -30,23 +29,10 @@ def fetch_stripe_checkout_session(
payload = {
"tenant_id": tenant_id,
"billing_period": billing_period,
"seats": seats,
}
response = requests.post(url, headers=headers, json=payload)
if not response.ok:
try:
data = response.json()
error_msg = (
data.get("error")
or f"Request failed with status {response.status_code}"
)
except (ValueError, requests.exceptions.JSONDecodeError):
error_msg = f"Request failed with status {response.status_code}: {response.text[:200]}"
raise Exception(error_msg)
data = response.json()
if data.get("error"):
raise Exception(data["error"])
return data["sessionId"]
response.raise_for_status()
return response.json()["sessionId"]
def fetch_tenant_stripe_information(tenant_id: str) -> dict:
@@ -65,6 +51,7 @@ def fetch_tenant_stripe_information(tenant_id: str) -> dict:
def fetch_billing_information(
tenant_id: str,
) -> BillingInformation | SubscriptionStatusResponse:
logger.info("Fetching billing information")
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",

View File
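The error handling removed above follows a generally useful pattern: prefer a server-supplied message, then fall back to the status code plus a bounded slice of the body. A standalone sketch of that pattern with requests:

    import requests

    def extract_error_message(response: requests.Response) -> str:
        # Prefer an explicit "error" field from the server; otherwise report
        # the status code with at most 200 characters of body for readability.
        try:
            data = response.json()
            if isinstance(data, dict) and data.get("error"):
                return str(data["error"])
        except ValueError:
            pass
        return (
            f"Request failed with status {response.status_code}: "
            f"{response.text[:200]}"
        )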

@@ -29,7 +29,6 @@ from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.billing import fetch_customer_portal_session
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import CreateCheckoutSessionRequest
from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
@@ -115,30 +114,12 @@ async def create_customer_portal_session(
try:
portal_url = fetch_customer_portal_session(tenant_id, return_url)
return {"stripe_customer_portal_url": portal_url}
return {"url": portal_url}
except Exception as e:
logger.exception("Failed to create customer portal session")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-checkout-session")
async def create_checkout_session(
request: CreateCheckoutSessionRequest | None = None,
_: User = Depends(current_admin_user),
) -> dict:
"""Create a Stripe checkout session via the control plane."""
tenant_id = get_current_tenant_id()
billing_period = request.billing_period if request else "monthly"
seats = request.seats if request else None
try:
checkout_url = fetch_stripe_checkout_session(tenant_id, billing_period, seats)
return {"stripe_checkout_url": checkout_url}
except Exception as e:
logger.exception("Failed to create checkout session")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-subscription-session")
async def create_subscription_session(
request: CreateSubscriptionSessionRequest | None = None,

View File

@@ -42,12 +42,6 @@ class BillingInformation(BaseModel):
payment_method_enabled: bool
class CreateCheckoutSessionRequest(BaseModel):
billing_period: Literal["monthly", "annual"] = "monthly"
seats: int | None = None
email: str | None = None
class CheckoutSessionCreationResponse(BaseModel):
id: str

View File

@@ -121,9 +121,7 @@ async def get_or_provision_tenant(
)
async def create_tenant(
email: str, referral_source: str | None = None # noqa: ARG001
) -> str:
async def create_tenant(email: str, referral_source: str | None = None) -> str:
"""
Create a new tenant on-demand when no pre-provisioned tenants are available.
This is the fallback method when we can't use a pre-provisioned tenant.
@@ -302,12 +300,12 @@ def configure_default_api_keys(db_session: Session) -> None:
has_set_default_provider = False
def _upsert(request: LLMProviderUpsertRequest) -> None:
def _upsert(request: LLMProviderUpsertRequest, default_model: str) -> None:
nonlocal has_set_default_provider
try:
provider = upsert_llm_provider(request, db_session)
if not has_set_default_provider:
update_default_provider(provider.id, db_session)
update_default_provider(provider.id, default_model, db_session)
has_set_default_provider = True
except Exception as e:
logger.error(f"Failed to configure {request.provider} provider: {e}")
@@ -325,14 +323,13 @@ def configure_default_api_keys(db_session: Session) -> None:
name="OpenAI",
provider=OPENAI_PROVIDER_NAME,
api_key=OPENAI_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
OPENAI_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(openai_provider)
_upsert(openai_provider, default_model_name)
# Create default image generation config using the OpenAI API key
try:
@@ -361,14 +358,13 @@ def configure_default_api_keys(db_session: Session) -> None:
name="Anthropic",
provider=ANTHROPIC_PROVIDER_NAME,
api_key=ANTHROPIC_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
ANTHROPIC_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(anthropic_provider)
_upsert(anthropic_provider, default_model_name)
else:
logger.info(
"ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
@@ -393,14 +389,13 @@ def configure_default_api_keys(db_session: Session) -> None:
name="Google Vertex AI",
provider=VERTEXAI_PROVIDER_NAME,
custom_config=custom_config,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
VERTEXAI_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(vertexai_provider)
_upsert(vertexai_provider, default_model_name)
else:
logger.info(
"VERTEXAI_DEFAULT_CREDENTIALS not set, skipping Vertex AI provider configuration"
@@ -432,12 +427,11 @@ def configure_default_api_keys(db_session: Session) -> None:
name="OpenRouter",
provider=OPENROUTER_PROVIDER_NAME,
api_key=OPENROUTER_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=model_configurations,
api_key_changed=True,
is_auto_mode=True,
)
_upsert(openrouter_provider)
_upsert(openrouter_provider, default_model_name)
else:
logger.info(
"OPENROUTER_DEFAULT_API_KEY not set, skipping OpenRouter provider configuration"
@@ -677,7 +671,7 @@ async def setup_tenant(tenant_id: str) -> None:
async def assign_tenant_to_user(
tenant_id: str, email: str, referral_source: str | None = None # noqa: ARG001
tenant_id: str, email: str, referral_source: str | None = None
) -> None:
"""
Assign a tenant to a user and perform necessary operations.

View File

@@ -177,7 +177,7 @@ async def forward_to_control_plane(
url = f"{CONTROL_PLANE_API_BASE_URL}{path}"
try:
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
async with httpx.AsyncClient(timeout=30.0) as client:
if method == "GET":
response = await client.get(url, headers=headers, params=params)
elif method == "POST":

View File

@@ -1,6 +1,5 @@
import logging
import os
import re
from types import SimpleNamespace
from sqlalchemy import text
@@ -11,30 +10,9 @@ from alembic import command
from alembic.config import Config
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from shared_configs.configs import TENANT_ID_PREFIX
logger = logging.getLogger(__name__)
# Regex pattern for valid tenant IDs:
# - UUID format: tenant_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
# - AWS instance ID format: tenant_i-xxxxxxxxxxxxxxxxx
# Also useful for not accidentally dropping `public` schema
TENANT_ID_PATTERN = re.compile(
rf"^{re.escape(TENANT_ID_PREFIX)}("
r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}" # UUID
r"|i-[a-f0-9]+" # AWS instance ID
r")$"
)
def validate_tenant_id(tenant_id: str) -> bool:
"""Validate that tenant_id matches expected format.
This is important for SQL injection prevention since schema names
cannot be parameterized in SQL and must be formatted directly.
"""
return bool(TENANT_ID_PATTERN.match(tenant_id))
def run_alembic_migrations(schema_name: str) -> None:
logger.info(f"Starting Alembic migrations for schema: {schema_name}")
@@ -89,18 +67,13 @@ def create_schema_if_not_exists(tenant_id: str) -> bool:
def drop_schema(tenant_id: str) -> None:
"""Drop a tenant's schema.
Uses strict regex validation to reject unexpected formats early,
preventing SQL injection since schema names cannot be parameterized.
"""
if not validate_tenant_id(tenant_id):
raise ValueError(f"Invalid tenant_id format: {tenant_id}")
if not tenant_id.isidentifier():
raise ValueError("Invalid tenant_id.")
with get_sqlalchemy_engine().connect() as connection:
with connection.begin():
# Use string formatting with validated tenant_id (safe after validation)
connection.execute(text(f'DROP SCHEMA IF EXISTS "{tenant_id}" CASCADE'))
connection.execute(
text("DROP SCHEMA IF EXISTS %(schema_name)s CASCADE"),
{"schema_name": tenant_id},
)
def get_current_alembic_version(tenant_id: str) -> str:

View File
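Context for the hunk above: Postgres cannot take an identifier such as a schema name as a bind parameter, which is why dynamic DDL has to validate the name and then interpolate it. A sketch of the validate-then-quote approach, reusing the tenant-ID regex shown in the removed code (the prefix value is an assumption):

    import re

    TENANT_ID_PREFIX = "tenant_"  # assumption: mirrors shared_configs

    TENANT_ID_PATTERN = re.compile(
        rf"^{re.escape(TENANT_ID_PREFIX)}("
        r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"  # UUID
        r"|i-[a-f0-9]+"  # AWS instance ID
        r")$"
    )

    def quoted_schema_name(tenant_id: str) -> str:
        # Reject anything outside the expected shape before the name is ever
        # formatted into SQL; identifiers cannot be parameterized.
        if not TENANT_ID_PATTERN.match(tenant_id):
            raise ValueError(f"Invalid tenant_id format: {tenant_id}")
        return f'"{tenant_id}"'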

@@ -319,13 +319,11 @@ def get_tenant_count(tenant_id: str) -> int:
A user counts toward the seat count if:
1. They have an active mapping to this tenant (UserTenantMapping.active == True)
2. AND the User is active (User.is_active == True)
3. AND the User is not the anonymous system user
TODO: Exclude API key dummy users from seat counting. API keys create
users with emails like `__DANSWER_API_KEY_*` that should not count toward
seat limits. See: https://linear.app/onyx-app/issue/ENG-3518
"""
from onyx.configs.constants import ANONYMOUS_USER_EMAIL
from onyx.db.models import User
# First get all emails with active mappings to this tenant
@@ -335,7 +333,6 @@ def get_tenant_count(tenant_id: str) -> int:
.filter(
UserTenantMapping.tenant_id == tenant_id,
UserTenantMapping.active == True, # noqa: E712
UserTenantMapping.email != ANONYMOUS_USER_EMAIL,
)
.all()
)

View File
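A hedged sketch of the seat-counting rule the docstring lays out, over plain dicts instead of the ORM models (field names and the anonymous email are illustrative):

    def count_seats(
        mappings: list[dict],
        users_by_email: dict[str, dict],
        anonymous_email: str = "anonymous@example.invalid",
    ) -> int:
        # A seat is occupied when the tenant mapping is active, the user
        # record is active, and the user is not the anonymous system user.
        active_emails = {
            m["email"]
            for m in mappings
            if m["active"] and m["email"] != anonymous_email
        }
        return sum(
            1
            for email in active_emails
            if users_by_email.get(email, {}).get("is_active", False)
        )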

@@ -12,14 +12,12 @@ from ee.onyx.db.user_group import prepare_user_group_for_deletion
from ee.onyx.db.user_group import update_user_curator_relationship
from ee.onyx.db.user_group import update_user_group
from ee.onyx.server.user_group.models import AddUsersToUserGroupRequest
from ee.onyx.server.user_group.models import MinimalUserGroupSnapshot
from ee.onyx.server.user_group.models import SetCuratorRequest
from ee.onyx.server.user_group.models import UserGroup
from ee.onyx.server.user_group.models import UserGroupCreate
from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
@@ -47,23 +45,6 @@ def list_user_groups(
return [UserGroup.from_model(user_group) for user_group in user_groups]
@router.get("/user-groups/minimal")
def list_minimal_user_groups(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> list[MinimalUserGroupSnapshot]:
if user.role == UserRole.ADMIN:
user_groups = fetch_user_groups(db_session, only_up_to_date=False)
else:
user_groups = fetch_user_groups_for_user(
db_session=db_session,
user_id=user.id,
)
return [
MinimalUserGroupSnapshot.from_model(user_group) for user_group in user_groups
]
@router.post("/admin/user-group")
def create_user_group(
user_group: UserGroupCreate,

View File

@@ -76,18 +76,6 @@ class UserGroup(BaseModel):
)
class MinimalUserGroupSnapshot(BaseModel):
id: int
name: str
@classmethod
def from_model(cls, user_group_model: UserGroupModel) -> "MinimalUserGroupSnapshot":
return cls(
id=user_group_model.id,
name=user_group_model.name,
)
class UserGroupCreate(BaseModel):
name: str
user_ids: list[UUID]

View File

@@ -96,7 +96,7 @@ def get_access_for_documents(
return versioned_get_access_for_documents_fn(document_ids, db_session)
def _get_acl_for_user(user: User, db_session: Session) -> set[str]: # noqa: ARG001
def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL

View File
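The docstring above describes ACL filtering as a set-intersection test: a user may see a document when at least one of their ACL entries appears in the document's ACL. In sketch form:

    def user_can_access(user_acl: set[str], document_acl: set[str]) -> bool:
        # Access requires at least one shared entry, i.e. a non-empty
        # intersection between the user's ACL and the document's ACL.
        return not user_acl.isdisjoint(document_acl)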

@@ -4,9 +4,7 @@ from onyx.db.models import User
from onyx.utils.variable_functionality import fetch_versioned_implementation
def _get_user_external_group_ids(
db_session: Session, user: User # noqa: ARG001
) -> list[str]:
def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
return []

View File

@@ -30,7 +30,7 @@ REFRESH_ENDPOINTS = {
async def _test_expire_oauth_token(
user: User,
oauth_account: OAuthAccount,
db_session: AsyncSession, # noqa: ARG001
db_session: AsyncSession,
user_manager: BaseUserManager[User, Any],
expire_in_seconds: int = 10,
) -> bool:
@@ -59,7 +59,7 @@ async def _test_expire_oauth_token(
async def refresh_oauth_token(
user: User,
oauth_account: OAuthAccount,
db_session: AsyncSession, # noqa: ARG001
db_session: AsyncSession,
user_manager: BaseUserManager[User, Any],
) -> bool:
"""
@@ -182,7 +182,7 @@ async def check_and_refresh_oauth_tokens(
async def check_oauth_account_has_refresh_token(
user: User, # noqa: ARG001
user: User,
oauth_account: OAuthAccount,
) -> bool:
"""

View File

@@ -58,4 +58,3 @@ class UserUpdate(schemas.BaseUserUpdate):
class AuthBackend(str, Enum):
REDIS = "redis"
POSTGRES = "postgres"
JWT = "jwt"

View File

@@ -38,7 +38,6 @@ from fastapi_users import schemas
from fastapi_users import UUIDIDMixin
from fastapi_users.authentication import AuthenticationBackend
from fastapi_users.authentication import CookieTransport
from fastapi_users.authentication import JWTStrategy
from fastapi_users.authentication import RedisStrategy
from fastapi_users.authentication import Strategy
from fastapi_users.authentication.strategy.db import AccessTokenDatabase
@@ -60,7 +59,6 @@ from sqlalchemy import nulls_last
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
from onyx.auth.api_key import get_hashed_api_key_from_request
from onyx.auth.disposable_email_validator import is_disposable_email
@@ -111,7 +109,6 @@ from onyx.db.auth import get_user_db
from onyx.db.auth import SQLAlchemyUserAdminDB
from onyx.db.engine.async_sql_engine import get_async_session
from onyx.db.engine.async_sql_engine import get_async_session_context_manager
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.models import AccessToken
from onyx.db.models import OAuthAccount
@@ -274,22 +271,6 @@ def verify_email_domain(email: str) -> None:
)
def enforce_seat_limit(db_session: Session, seats_needed: int = 1) -> None:
"""Raise HTTPException(402) if adding users would exceed the seat limit.
No-op for multi-tenant or CE deployments.
"""
if MULTI_TENANT:
return
result = fetch_ee_implementation_or_noop(
"onyx.db.license", "check_seat_availability", None
)(db_session, seats_needed=seats_needed)
if result is not None and not result.available:
raise HTTPException(status_code=402, detail=result.error_message)
class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
reset_password_token_secret = USER_AUTH_SECRET
verification_token_secret = USER_AUTH_SECRET
@@ -419,12 +400,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
):
user_create.role = UserRole.ADMIN
# Check seat availability for new users (single-tenant only)
with get_session_with_current_tenant() as sync_db:
existing = get_user_by_email(user_create.email, sync_db)
if existing is None:
enforce_seat_limit(sync_db)
user_created = False
try:
user = await super().create(user_create, safe=safe, request=request)
@@ -634,10 +609,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
raise exceptions.UserNotExists()
except exceptions.UserNotExists:
# Check seat availability before creating (single-tenant only)
with get_session_with_current_tenant() as sync_db:
enforce_seat_limit(sync_db)
password = self.password_helper.generate()
user_dict = {
"email": account_email,
@@ -809,7 +780,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
)
async def on_after_forgot_password(
self, user: User, token: str, request: Optional[Request] = None # noqa: ARG002
self, user: User, token: str, request: Optional[Request] = None
) -> None:
if not EMAIL_CONFIGURED:
logger.error(
@@ -828,7 +799,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
send_forgot_password_email(user.email, tenant_id=tenant_id, token=token)
async def on_after_request_verify(
self, user: User, token: str, request: Optional[Request] = None # noqa: ARG002
self, user: User, token: str, request: Optional[Request] = None
) -> None:
verify_email_domain(user.email)
@@ -1012,7 +983,7 @@ class TenantAwareRedisStrategy(RedisStrategy[User, uuid.UUID]):
except (exceptions.UserNotExists, exceptions.InvalidID, KeyError):
return None
async def destroy_token(self, token: str, user: User) -> None: # noqa: ARG002
async def destroy_token(self, token: str, user: User) -> None:
"""Properly delete the token from async redis."""
redis = await get_async_redis_connection()
await redis.delete(f"{self.key_prefix}{token}")
@@ -1075,61 +1046,6 @@ class RefreshableDatabaseStrategy(DatabaseStrategy[User, uuid.UUID, AccessToken]
return token
class SingleTenantJWTStrategy(JWTStrategy[User, uuid.UUID]):
"""Stateless JWT strategy for single-tenant deployments.
Tokens are self-contained and verified via signature — no Redis or DB
lookup required per request. An ``iat`` claim is embedded so that
downstream code can determine when the token was created without
querying an external store.
Refresh is implemented by issuing a brand-new JWT (the old one remains
valid until its natural expiry). ``destroy_token`` is a no-op because
JWTs cannot be server-side invalidated.
"""
def __init__(
self,
secret: SecretType,
lifetime_seconds: int | None = SESSION_EXPIRE_TIME_SECONDS,
token_audience: list[str] | None = None,
algorithm: str = "HS256",
public_key: SecretType | None = None,
):
super().__init__(
secret=secret,
lifetime_seconds=lifetime_seconds,
token_audience=token_audience or ["fastapi-users:auth"],
algorithm=algorithm,
public_key=public_key,
)
async def write_token(self, user: User) -> str:
data = {
"sub": str(user.id),
"aud": self.token_audience,
"iat": int(datetime.now(timezone.utc).timestamp()),
}
return generate_jwt(
data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm
)
async def destroy_token(self, token: str, user: User) -> None: # noqa: ARG002
# JWTs are stateless — nothing to invalidate server-side.
# NOTE: a compromise that makes JWT auth stateful but revocable
# is to include a token_version claim in the JWT payload. The token_version
# is incremented whenever the user logs out (or gets login revoked). Whenever
# the JWT is used, it is only valid if the token_version claim is the same as the one
# in the db. If not, the JWT is invalid and the user needs to login again.
return
async def refresh_token(
self, token: Optional[str], user: User # noqa: ARG002
) -> str:
"""Issue a fresh JWT with a new expiry."""
return await self.write_token(user)
def get_redis_strategy() -> TenantAwareRedisStrategy:
return TenantAwareRedisStrategy()
@@ -1142,22 +1058,6 @@ def get_database_strategy(
)
def get_jwt_strategy() -> SingleTenantJWTStrategy:
return SingleTenantJWTStrategy(
secret=USER_AUTH_SECRET,
lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS,
)
if AUTH_BACKEND == AuthBackend.JWT:
if MULTI_TENANT or AUTH_TYPE == AuthType.CLOUD:
raise ValueError(
"JWT auth backend is only supported for single-tenant, self-hosted deployments. "
"Use 'redis' or 'postgres' instead."
)
if not USER_AUTH_SECRET:
raise ValueError("USER_AUTH_SECRET is required for JWT auth backend.")
if AUTH_BACKEND == AuthBackend.REDIS:
auth_backend = AuthenticationBackend(
name="redis", transport=cookie_transport, get_strategy=get_redis_strategy
@@ -1166,10 +1066,6 @@ elif AUTH_BACKEND == AuthBackend.POSTGRES:
auth_backend = AuthenticationBackend(
name="postgres", transport=cookie_transport, get_strategy=get_database_strategy
)
elif AUTH_BACKEND == AuthBackend.JWT:
auth_backend = AuthenticationBackend(
name="jwt", transport=cookie_transport, get_strategy=get_jwt_strategy
)
else:
raise ValueError(f"Invalid auth backend: {AUTH_BACKEND}")
@@ -1432,6 +1328,14 @@ async def optional_user(
user: User | None = Depends(optional_fastapi_current_user),
) -> User | None:
tenant_id = get_current_tenant_id()
if (
user is not None
and user.is_anonymous
and anonymous_user_enabled(tenant_id=tenant_id)
):
return get_anonymous_user()
if user := await _check_for_saml_and_jwt(request, user, async_db_session):
# If user is already set, _check_for_saml_and_jwt returns the same user object
return user

View File
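The removed SingleTenantJWTStrategy notes a known trade-off: stateless JWTs cannot be revoked server-side, but embedding a token_version claim and bumping the stored version on logout makes them revocable at the cost of one lookup per request. A minimal sketch of that scheme (the claim layout is illustrative):

    import time

    def issue_claims(user_id: str, token_version: int, lifetime_s: int = 3600) -> dict:
        # The version travels inside the token; the authoritative copy lives
        # in the user row and is incremented whenever login is revoked.
        now = int(time.time())
        return {
            "sub": user_id,
            "iat": now,
            "exp": now + lifetime_s,
            "token_version": token_version,
        }

    def claims_are_current(claims: dict, stored_version: int) -> bool:
        # Valid only while unexpired and while the embedded version still
        # matches the stored one; a bump invalidates all earlier tokens.
        if claims.get("token_version") != stored_version:
            return False
        return claims.get("exp", 0) > time.time()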

@@ -43,7 +43,7 @@ from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.tracing.setup import setup_tracing
from onyx.tracing.braintrust_tracing import setup_braintrust_if_creds_available
from onyx.utils.logger import ColoredFormatter
from onyx.utils.logger import LoggerContextVars
from onyx.utils.logger import PlainFormatter
@@ -93,12 +93,12 @@ class TenantAwareTask(Task):
@task_prerun.connect
def on_task_prerun(
sender: Any | None = None, # noqa: ARG001
task_id: str | None = None, # noqa: ARG001
task: Task | None = None, # noqa: ARG001
args: tuple[Any, ...] | None = None, # noqa: ARG001
kwargs: dict[str, Any] | None = None, # noqa: ARG001
**other_kwargs: Any, # noqa: ARG001
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
) -> None:
# Reset any per-task logging context so that prefixes (e.g. pruning_ctx)
# from a previous task executed in the same worker process do not leak
@@ -110,14 +110,14 @@ def on_task_prerun(
def on_task_postrun(
sender: Any | None = None, # noqa: ARG001
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple | None = None, # noqa: ARG001
args: tuple | None = None,
kwargs: dict[str, Any] | None = None,
retval: Any | None = None, # noqa: ARG001
retval: Any | None = None,
state: str | None = None,
**kwds: Any, # noqa: ARG001
**kwds: Any,
) -> None:
"""We handle this signal in order to remove completed tasks
from their respective tasksets. This allows us to track the progress of document set
@@ -209,9 +209,7 @@ def on_task_postrun(
return
def on_celeryd_init(
sender: str, conf: Any = None, **kwargs: Any # noqa: ARG001
) -> None:
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
"""The first signal sent on celery worker startup"""
# NOTE(rkuo): start method "fork" is unsafe and we really need it to be "spawn"
@@ -240,11 +238,11 @@ def on_celeryd_init(
f"Multiprocessing selected start method: {multiprocessing.get_start_method()}"
)
# Initialize tracing in workers if credentials are available.
setup_tracing()
# Initialize Braintrust tracing in workers if credentials are available.
setup_braintrust_if_creds_available()
def wait_for_redis(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
def wait_for_redis(sender: Any, **kwargs: Any) -> None:
"""Waits for redis to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout
is reached."""
@@ -287,7 +285,7 @@ def wait_for_redis(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
return
def wait_for_db(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
def wait_for_db(sender: Any, **kwargs: Any) -> None:
"""Waits for the db to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout is reached."""
@@ -329,7 +327,7 @@ def wait_for_db(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
return
def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
logger.info(f"Running as a secondary celery worker: pid={os.getpid()}")
# Set up variables for waiting on primary worker
@@ -361,7 +359,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None: # noqa: ARG00
return
def on_worker_ready(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
task_logger.info("worker_ready signal received.")
# file based way to do readiness/liveness probes
@@ -374,7 +372,7 @@ def on_worker_ready(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
logger.info(f"Readiness signal touched at {path}.")
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
HttpxPool.close_all()
hostname: str = cast(str, sender.hostname)
@@ -407,9 +405,9 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
def on_setup_logging(
loglevel: int,
logfile: str | None,
format: str, # noqa: ARG001
colorize: bool, # noqa: ARG001
**kwargs: Any, # noqa: ARG001
format: str,
colorize: bool,
**kwargs: Any,
) -> None:
# TODO: could unhardcode format and colorize and accept these as options from
# celery's config
@@ -510,18 +508,18 @@ class TenantContextFilter(logging.Filter):
@task_postrun.connect
def reset_tenant_id(
sender: Any | None = None, # noqa: ARG001
task_id: str | None = None, # noqa: ARG001
task: Task | None = None, # noqa: ARG001
args: tuple[Any, ...] | None = None, # noqa: ARG001
kwargs: dict[str, Any] | None = None, # noqa: ARG001
**other_kwargs: Any, # noqa: ARG001
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
) -> None:
"""Signal handler to reset tenant ID in context var after task ends."""
CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None: # noqa: ARG001
def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
"""Waits for Vespa to become ready subject to a timeout.
Raises WorkerShutdown if the timeout is reached."""
@@ -555,12 +553,12 @@ class LivenessProbe(bootsteps.StartStopStep):
priority=10,
)
def stop(self, worker: Any) -> None: # noqa: ARG002
def stop(self, worker: Any) -> None:
self.path.unlink(missing_ok=True)
if self.task_tref:
self.task_tref.cancel()
def update_liveness_file(self, worker: Any) -> None: # noqa: ARG002
def update_liveness_file(self, worker: Any) -> None:
self.path.touch()

View File

@@ -102,7 +102,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
def init_worker(**kwargs: Any) -> None:
SqlEngine.reset_engine()

View File

@@ -91,7 +91,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
def init_worker(**kwargs: Any) -> None:
SqlEngine.reset_engine()

View File

@@ -244,7 +244,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
# it's unclear to me whether using the hub's timer or the bootstep timer is better
requires = {"celery.worker.components:Hub"}
def __init__(self, worker: Any, **kwargs: Any) -> None: # noqa: ARG002
def __init__(self, worker: Any, **kwargs: Any) -> None:
self.interval = CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8 # Interval in seconds
self.task_tref = None
@@ -300,7 +300,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
except Exception:
task_logger.exception("Periodic task failed.")
def stop(self, worker: Any) -> None: # noqa: ARG002
def stop(self, worker: Any) -> None:
# Cancel the scheduled task when the worker stops
if self.task_tref:
self.task_tref.cancel()

View File

@@ -91,7 +91,7 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None: # noqa: ARG001
def init_worker(**kwargs: Any) -> None:
SqlEngine.reset_engine()

View File

@@ -217,11 +217,9 @@ if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
{
"name": "check-for-documents-for-opensearch-migration",
"task": OnyxCeleryTask.CHECK_FOR_DOCUMENTS_FOR_OPENSEARCH_MIGRATION_TASK,
# Try to enqueue an invocation of this task with this frequency.
"schedule": timedelta(seconds=120), # 2 minutes
"options": {
"priority": OnyxCeleryPriority.LOW,
# If the task was not dequeued in this time, revoke it.
"expires": BEAT_EXPIRES_DEFAULT,
},
}
@@ -229,18 +227,10 @@ if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
beat_task_templates.append(
{
"name": "migrate-documents-from-vespa-to-opensearch",
"task": OnyxCeleryTask.MIGRATE_DOCUMENTS_FROM_VESPA_TO_OPENSEARCH_TASK,
# Try to enqueue an invocation of this task with this frequency.
# NOTE: If MIGRATION_TASK_SOFT_TIME_LIMIT_S is greater than this
# value and the task is maximally busy, we can expect to see some
# enqueued tasks be revoked over time. This is ok; by erring on the
# side of "there will probably always be at least one task of this
# type in the queue", we are minimizing this task's idleness while
# still giving chances for other tasks to execute.
"task": OnyxCeleryTask.MIGRATE_DOCUMENT_FROM_VESPA_TO_OPENSEARCH_TASK,
"schedule": timedelta(seconds=120), # 2 minutes
"options": {
"priority": OnyxCeleryPriority.LOW,
# If the task was not dequeued in this time, revoke it.
"expires": BEAT_EXPIRES_DEFAULT,
},
}

View File
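The comments trimmed above explain the beat-entry pattern: enqueue on a fixed cadence and let undequeued duplicates expire, keeping the queue warm without letting stale copies pile up. A sketch of such an entry (the task name, priority, and expiry value are illustrative):

    from datetime import timedelta

    BEAT_EXPIRES_DEFAULT = 10 * 60  # assumption: expiry window in seconds

    beat_entry = {
        "name": "migrate-documents-from-vespa-to-opensearch",
        "task": "migrate_document_from_vespa_to_opensearch",  # illustrative
        # Try to enqueue an invocation of the task this often...
        "schedule": timedelta(seconds=120),
        "options": {
            "priority": 9,  # illustrative low priority
            # ...and revoke any invocation not dequeued within the window, so
            # a saturated worker pool does not accumulate stale duplicates.
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    }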

@@ -366,7 +366,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
def monitor_connector_deletion_taskset(
tenant_id: str, key_bytes: bytes, r: Redis # noqa: ARG001
tenant_id: str, key_bytes: bytes, r: Redis
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)

View File

@@ -1071,7 +1071,7 @@ def check_for_checkpoint_cleanup(self: Task, *, tenant_id: str) -> None:
bind=True,
)
def cleanup_checkpoint_task(
self: Task, *, index_attempt_id: int, tenant_id: str | None # noqa: ARG001
self: Task, *, index_attempt_id: int, tenant_id: str | None
) -> None:
"""Clean up a checkpoint for a given index attempt"""
@@ -1160,7 +1160,7 @@ def check_for_index_attempt_cleanup(self: Task, *, tenant_id: str) -> None:
bind=True,
)
def cleanup_index_attempt_task(
self: Task, *, index_attempt_ids: list[int], tenant_id: str # noqa: ARG001
self: Task, *, index_attempt_ids: list[int], tenant_id: str
) -> None:
"""Clean up an index attempt"""
start = time.monotonic()
@@ -1266,7 +1266,7 @@ def _resolve_indexing_document_errors(
bind=True,
)
def docprocessing_task(
self: Task, # noqa: ARG001
self: Task,
index_attempt_id: int,
cc_pair_id: int,
tenant_id: str,

View File

@@ -57,7 +57,7 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
# TODO: Pass index_attempt_id to the callback and check cancellation using the db
return bool(self.redis_connector.stop.fenced)
def progress(self, tag: str, amount: int) -> None: # noqa: ARG002
def progress(self, tag: str, amount: int) -> None:
"""Amount isn't used yet."""
# rkuo: this shouldn't be necessary yet because we spawn the process this runs inside

View File

@@ -26,7 +26,7 @@ logger = setup_logger()
trail=False,
)
def eval_run_task(
self: Task, # noqa: ARG001
self: Task,
*,
configuration_dict: dict[str, Any],
) -> None:
@@ -48,7 +48,7 @@ def eval_run_task(
bind=True,
trail=False,
)
def scheduled_eval_task(self: Task, **kwargs: Any) -> None: # noqa: ARG001
def scheduled_eval_task(self: Task, **kwargs: Any) -> None:
"""
Scheduled task to run evaluations on configured datasets.
Runs weekly on Sunday at midnight UTC.

View File

@@ -322,7 +322,7 @@ def _run_hierarchy_extraction(
bind=True,
)
def connector_hierarchy_fetching_task(
self: Task, # noqa: ARG001
self: Task,
*,
cc_pair_id: int,
tenant_id: str,

Some files were not shown because too many files have changed in this diff.