Compare commits

..

106 Commits

Author SHA1 Message Date
Raunak Bhagat
68e19383b9 Edit props passed to ActionToggle 2025-09-28 12:04:53 -07:00
Raunak Bhagat
f01f3d6e77 Add back assistants selector 2025-09-28 12:00:25 -07:00
Raunak Bhagat
42b57bf804 Add back FilePicker and DeepResearchToggle 2025-09-28 09:50:21 -07:00
Raunak Bhagat
d4b696d17d Clean up some more code 2025-09-27 23:34:13 -07:00
Raunak Bhagat
84b774f376 Edit ChatInputBar and fix build errors 2025-09-27 23:27:47 -07:00
Raunak Bhagat
921f90d013 Fix all build errors 2025-09-27 21:50:53 -07:00
Raunak Bhagat
f6c09572eb Finish more integrations of Projects into Refresh 2025-09-27 21:34:11 -07:00
Raunak Bhagat
b2f90db0b8 Merge in other files from main 2025-09-26 17:10:55 -07:00
Raunak Bhagat
ee7b33b382 Merge main in 2025-09-26 17:09:50 -07:00
Raunak Bhagat
db37c35030 Update imports 2025-09-26 12:11:08 -07:00
Raunak Bhagat
4f105d002c Finish refreshing HumanMessage 2025-09-26 11:01:42 -07:00
Raunak Bhagat
7e9095b976 Whole bunch of refactors 2025-09-25 22:58:33 -07:00
Raunak Bhagat
bce6741ee6 Fix build errors 2025-09-25 22:27:30 -07:00
Raunak Bhagat
b5a188ee5e Add new utility components 2025-09-25 22:22:34 -07:00
Raunak Bhagat
8578d89a24 Update MessageSwitcher 2025-09-25 22:20:27 -07:00
Raunak Bhagat
e42840775a Add tooltip to IconButton 2025-09-25 22:18:51 -07:00
Raunak Bhagat
c907971d5b Saving changes 2025-09-25 22:17:56 -07:00
Raunak Bhagat
44a565bbfc Add new icons 2025-09-25 22:13:18 -07:00
Raunak Bhagat
ea4219806d Edit main page 2025-09-25 15:08:49 -07:00
Raunak Bhagat
ccc76413c6 Fix build errors 2025-09-25 13:01:41 -07:00
Raunak Bhagat
12a2786ff6 Fix build errors 2025-09-25 12:53:12 -07:00
Raunak Bhagat
882c294e74 saving changes 2025-09-25 10:57:55 -07:00
Raunak Bhagat
0e2b6cf193 Remove more unused files 2025-09-24 19:54:52 -07:00
Raunak Bhagat
c7ae8bd783 Remove unused files 2025-09-24 19:51:19 -07:00
Raunak Bhagat
3b10dd4b22 Edit LLMPopover + dependent components 2025-09-24 19:33:00 -07:00
Raunak Bhagat
6ea886fc85 Add new SelectButton 2025-09-24 17:59:40 -07:00
Raunak Bhagat
f8c89fc750 Fix up ChatInputBar + lots of cleanup 2025-09-24 17:05:02 -07:00
Raunak Bhagat
cace80ffaa Fix sidebar folding 2025-09-24 13:57:18 -07:00
Raunak Bhagat
e628033885 Reattach handler 2025-09-24 13:37:11 -07:00
Raunak Bhagat
22bb4b6d98 Add emphasis 2025-09-24 13:33:02 -07:00
Raunak Bhagat
9afffc2de4 Implement proper grouping hierarchies for buttons 2025-09-24 13:28:26 -07:00
Raunak Bhagat
2c1193f975 Fix button variants + subvariants 2025-09-24 13:00:00 -07:00
Raunak Bhagat
b192542c85 Fix bug in which folded state would still render text 2025-09-24 12:18:38 -07:00
Raunak Bhagat
d8821b8ccc Prevent click propagation 2025-09-24 12:14:58 -07:00
Raunak Bhagat
a007369bd5 Edit how renaming UI is rendered (using input instead of textarea) 2025-09-24 12:03:19 -07:00
Raunak Bhagat
2f65629f51 More edits to AppSidebar 2025-09-24 11:48:04 -07:00
Raunak Bhagat
7701ae2112 Re-implement buttons + clean up look 2025-09-24 11:15:38 -07:00
Raunak Bhagat
01a3a256e9 Update README 2025-09-23 15:57:28 -07:00
Raunak Bhagat
0d55febaa7 Remove strokeOpacity from icons 2025-09-23 15:56:16 -07:00
Raunak Bhagat
bdafbfe0e8 Edit AdminSidebar width 2025-09-22 11:22:25 -07:00
Raunak Bhagat
278fd0e153 Fix error in which message would not be updated after edit 2025-09-22 11:04:15 -07:00
Raunak Bhagat
a4bb97bc22 Fix editing modal 2025-09-22 10:53:55 -07:00
Raunak Bhagat
8063d9a75e Edit KG configuration page 2025-09-22 05:04:42 -07:00
Raunak Bhagat
1ffaba12f0 Fix height of search bar 2025-09-22 04:45:41 -07:00
Raunak Bhagat
26f8660663 Fix search bar 2025-09-22 04:40:12 -07:00
Raunak Bhagat
d6504ed578 Update search-settings page 2025-09-22 03:59:35 -07:00
Raunak Bhagat
7fcc2c9d35 Clean up more admin stuff 2025-09-22 03:21:48 -07:00
Raunak Bhagat
46e8f925fe Clean up AdminSidebar 2025-09-21 22:33:26 -07:00
Raunak Bhagat
5ec1f61839 Add user settings 2025-09-19 20:07:11 -07:00
Raunak Bhagat
df950963a7 Edit SvgMoreHorizontal SVG size 2025-09-19 19:47:06 -07:00
Raunak Bhagat
93208a66ac Edit settings popup state transitions 2025-09-19 19:30:39 -07:00
Raunak Bhagat
a4819e07e7 Small bug fixes 2025-09-19 19:19:37 -07:00
Raunak Bhagat
f642ace40c Implement logout 2025-09-19 19:16:24 -07:00
Raunak Bhagat
9b430ae2d5 Implement notifications 2025-09-19 19:03:48 -07:00
Raunak Bhagat
05f3f878b2 Edit edit/delete modals 2025-09-19 17:54:24 -07:00
Raunak Bhagat
df17c5352e Edit active colours 2025-09-19 16:46:08 -07:00
Raunak Bhagat
bcfb0f3cf3 Remove commented out state 2025-09-19 16:04:13 -07:00
Raunak Bhagat
38468c1dc4 Fix AgentsModal 2025-09-19 15:55:40 -07:00
Raunak Bhagat
8550a9c5e3 Cleanup sidebar a bit more 2025-09-19 14:33:36 -07:00
Raunak Bhagat
fe0c60e50d Fix UX around naming chats 2025-09-19 14:03:02 -07:00
Raunak Bhagat
4ecc151a02 Fix up chat-renaming 2025-09-19 13:49:08 -07:00
Raunak Bhagat
d08becead5 Saving changes 2025-09-19 13:34:54 -07:00
Raunak Bhagat
a429f852d5 Reduce height of buttons 2025-09-19 08:05:50 -07:00
Raunak Bhagat
a856f27fae Saving changes 2025-09-18 20:19:25 -07:00
Raunak Bhagat
d0d8027928 Edit popups in sidebar buttons 2025-09-18 19:53:33 -07:00
Raunak Bhagat
bd1671f1a1 Edit popovers and add new icons 2025-09-18 19:05:16 -07:00
Raunak Bhagat
e236c67678 Fix build errors 2025-09-18 17:28:19 -07:00
Raunak Bhagat
683956697a More UI fixes and tweaks 2025-09-18 16:57:47 -07:00
Raunak Bhagat
fb1e303ffc Fix ordering bug 2025-09-18 16:09:25 -07:00
Raunak Bhagat
729d4fafd1 Remove client directive 2025-09-18 15:52:16 -07:00
Raunak Bhagat
40c60282d0 Update agents modal and general structure of app 2025-09-18 15:19:35 -07:00
Raunak Bhagat
2141fd2c6e More edits to styling + colours 2025-09-18 12:45:50 -07:00
Raunak Bhagat
9aeba96043 Update state management 2025-09-16 19:34:48 -07:00
Raunak Bhagat
b431de5141 Update hover state for buttons 2025-09-16 19:06:05 -07:00
Raunak Bhagat
d1a6340cfc Add new chat handler 2025-09-16 17:45:21 -07:00
Raunak Bhagat
ccf382ef4f Edit spacing 2025-09-16 17:41:56 -07:00
Raunak Bhagat
c31997b9b2 Save folded state to localStorage 2025-09-16 17:40:11 -07:00
Raunak Bhagat
ab31795a46 Recenter icon when title is hidden 2025-09-16 17:35:40 -07:00
Raunak Bhagat
b3beca63dc Make headers sticky 2025-09-16 17:33:21 -07:00
Raunak Bhagat
cc6d54c1e6 Add loading state for Truncated component + fix spacings 2025-09-16 17:28:54 -07:00
Raunak Bhagat
ee12c0c5de Fix scrolling issue 2025-09-16 17:00:40 -07:00
Raunak Bhagat
d48912a05d Fix errors 2025-09-16 15:50:48 -07:00
Raunak Bhagat
c079072676 Remove unnecessary file + make HistorySidebar be smart 2025-09-16 15:48:15 -07:00
Raunak Bhagat
952f6bfb37 Delete unused files 2025-09-16 15:42:36 -07:00
Raunak Bhagat
0714e4bb4e Fix dnd 2025-09-16 15:39:47 -07:00
Raunak Bhagat
ae577f0f44 Add AgentsModal 2025-09-16 15:35:59 -07:00
Raunak Bhagat
0705d584d8 Update user hover-card 2025-09-16 14:52:54 -07:00
Raunak Bhagat
36e391e557 Add folded sidebar (+ shortcuts) 2025-09-16 13:50:49 -07:00
Raunak Bhagat
1efce594b5 Clean up truncation + buttons 2025-09-16 13:02:16 -07:00
Raunak Bhagat
67ac53f17d Add more styling for HistorySidebar + add README for working w/ icons 2025-09-16 11:21:13 -07:00
Raunak Bhagat
d5a222925a Add icons (as raw TSX) 2025-09-16 09:51:25 -07:00
Raunak Bhagat
d5ef928782 Add icons 2025-09-16 09:12:12 -07:00
Raunak Bhagat
6963d78f8e Fix more build errors? 2025-09-15 17:39:24 -07:00
Raunak Bhagat
d3ef2b8c17 Fix build errors 2025-09-15 17:28:34 -07:00
Raunak Bhagat
70f4162ea8 Update name 2025-09-15 17:17:09 -07:00
Raunak Bhagat
883f52d332 Update component names 2025-09-15 17:09:13 -07:00
Raunak Bhagat
f8fd83c883 Clean up sidebar 2025-09-15 15:56:45 -07:00
Raunak Bhagat
d2bf0c0c5f Update token-context bar 2025-09-15 11:43:54 -07:00
Raunak Bhagat
5d598c2d22 Add more colour fixes to Modal 2025-09-15 11:13:45 -07:00
Raunak Bhagat
9dc0e97302 Merge branch 'main' into colours 2025-09-15 09:45:32 -07:00
Raunak Bhagat
048b2a6b39 Edit LLMPopover and add border-radii 2025-09-15 09:43:06 -07:00
Raunak Bhagat
7dd3cecf67 Edit UserDropdown colours 2025-09-15 09:15:56 -07:00
Raunak Bhagat
82abe28986 Update more colours 2025-09-14 20:37:33 -07:00
Raunak Bhagat
a0575e6a00 Update colours for sidebar 2025-09-14 20:24:25 -07:00
Raunak Bhagat
0c5bf5b3ed Add all colours from Figma 2025-09-11 13:34:54 -07:00
Raunak Bhagat
492117d910 Edit .gitignore 2025-09-11 12:32:53 -07:00
471 changed files with 26985 additions and 15823 deletions

View File

@@ -8,9 +8,9 @@ on:
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
jobs:
build-and-push:
@@ -33,16 +33,7 @@ jobs:
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
- name: Checkout code
uses: actions/checkout@v4
@@ -55,8 +46,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -129,8 +119,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3

View File

@@ -11,8 +11,8 @@ env:
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
jobs:
@@ -145,15 +145,6 @@ jobs:
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on: ubuntu-latest
steps:
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
@@ -166,16 +157,11 @@ jobs:
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
if [[ "${{ steps.check_version.outputs.is_stable }}" == "true" ]]; then
if [[ "${{ env.LATEST_TAG }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
if [[ "${{ env.EDGE_TAG }}" == "true" ]]; then
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:edge \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
fi
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@v3

View File

@@ -7,10 +7,7 @@ on:
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
# tag nightly builds with "edge"
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DEPLOYMENT: standalone
jobs:
@@ -48,15 +45,6 @@ jobs:
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Check if stable release version
id: check_version
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "is_stable=true" >> $GITHUB_OUTPUT
else
echo "is_stable=false" >> $GITHUB_OUTPUT
fi
- name: Checkout
uses: actions/checkout@v4
@@ -69,8 +57,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -139,8 +126,7 @@ jobs:
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3

View File

@@ -25,11 +25,9 @@ jobs:
- name: Add required Helm repositories
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add keda https://kedacore.github.io/charts
helm repo update
- name: Build chart dependencies

View File

@@ -0,0 +1,124 @@
name: Backport on Merge
# Note this workflow does not trigger the builds, be sure to manually tag the branches to trigger the builds
on:
pull_request:
types: [closed] # Later we check for merge so only PRs that go in can get backported
permissions:
contents: write
actions: write
jobs:
backport:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
fetch-depth: 0
- name: Set up Git user
run: |
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@onyx.app"
git fetch --prune
- name: Check for Backport Checkbox
id: checkbox-check
run: |
PR_BODY="${{ github.event.pull_request.body }}"
if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
echo "backport=true" >> $GITHUB_OUTPUT
else
echo "backport=false" >> $GITHUB_OUTPUT
fi
- name: List and sort release branches
id: list-branches
run: |
git fetch --all --tags
BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr)
BETA=$(echo "$BRANCHES" | head -n 1)
STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1)
echo "beta=release/$BETA" >> $GITHUB_OUTPUT
echo "stable=release/$STABLE" >> $GITHUB_OUTPUT
# Fetch latest tags for beta and stable
LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)
# Handle case where no beta tags exist
if [[ -z "$LATEST_BETA_TAG" ]]; then
NEW_BETA_TAG="v1.0.0-beta.1"
else
NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
fi
# Increment latest stable tag
NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT
echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT
echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT
- name: Echo branch and tag information
run: |
echo "Beta branch: ${{ steps.list-branches.outputs.beta }}"
echo "Stable branch: ${{ steps.list-branches.outputs.stable }}"
echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}"
echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}"
echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}"
echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}"
- name: Trigger Backport
if: steps.checkbox-check.outputs.backport == 'true'
run: |
set -e
echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"
# Echo the merge commit SHA
echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"
# Fetch all history for all branches and tags
git fetch --prune
# Reset and prepare the beta branch
git checkout ${{ steps.list-branches.outputs.beta }}
echo "Last 5 commits on beta branch:"
git log -n 5 --pretty=format:"%H"
echo "" # Newline for formatting
# Cherry-pick the merge commit from the merged PR
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
echo "Cherry-pick to beta failed due to conflicts."
exit 1
}
# Create new beta branch/tag
git tag ${{ steps.list-branches.outputs.new_beta_tag }}
# Push the changes and tag to the beta branch using PAT
git push origin ${{ steps.list-branches.outputs.beta }}
git push origin ${{ steps.list-branches.outputs.new_beta_tag }}
# Reset and prepare the stable branch
git checkout ${{ steps.list-branches.outputs.stable }}
echo "Last 5 commits on stable branch:"
git log -n 5 --pretty=format:"%H"
echo "" # Newline for formatting
# Cherry-pick the merge commit from the merged PR
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
echo "Cherry-pick to stable failed due to conflicts."
exit 1
}
# Create new stable branch/tag
git tag ${{ steps.list-branches.outputs.new_stable_tag }}
# Push the changes and tag to the stable branch using PAT
git push origin ${{ steps.list-branches.outputs.stable }}
git push origin ${{ steps.list-branches.outputs.new_stable_tag }}

View File

@@ -20,7 +20,6 @@ env:
CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# LLMs
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

View File

@@ -65,45 +65,35 @@ jobs:
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo update
- name: Install Redis operator
if: steps.list-changed.outputs.changed == 'true'
shell: bash
run: |
echo "=== Installing redis-operator CRDs ==="
helm upgrade --install redis-operator ot-container-kit/redis-operator \
--namespace redis-operator --create-namespace --wait --timeout 300s
- name: Pre-pull required images
- name: Pre-pull critical images
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-pulling required images to avoid timeout ==="
echo "=== Pre-pulling critical images to avoid timeout ==="
# Get kind cluster name
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
echo "Kind cluster: $KIND_CLUSTER"
IMAGES=(
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
"quay.io/opstree/redis:v7.0.15"
"docker.io/onyxdotapp/onyx-web-server:latest"
)
for image in "${IMAGES[@]}"; do
echo "Pre-pulling $image"
if docker pull "$image"; then
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
else
echo "Failed to pull $image"
fi
done
# Pre-pull images that are likely to be used
echo "Pre-pulling PostgreSQL image..."
docker pull postgres:15-alpine || echo "Failed to pull postgres:15-alpine"
kind load docker-image postgres:15-alpine --name $KIND_CLUSTER || echo "Failed to load postgres image"
echo "Pre-pulling Redis image..."
docker pull redis:7-alpine || echo "Failed to pull redis:7-alpine"
kind load docker-image redis:7-alpine --name $KIND_CLUSTER || echo "Failed to load redis image"
echo "Pre-pulling Onyx images..."
docker pull docker.io/onyxdotapp/onyx-web-server:latest || echo "Failed to pull onyx web server"
docker pull docker.io/onyxdotapp/onyx-backend:latest || echo "Failed to pull onyx backend"
kind load docker-image docker.io/onyxdotapp/onyx-web-server:latest --name $KIND_CLUSTER || echo "Failed to load onyx web server"
kind load docker-image docker.io/onyxdotapp/onyx-backend:latest --name $KIND_CLUSTER || echo "Failed to load onyx backend"
echo "=== Images loaded into Kind cluster ==="
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
docker exec $KIND_CLUSTER-control-plane crictl images | grep -E "(postgres|redis|onyx)" || echo "Some images may still be loading..."
- name: Validate chart dependencies
if: steps.list-changed.outputs.changed == 'true'
@@ -159,7 +149,6 @@ jobs:
# Run the actual installation with detailed logging
echo "=== Starting ct install ==="
set +e
ct install --all \
--helm-extra-set-args="\
--set=nginx.enabled=false \
@@ -167,10 +156,8 @@ jobs:
--set=vespa.enabled=false \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=postgresql.primary.persistence.enabled=false \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
--set=webserver.replicaCount=1 \
--set=api.replicaCount=0 \
--set=inferenceCapability.replicaCount=0 \
@@ -186,16 +173,8 @@ jobs:
--set=celery_worker_user_files_indexing.replicaCount=0" \
--helm-extra-args="--timeout 900s --debug" \
--debug --config ct.yaml
CT_EXIT=$?
set -e
if [[ $CT_EXIT -ne 0 ]]; then
echo "ct install failed with exit code $CT_EXIT"
exit $CT_EXIT
else
echo "=== Installation completed successfully ==="
fi
echo "=== Installation completed successfully ==="
kubectl get pods --all-namespaces
- name: Post-install verification
@@ -220,7 +199,7 @@ jobs:
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so

View File

@@ -22,11 +22,9 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
@@ -133,7 +131,6 @@ jobs:
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
build-model-server-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
@@ -161,7 +158,6 @@ jobs:
push: true
outputs: type=registry
provenance: false
no-cache: true
build-integration-image:
needs: prepare-build
@@ -195,7 +191,6 @@ jobs:
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
integration-tests:
needs:
@@ -342,11 +337,9 @@ jobs:
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \

View File

@@ -19,11 +19,9 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
@@ -130,7 +128,6 @@ jobs:
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
build-model-server-image:
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
@@ -158,7 +155,6 @@ jobs:
push: true
outputs: type=registry
provenance: false
no-cache: true
build-integration-image:
needs: prepare-build
@@ -192,7 +188,6 @@ jobs:
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
push: true
outputs: type=registry
no-cache: true
integration-tests-mit:
needs:
@@ -339,11 +334,9 @@ jobs:
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \

View File

@@ -56,8 +56,6 @@ jobs:
provenance: false
sbom: false
push: true
outputs: type=registry
# no-cache: true
build-backend-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
@@ -89,8 +87,6 @@ jobs:
provenance: false
sbom: false
push: true
outputs: type=registry
# no-cache: true
build-model-server-image:
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
@@ -122,8 +118,6 @@ jobs:
provenance: false
sbom: false
push: true
outputs: type=registry
# no-cache: true
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
@@ -185,21 +179,16 @@ jobs:
working-directory: ./web
run: npx playwright install --with-deps
- name: Create .env file for Docker Compose
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }}
EXA_API_KEY=${{ env.EXA_API_KEY }}
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
IMAGE_TAG=test
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }} \
EXA_API_KEY=${{ env.EXA_API_KEY }} \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
id: start_docker
@@ -239,16 +228,14 @@ jobs:
- name: Run Playwright tests
working-directory: ./web
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
npx playwright test
run: npx playwright test
- uses: actions/upload-artifact@v4
if: always()
with:
# Includes test results and debug screenshots
name: playwright-test-results-${{ github.run_id }}
# Chromatic automatically defaults to the test-results directory.
# Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
name: test-results
path: ./web/test-results
retention-days: 30

View File

@@ -20,13 +20,11 @@ env:
CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# Jira
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
# Gong
GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}

View File

@@ -13,50 +13,6 @@
"presentation": {
"group": "1"
}
},
{
"name": "Run All Onyx Services",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
}
},
{
"name": "Web / Model / API",
"configurations": ["Web Server", "Model Server", "API Server"],
"presentation": {
"group": "1"
}
},
{
"name": "Celery (all)",
"configurations": [
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
},
"stopAll": true
}
],
"configurations": [
@@ -258,6 +214,250 @@
"consoleTitle": "Celery docfetching Console",
"justMyCode": false
},
{
"name": "Run All Onyx Services",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
}
},
{
"name": "Web / Model / API",
"configurations": ["Web Server", "Model Server", "API Server"],
"presentation": {
"group": "1"
}
},
{
"name": "Celery (all)",
"configurations": [
"Celery primary",
"Celery light",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat",
"Celery monitoring",
"Celery user file processing"
],
"presentation": {
"group": "1"
},
"stopAll": true
}
],
"configurations": [
{
// Dummy entry used to label the group
"name": "--- Individual ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "2",
"order": 0
}
},
{
"name": "Web Server",
"type": "node",
"request": "launch",
"cwd": "${workspaceRoot}/web",
"runtimeExecutable": "npm",
"envFile": "${workspaceFolder}/.vscode/.env",
"runtimeArgs": ["run", "dev"],
"presentation": {
"group": "2"
},
"console": "integratedTerminal",
"consoleTitle": "Web Server Console"
},
{
"name": "Model Server",
"consoleName": "Model Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": ["model_server.main:app", "--reload", "--port", "9000"],
"presentation": {
"group": "2"
},
"consoleTitle": "Model Server Console"
},
{
"name": "API Server",
"consoleName": "API Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": ["onyx.main:app", "--reload", "--port", "8080"],
"presentation": {
"group": "2"
},
"consoleTitle": "API Server Console"
},
// For the listener to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Slack Bot",
"consoleName": "Slack Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/slack/listener.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Celery primary",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.primary",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=primary@%n",
"-Q",
"celery"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery primary Console"
},
{
"name": "Celery light",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.light",
"worker",
"--pool=threads",
"--concurrency=64",
"--prefetch-multiplier=8",
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery light Console"
},
{
"name": "Celery heavy",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "INFO",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.heavy",
"worker",
"--pool=threads",
"--concurrency=4",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=heavy@%n",
"-Q",
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery heavy Console"
},
{
"name": "Celery docfetching",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.docfetching",
"worker",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docfetching@%n",
"-Q",
"connector_doc_fetching,user_files_indexing"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery docfetching Console",
"justMyCode": false
},
{
"name": "Celery docprocessing",
"type": "debugpy",
@@ -286,83 +486,8 @@
"presentation": {
"group": "2"
},
"consoleTitle": "Celery docprocessing Console"
},
{
"name": "Celery beat",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery beat Console"
},
{
"name": "Celery monitoring",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {},
"args": [
"-A",
"onyx.background.celery.versioned_apps.monitoring",
"worker",
"--pool=solo",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=monitoring@%n",
"-Q",
"monitoring"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery monitoring Console"
},
{
"name": "Celery user file processing",
"type": "debugpy",
"request": "launch",
"module": "celery",
"args": [
"-A",
"onyx.background.celery.versioned_apps.user_file_processing",
"worker",
"--loglevel=INFO",
"--hostname=user_file_processing@%n",
"--pool=threads",
"-Q",
"user_file_processing,user_file_project_sync"
],
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Celery user file processing Console"
},
{
"name": "Pytest",
"consoleName": "Pytest",
@@ -378,8 +503,8 @@
},
"args": [
"-v"
// Specify a specific module/test to run or provide nothing to run all tests
// "tests/unit/onyx/llm/answering/test_prune_and_merge.py"
// Specify a specific module/test to run or provide nothing to run all tests
//"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
],
"presentation": {
"group": "2"
@@ -463,6 +588,144 @@
"group": "3"
}
},
{
"name": "Celery monitoring",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {},
"args": [
"-A",
"onyx.background.celery.versioned_apps.monitoring",
"worker",
"--pool=solo",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=monitoring@%n",
"-Q",
"monitoring"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery monitoring Console"
},
{
"name": "Celery beat",
"type": "debugpy",
"request": "launch",
"module": "celery",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Celery beat Console"
},
{
"name": "Celery user file processing",
"type": "debugpy",
"request": "launch",
"module": "celery",
"args": [
"-A",
"onyx.background.celery.versioned_apps.user_file_processing",
"worker",
"--loglevel=INFO",
"--hostname=user_file_processing@%n",
"--pool=threads",
"-Q",
"user_file_processing,user_file_project_sync"
],
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Celery user file processing Console"
},
{
"name": "Pytest",
"consoleName": "Pytest",
"type": "debugpy",
"request": "launch",
"module": "pytest",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"-v"
// Specify a specific module/test to run or provide nothing to run all tests
//"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
],
"presentation": {
"group": "2"
},
"consoleTitle": "Pytest Console"
},
{
// Dummy entry used to label the group
"name": "--- Tasks ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "3",
"order": 0
}
},
{
"name": "Clear and Restart External Volumes and Containers",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_containers.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true,
"presentation": {
"group": "3"
}
},
{
"name": "Install Python Requirements",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"-c",
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -105,11 +105,6 @@ pip install -r backend/requirements/ee.txt
pip install -r backend/requirements/model_server.txt
```
Fix vscode/cursor auto-imports:
```bash
pip install -e .
```
Install Playwright for Python (headless browser required by the Web Connector)
In the activated Python virtualenv, install Playwright for Python by running:

View File

@@ -128,7 +128,7 @@ def upgrade() -> None:
AND a.attname = 'cc_pair_id'
)
) LOOP
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT %I', r.conname);
END LOOP;
END$$;
"""

View File

@@ -167,10 +167,7 @@ def upgrade() -> None:
)
# Delete related records
# Clean child tables first to satisfy foreign key constraints,
# then the parent tables
tables_to_clean = [
("index_attempt_errors", "connector_credential_pair_id"),
("index_attempt", "connector_credential_pair_id"),
("background_error", "cc_pair_id"),
("document_set__connector_credential_pair", "connector_credential_pair_id"),
@@ -245,7 +242,7 @@ def upgrade() -> None:
AND t.relname = 'user_file'
AND ft.relname = 'connector_credential_pair'
) LOOP
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT %I', r.conname);
END LOOP;
END$$;
"""

View File

@@ -1,37 +0,0 @@
"""Add image input support to model config
Revision ID: 64bd5677aeb6
Revises: b30353be4eec
Create Date: 2025-09-28 15:48:12.003612
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "64bd5677aeb6"
down_revision = "b30353be4eec"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"model_configuration",
sa.Column("supports_image_input", sa.Boolean(), nullable=True),
)
# Seems to be left over from when model visibility was introduced and a nullable field.
# Set any null is_visible values to False
connection = op.get_bind()
connection.execute(
sa.text(
"UPDATE model_configuration SET is_visible = false WHERE is_visible IS NULL"
)
)
def downgrade() -> None:
op.drop_column("model_configuration", "supports_image_input")

View File

@@ -1,123 +0,0 @@
"""add_mcp_auth_performer
Revision ID: b30353be4eec
Revises: 2b75d0a8ffcb
Create Date: 2025-09-13 14:58:08.413534
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import MCPAuthenticationPerformer, MCPTransport
# revision identifiers, used by Alembic.
revision = "b30353be4eec"
down_revision = "2b75d0a8ffcb"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""moving to a better way of handling auth performer and transport"""
# Add nullable column first for backward compatibility
op.add_column(
"mcp_server",
sa.Column(
"auth_performer",
sa.Enum(MCPAuthenticationPerformer, native_enum=False),
nullable=True,
),
)
op.add_column(
"mcp_server",
sa.Column(
"transport",
sa.Enum(MCPTransport, native_enum=False),
nullable=True,
),
)
# # Backfill values using existing data and inference rules
bind = op.get_bind()
# 1) OAUTH servers are always PER_USER
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET auth_performer = 'PER_USER'
WHERE auth_type = 'OAUTH'
"""
)
)
# 2) If there is no admin connection config, mark as ADMIN (and not set yet)
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET auth_performer = 'ADMIN'
WHERE admin_connection_config_id IS NULL
AND auth_performer IS NULL
"""
)
)
# 3) If there exists any user-specific connection config (user_email != ''), mark as PER_USER
bind.execute(
sa.text(
"""
UPDATE mcp_server AS ms
SET auth_performer = 'PER_USER'
FROM mcp_connection_config AS mcc
WHERE mcc.mcp_server_id = ms.id
AND COALESCE(mcc.user_email, '') <> ''
AND ms.auth_performer IS NULL
"""
)
)
# 4) Default any remaining nulls to ADMIN (covers API_TOKEN admin-managed and NONE)
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET auth_performer = 'ADMIN'
WHERE auth_performer IS NULL
"""
)
)
# Finally, make the column non-nullable
op.alter_column(
"mcp_server",
"auth_performer",
existing_type=sa.Enum(MCPAuthenticationPerformer, native_enum=False),
nullable=False,
)
# Backfill transport for existing rows to STREAMABLE_HTTP, then make non-nullable
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET transport = 'STREAMABLE_HTTP'
WHERE transport IS NULL
"""
)
)
op.alter_column(
"mcp_server",
"transport",
existing_type=sa.Enum(MCPTransport, native_enum=False),
nullable=False,
)
def downgrade() -> None:
"""remove cols"""
op.drop_column("mcp_server", "transport")
op.drop_column("mcp_server", "auth_performer")

View File

@@ -124,9 +124,9 @@ def get_space_permission(
and not space_permissions.external_user_group_ids
):
logger.warning(
f"No permissions found for space '{space_key}'. This is very unlikely "
"to be correct and is more likely caused by an access token with "
"insufficient permissions. Make sure that the access token has Admin "
f"No permissions found for space '{space_key}'. This is very unlikely"
"to be correct and is more likely caused by an access token with"
"insufficient permissions. Make sure that the access token has Admin"
f"permissions for space '{space_key}'"
)

View File

@@ -26,7 +26,7 @@ def _get_slim_doc_generator(
else 0.0
)
return gmail_connector.retrieve_all_slim_docs_perm_sync(
return gmail_connector.retrieve_all_slim_documents(
start=start_time,
end=current_time.timestamp(),
callback=callback,

View File

@@ -34,7 +34,7 @@ def _get_slim_doc_generator(
else 0.0
)
return google_drive_connector.retrieve_all_slim_docs_perm_sync(
return google_drive_connector.retrieve_all_slim_documents(
start=start_time,
end=current_time.timestamp(),
callback=callback,

View File

@@ -59,7 +59,7 @@ def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
for raw_perm in permissions:
if not hasattr(raw_perm, "raw"):
logger.warning(f"Expected a 'raw' field, but none was found: {raw_perm=}")
logger.warn(f"Expected a 'raw' field, but none was found: {raw_perm=}")
continue
permission = Permission(**raw_perm.raw)
@@ -71,14 +71,14 @@ def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
# In order to associate this permission to some Atlassian entity, we need the "Holder".
# If this doesn't exist, then we cannot associate this permission to anyone; just skip.
if not permission.holder:
logger.warning(
logger.warn(
f"Expected to find a permission holder, but none was found: {permission=}"
)
continue
type = permission.holder.get("type")
if not type:
logger.warning(
logger.warn(
f"Expected to find the type of permission holder, but none was found: {permission=}"
)
continue

View File

@@ -105,9 +105,7 @@ def _get_slack_document_access(
channel_permissions: dict[str, ExternalAccess],
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
callback=callback
)
slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)
for doc_metadata_batch in slim_doc_generator:
for doc_metadata in doc_metadata_batch:

View File

@@ -4,7 +4,7 @@ from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFun
from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -17,7 +17,7 @@ def generic_doc_sync(
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
doc_source: DocumentSource,
slim_connector: SlimConnectorWithPermSync,
slim_connector: SlimConnector,
label: str,
) -> Generator[DocExternalAccess, None, None]:
"""
@@ -40,7 +40,7 @@ def generic_doc_sync(
newly_fetched_doc_ids: set[str] = set()
logger.info(f"Fetching all slim documents from {doc_source}")
for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(callback=callback):
for doc_batch in slim_connector.retrieve_all_slim_documents(callback=callback):
logger.info(f"Got {len(doc_batch)} slim documents from {doc_source}")
if callback:

View File

@@ -16,7 +16,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
# saml
("/auth/saml/authorize", {"GET"}),
("/auth/saml/callback", {"POST"}),
("/auth/saml/callback", {"GET"}),
("/auth/saml/logout", {"POST"}),
]

View File

@@ -110,6 +110,7 @@ async def upsert_saml_user(email: str) -> User:
async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
form_data = await request.form()
if request.client is None:
raise ValueError("Invalid request for SAML")
@@ -124,27 +125,14 @@ async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
"post_data": {},
"get_data": {},
}
# Handle query parameters (for GET requests)
if request.query_params:
rv["get_data"] = dict(request.query_params)
# Handle form data (for POST requests)
if request.method == "POST":
form_data = await request.form()
if "SAMLResponse" in form_data:
SAMLResponse = form_data["SAMLResponse"]
rv["post_data"]["SAMLResponse"] = SAMLResponse
if "RelayState" in form_data:
RelayState = form_data["RelayState"]
rv["post_data"]["RelayState"] = RelayState
else:
# For GET requests, check if SAMLResponse is in query params
if "SAMLResponse" in request.query_params:
rv["get_data"]["SAMLResponse"] = request.query_params["SAMLResponse"]
if "RelayState" in request.query_params:
rv["get_data"]["RelayState"] = request.query_params["RelayState"]
rv["get_data"] = (request.query_params,)
if "SAMLResponse" in form_data:
SAMLResponse = form_data["SAMLResponse"]
rv["post_data"]["SAMLResponse"] = SAMLResponse
if "RelayState" in form_data:
RelayState = form_data["RelayState"]
rv["post_data"]["RelayState"] = RelayState
return rv
@@ -160,27 +148,10 @@ async def saml_login(request: Request) -> SAMLAuthorizeResponse:
return SAMLAuthorizeResponse(authorization_url=callback_url)
@router.get("/callback")
async def saml_login_callback_get(
request: Request,
db_session: Session = Depends(get_session),
) -> Response:
"""Handle SAML callback via HTTP-Redirect binding (GET request)"""
return await _process_saml_callback(request, db_session)
@router.post("/callback")
async def saml_login_callback(
request: Request,
db_session: Session = Depends(get_session),
) -> Response:
"""Handle SAML callback via HTTP-POST binding (POST request)"""
return await _process_saml_callback(request, db_session)
async def _process_saml_callback(
request: Request,
db_session: Session,
) -> Response:
req = await prepare_from_fastapi_request(request)
auth = OneLogin_Saml2_Auth(req, custom_base_path=SAML_CONF_DIR)

View File

@@ -6,6 +6,7 @@ from typing import Optional
from fastapi import APIRouter
from fastapi import HTTPException
from fastapi import Request
from litellm.exceptions import RateLimitError
from sentence_transformers import CrossEncoder # type: ignore
from sentence_transformers import SentenceTransformer # type: ignore
@@ -206,8 +207,6 @@ async def route_bi_encoder_embed(
async def process_embed_request(
embed_request: EmbedRequest, gpu_type: str = "UNKNOWN"
) -> EmbedResponse:
from litellm.exceptions import RateLimitError
# Only local models should use this endpoint - API providers should make direct API calls
if embed_request.provider_type is not None:
raise ValueError(

View File

@@ -24,8 +24,6 @@ def decision_router(state: MainState) -> list[Send | Hashable] | DRPath | str:
return END
elif next_tool_name == DRPath.LOGGER.value:
return DRPath.LOGGER
elif next_tool_name == DRPath.CLOSER.value:
return DRPath.CLOSER
else:
return DRPath.ORCHESTRATOR

View File

@@ -643,16 +643,9 @@ def clarifier(
datetime_aware=True,
)
system_prompt_to_use_content = build_citations_system_message(
system_prompt_to_use = build_citations_system_message(
prompt_config
).content
system_prompt_to_use: str = cast(str, system_prompt_to_use_content)
if graph_config.inputs.project_instructions:
system_prompt_to_use = (
system_prompt_to_use
+ PROJECT_INSTRUCTIONS_SEPARATOR
+ graph_config.inputs.project_instructions
)
user_prompt_to_use = build_citations_user_message(
user_query=original_question,
files=[],

View File

@@ -181,15 +181,6 @@ def orchestrator(
remaining_time_budget = DR_TIME_BUDGET_BY_TYPE[research_type]
elif remaining_time_budget <= 0:
write_custom_event(
current_step_nr,
SectionEnd(),
writer,
)
current_step_nr += 1
return OrchestrationUpdate(
tools_used=[DRPath.CLOSER.value],
current_step_nr=current_step_nr,

View File

@@ -1,147 +0,0 @@
import json
from concurrent.futures import ThreadPoolExecutor
import requests
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetContent,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetSearchProvider,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetSearchResult,
)
from onyx.configs.chat_configs import SERPER_API_KEY
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from onyx.utils.retry_wrapper import retry_builder
SERPER_SEARCH_URL = "https://google.serper.dev/search"
SERPER_CONTENTS_URL = "https://scrape.serper.dev"
class SerperClient(InternetSearchProvider):
    """Internet search provider backed by the serper.dev API.

    Uses the Serper search endpoint for queries and the Serper scrape
    endpoint for fetching page contents.
    """

    def __init__(self, api_key: str | None = SERPER_API_KEY) -> None:
        # Serper authenticates via the X-API-KEY header on every request.
        self.headers = {
            "X-API-KEY": api_key,
            "Content-Type": "application/json",
        }

    @retry_builder(tries=3, delay=1, backoff=2)
    def search(self, query: str) -> list[InternetSearchResult]:
        """Run a web search for `query` and return the organic results.

        Raises requests.HTTPError (after retries) on a non-2xx response.
        """
        payload = {
            "q": query,
        }
        response = requests.post(
            SERPER_SEARCH_URL,
            headers=self.headers,
            data=json.dumps(payload),
        )
        response.raise_for_status()
        results = response.json()
        organic_results = results["organic"]
        # Serper's organic results carry no author/date info, so those
        # fields are left as None.
        return [
            InternetSearchResult(
                title=result["title"],
                link=result["link"],
                snippet=result["snippet"],
                author=None,
                published_date=None,
            )
            for result in organic_results
        ]

    def contents(self, urls: list[str]) -> list[InternetContent]:
        """Scrape each URL concurrently; never raises for individual URLs."""
        if not urls:
            return []

        # Serper can respond with 500s regularly. We want to retry,
        # but in the event of failure, return an unsuccessful scrape.
        def safe_get_webpage_content(url: str) -> InternetContent:
            try:
                return self._get_webpage_content(url)
            except Exception:
                return InternetContent(
                    title="",
                    link=url,
                    full_content="",
                    published_date=None,
                    scrape_successful=False,
                )

        with ThreadPoolExecutor(max_workers=min(8, len(urls))) as e:
            return list(e.map(safe_get_webpage_content, urls))

    @retry_builder(tries=3, delay=1, backoff=2)
    def _get_webpage_content(self, url: str) -> InternetContent:
        """Fetch one page via the Serper scrape endpoint.

        A 400 response means Serper cannot scrape the page and is returned
        as an unsuccessful InternetContent rather than raised.
        """
        payload = {
            "url": url,
        }
        response = requests.post(
            SERPER_CONTENTS_URL,
            headers=self.headers,
            data=json.dumps(payload),
        )
        # 400 returned when serper cannot scrape
        if response.status_code == 400:
            return InternetContent(
                title="",
                link=url,
                full_content="",
                published_date=None,
                scrape_successful=False,
            )
        response.raise_for_status()
        response_json = response.json()
        # Response only guarantees text
        text = response_json["text"]
        # metadata & jsonld are not guaranteed to be present
        metadata = response_json.get("metadata", {})
        jsonld = response_json.get("jsonld", {})
        title = extract_title_from_metadata(metadata)
        # Serper does not provide a reliable mechanism to extract the url,
        # so echo the requested one back.
        response_url = url
        published_date_str = extract_published_date_from_jsonld(jsonld)
        published_date = None
        if published_date_str:
            # Date strings in the wild are inconsistent; fall back to None
            # when parsing fails rather than failing the whole scrape.
            try:
                published_date = time_str_to_utc(published_date_str)
            except Exception:
                published_date = None
        return InternetContent(
            title=title or "",
            link=response_url,
            full_content=text or "",
            published_date=published_date,
        )
def extract_title_from_metadata(metadata: dict[str, str]) -> str | None:
    """Return the page title from Serper metadata, or None if absent.

    Prefers the plain "title" key over the OpenGraph "og:title" key.
    """
    for candidate in ("title", "og:title"):
        if candidate in metadata:
            return metadata[candidate]
    return None
def extract_published_date_from_jsonld(jsonld: dict[str, str]) -> str | None:
    """Return the raw "dateModified" value from a JSON-LD dict, or None."""
    return jsonld.get("dateModified")
def extract_value_from_dict(data: dict[str, str], keys: list[str]) -> str | None:
for key in keys:
if key in data:
return data[key]
return None

View File

@@ -26,7 +26,6 @@ class InternetContent(BaseModel):
link: str
full_content: str
published_date: datetime | None = None
scrape_successful: bool = True
class InternetSearchProvider(ABC):

View File

@@ -1,19 +1,13 @@
from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import (
ExaClient,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.clients.serper_client import (
SerperClient,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetSearchProvider,
)
from onyx.configs.chat_configs import EXA_API_KEY
from onyx.configs.chat_configs import SERPER_API_KEY
def get_default_provider() -> InternetSearchProvider | None:
if EXA_API_KEY:
return ExaClient()
if SERPER_API_KEY:
return SerperClient()
return None

View File

@@ -34,7 +34,7 @@ def dummy_inference_section_from_internet_content(
boost=1,
recency_bias=1.0,
score=1.0,
hidden=(not result.scrape_successful),
hidden=False,
metadata={},
match_highlights=[],
doc_summary=truncated_content,

View File

@@ -8,6 +8,8 @@ from typing import TypeVar
from langchain.schema.language_model import LanguageModelInput
from langchain_core.messages import HumanMessage
from langgraph.types import StreamWriter
from litellm import get_supported_openai_params
from litellm import supports_response_schema
from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
@@ -145,7 +147,6 @@ def invoke_llm_json(
Invoke an LLM, forcing it to respond in a specified JSON format if possible,
and return an object of that schema.
"""
from litellm.utils import get_supported_openai_params, supports_response_schema
# check if the model supports response_format: json_schema
supports_json = "response_format" in (

View File

@@ -19,9 +19,7 @@ from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
from onyx.httpx.httpx_pool import HttpxPool
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -32,7 +30,7 @@ PRUNING_CHECKPOINTED_BATCH_SIZE = 32
def document_batch_to_ids(
doc_batch: Iterator[list[Document]] | Iterator[list[SlimDocument]],
doc_batch: Iterator[list[Document]],
) -> Generator[set[str], None, None]:
for doc_list in doc_batch:
yield {doc.id for doc in doc_list}
@@ -43,24 +41,20 @@ def extract_ids_from_runnable_connector(
callback: IndexingHeartbeatInterface | None = None,
) -> set[str]:
"""
If the given connector is neither a SlimConnector nor a SlimConnectorWithPermSync, just pull
If the SlimConnector hasnt been implemented for the given connector, just pull
all docs using the load_from_state and grab out the IDs.
Optionally, a callback can be passed to handle the length of each document batch.
"""
all_connector_doc_ids: set[str] = set()
doc_batch_id_generator = None
if isinstance(runnable_connector, SlimConnector):
doc_batch_id_generator = document_batch_to_ids(
runnable_connector.retrieve_all_slim_docs()
)
elif isinstance(runnable_connector, SlimConnectorWithPermSync):
doc_batch_id_generator = document_batch_to_ids(
runnable_connector.retrieve_all_slim_docs_perm_sync()
)
# If the connector isn't slim, fall back to running it normally to get ids
elif isinstance(runnable_connector, LoadConnector):
for metadata_batch in runnable_connector.retrieve_all_slim_documents():
all_connector_doc_ids.update({doc.id for doc in metadata_batch})
doc_batch_id_generator = None
if isinstance(runnable_connector, LoadConnector):
doc_batch_id_generator = document_batch_to_ids(
runnable_connector.load_from_state()
)
@@ -84,14 +78,13 @@ def extract_ids_from_runnable_connector(
raise RuntimeError("Pruning job could not find a valid runnable_connector.")
# this function is called per batch for rate limiting
doc_batch_processing_func = (
rate_limit_builder(
def doc_batch_processing_func(doc_batch_ids: set[str]) -> set[str]:
return doc_batch_ids
if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE:
doc_batch_processing_func = rate_limit_builder(
max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60
)(lambda x: x)
if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
else lambda x: x
)
for doc_batch_ids in doc_batch_id_generator:
if callback:
if callback.should_stop():

View File

@@ -41,7 +41,7 @@ beat_task_templates: list[dict] = [
"task": OnyxCeleryTask.USER_FILE_DOCID_MIGRATION,
"schedule": timedelta(minutes=1),
"options": {
"priority": OnyxCeleryPriority.HIGH,
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.USER_FILE_PROCESSING,
},
@@ -85,9 +85,9 @@ beat_task_templates: list[dict] = [
{
"name": "check-for-index-attempt-cleanup",
"task": OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,
"schedule": timedelta(minutes=30),
"schedule": timedelta(hours=1),
"options": {
"priority": OnyxCeleryPriority.MEDIUM,
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
},
},

View File

@@ -89,7 +89,6 @@ from onyx.indexing.adapters.document_indexing_adapter import (
DocumentIndexingBatchAdapter,
)
from onyx.indexing.embedder import DefaultIndexingEmbedder
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import (
InformationContentClassificationModel,
@@ -1271,6 +1270,8 @@ def _docprocessing_task(
tenant_id: str,
batch_num: int,
) -> None:
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
start_time = time.monotonic()
if tenant_id:

View File

@@ -0,0 +1,266 @@
import time
from typing import List
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from tenacity import RetryError
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.connector_credential_pair import (
get_connector_credential_pairs_with_user_files,
)
from onyx.db.document import get_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.search_settings import get_active_search_settings
from onyx.db.user_documents import fetch_user_files_for_documents
from onyx.db.user_documents import fetch_user_folders_for_documents
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
from onyx.utils.logger import setup_logger
logger = setup_logger()
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC,
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
    trail=False,
    bind=True,
)
def check_for_user_file_folder_sync(self: Task, *, tenant_id: str) -> bool | None:
    """Periodic beat task that keeps user file/folder metadata in sync with Vespa.

    Finds every connector-credential pair that owns user files, resolves the
    documents behind those pairs, and fans out one metadata-update task per
    document that has a user file or user folder association.

    Returns True on success, False on error, and None when another instance
    already holds the beat lock.
    """
    started_at = time.monotonic()

    redis_client = get_redis_client()
    lock_beat: RedisLock = redis_client.lock(
        OnyxRedisLocks.CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK,
        timeout=CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT,
    )

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
        return None

    try:
        with get_session_with_current_tenant() as db_session:
            # Get all connector credential pairs that have user files
            cc_pairs = get_connector_credential_pairs_with_user_files(db_session)
            if not cc_pairs:
                task_logger.info("No connector credential pairs with user files found")
                return True

            # Get all documents associated with these cc_pairs
            document_ids = get_documents_for_cc_pairs(cc_pairs, db_session)
            if not document_ids:
                task_logger.info(
                    "No documents found for connector credential pairs with user files"
                )
                return True

            # Fetch current user file and folder IDs for these documents
            doc_id_to_user_file_id = fetch_user_files_for_documents(
                document_ids=document_ids, db_session=db_session
            )
            doc_id_to_user_folder_id = fetch_user_folders_for_documents(
                document_ids=document_ids, db_session=db_session
            )

            # Fan out one Vespa metadata-update task per associated document;
            # documents with neither a file nor a folder association are skipped.
            for document_id in document_ids:
                file_id = doc_id_to_user_file_id.get(document_id)
                folder_id = doc_id_to_user_folder_id.get(document_id)
                if file_id is None and folder_id is None:
                    continue
                update_user_file_folder_metadata.apply_async(
                    args=(document_id,),  # Use tuple instead of list for args
                    kwargs={
                        "tenant_id": tenant_id,
                        "user_file_id": file_id,
                        "user_folder_id": folder_id,
                    },
                    queue="vespa_metadata_sync",
                )

            task_logger.info(
                f"Scheduled metadata updates for {len(document_ids)} documents. "
                f"Elapsed time: {time.monotonic() - started_at:.2f}s"
            )
            return True
    except Exception as e:
        task_logger.exception(f"Error in check_for_user_file_folder_sync: {e}")
        return False
    finally:
        lock_beat.release()
def get_documents_for_cc_pairs(
    cc_pairs: List[ConnectorCredentialPair], db_session: Session
) -> List[str]:
    """Get all document IDs associated with the given connector credential pairs."""
    if not cc_pairs:
        return []

    cc_pair_ids = [pair.id for pair in cc_pairs]

    # DocumentByConnectorCredentialPair is keyed by (connector_id, credential_id)
    # rather than cc_pair_id, so the cc_pair match goes through an EXISTS subquery
    # correlated on those two columns.
    matches_requested_cc_pair = (
        db_session.query(ConnectorCredentialPair)
        .filter(
            ConnectorCredentialPair.id.in_(cc_pair_ids),
            ConnectorCredentialPair.connector_id
            == DocumentByConnectorCredentialPair.connector_id,
            ConnectorCredentialPair.credential_id
            == DocumentByConnectorCredentialPair.credential_id,
        )
        .exists()
    )

    rows = (
        db_session.query(Document.id)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .filter(matches_requested_cc_pair)
        .all()
    )
    return [document_id for (document_id,) in rows]
@shared_task(
    name=OnyxCeleryTask.UPDATE_USER_FILE_FOLDER_METADATA,
    bind=True,
    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
    time_limit=LIGHT_TIME_LIMIT,
    max_retries=3,
)
def update_user_file_folder_metadata(
    self: Task,
    document_id: str,
    *,
    tenant_id: str,
    user_file_id: int | None,
    user_folder_id: int | None,
) -> bool:
    """Updates the user file and folder metadata for a document in Vespa.

    Args:
        document_id: ID of the document whose Vespa chunks are updated.
        tenant_id: Tenant the document belongs to; forwarded to the index update.
        user_file_id: User file to associate with the document, or None.
        user_folder_id: User folder to associate with the document, or None.

    Returns:
        True when the Vespa update succeeded; False when the document no longer
        exists in the DB or the soft time limit was hit. On other exceptions the
        task re-raises via celery's self.retry() and never returns normally.
    """
    start = time.monotonic()
    # Tracked across all exit paths so the finally-block log always has a status.
    completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED
    try:
        with get_session_with_current_tenant() as db_session:
            # Build a document index handle over both the primary and (optional)
            # secondary search settings, sharing the pooled httpx client.
            active_search_settings = get_active_search_settings(db_session)
            doc_index = get_default_document_index(
                search_settings=active_search_settings.primary,
                secondary_search_settings=active_search_settings.secondary,
                httpx_client=HttpxPool.get("vespa"),
            )
            # Wrapper that retries transient index failures (see RetryDocumentIndex).
            retry_index = RetryDocumentIndex(doc_index)

            doc = get_document(document_id, db_session)
            if not doc:
                # Document was deleted between scheduling and execution: nothing to do.
                elapsed = time.monotonic() - start
                task_logger.info(
                    f"doc={document_id} "
                    f"action=no_operation "
                    f"elapsed={elapsed:.2f}"
                )
                completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
                return False

            # Create user fields object with file and folder IDs
            user_fields = VespaDocumentUserFields(
                user_file_id=str(user_file_id) if user_file_id is not None else None,
                user_folder_id=(
                    str(user_folder_id) if user_folder_id is not None else None
                ),
            )

            # Update Vespa. OK if doc doesn't exist. Raises exception otherwise.
            chunks_affected = retry_index.update_single(
                document_id,
                tenant_id=tenant_id,
                chunk_count=doc.chunk_count,
                fields=None,  # We're only updating user fields
                user_fields=user_fields,
            )

            elapsed = time.monotonic() - start
            task_logger.info(
                f"doc={document_id} "
                f"action=user_file_folder_sync "
                f"user_file_id={user_file_id} "
                f"user_folder_id={user_folder_id} "
                f"chunks={chunks_affected} "
                f"elapsed={elapsed:.2f}"
            )
            completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
            return True
    except SoftTimeLimitExceeded:
        # Soft limit hit: record the status and fall through to the final
        # `return False` rather than retrying.
        task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
        completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
    except Exception as ex:
        # Unwrap a tenacity RetryError (from RetryDocumentIndex) to retry with
        # the underlying cause; any other exception is retried as-is. The
        # while True / break construct exists only to keep the control flow
        # flat — self.retry() always raises, so the break is never reached.
        e: Exception | None = None
        while True:
            if isinstance(ex, RetryError):
                task_logger.warning(
                    f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
                )
                # only set the inner exception if it is of type Exception
                e_temp = ex.last_attempt.exception()
                if isinstance(e_temp, Exception):
                    e = e_temp
            else:
                e = ex
            task_logger.exception(
                f"update_user_file_folder_metadata exceptioned: doc={document_id}"
            )
            completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
            if (
                self.max_retries is not None
                and self.request.retries >= self.max_retries
            ):
                # Retries exhausted — self.retry() below will raise the final error.
                completion_status = (
                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
                )
            # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
            countdown = 2 ** (self.request.retries + 4)
            self.retry(exc=e, countdown=countdown)  # this will raise a celery exception
            break  # we won't hit this, but it looks weird not to have it
    finally:
        # Always log the final status, whatever exit path was taken.
        task_logger.info(
            f"update_user_file_folder_metadata completed: status={completion_status.value} doc={document_id}"
        )
    # Reached only on the SoftTimeLimitExceeded path (other paths return or raise).
    return False

View File

@@ -236,11 +236,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
f"process_single_user_file - Indexing pipeline completed ={index_pipeline_result}"
)
if (
index_pipeline_result.failures
or index_pipeline_result.total_docs != len(documents)
or index_pipeline_result.total_chunks == 0
):
if index_pipeline_result.failures:
task_logger.error(
f"process_single_user_file - Indexing pipeline failed id={user_file_id}"
)
@@ -546,78 +542,39 @@ def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
task_logger.warning(
f"Tenant={tenant_id} failed Vespa update for doc_id={new_uuid} - {e.__class__.__name__}"
)
# Update search_doc records to refer to the UUID string
# we are not using document_id_migrated = false because if the migration already completed,
# it will not run again and we will not update the search_doc records because of the issue currently fixed
user_files = (
db_session.execute(
sa.select(UserFile).where(UserFile.document_id.is_not(None))
uf_id_subq = (
sa.select(sa.cast(UserFile.id, sa.String))
.where(
UserFile.document_id.is_not(None),
UserFile.document_id_migrated.is_(False),
SearchDoc.document_id == UserFile.document_id,
)
.scalars()
.all()
.correlate(SearchDoc)
.scalar_subquery()
)
# Query all SearchDocs that need updating
search_docs = (
db_session.execute(
sa.select(SearchDoc).where(
SearchDoc.document_id.like("%FILE_CONNECTOR__%")
db_session.execute(
sa.update(SearchDoc)
.where(
sa.exists(
sa.select(sa.literal(1)).where(
UserFile.document_id.is_not(None),
UserFile.document_id_migrated.is_(False),
SearchDoc.document_id == UserFile.document_id,
)
)
)
.scalars()
.all()
.values(document_id=uf_id_subq)
)
task_logger.info(f"Found {len(user_files)} user files to update")
task_logger.info(f"Found {len(search_docs)} search docs to update")
# Build a map of normalized doc IDs to SearchDocs
search_doc_map: dict[str, list[SearchDoc]] = {}
for sd in search_docs:
doc_id = sd.document_id
if search_doc_map.get(doc_id) is None:
search_doc_map[doc_id] = []
search_doc_map[doc_id].append(sd)
task_logger.debug(
f"Built search doc map with {len(search_doc_map)} entries"
)
ids_preview = list(search_doc_map.keys())[:5]
task_logger.debug(
f"First few search_doc_map ids: {ids_preview if ids_preview else 'No ids found'}"
)
task_logger.debug(
f"search_doc_map total items: {sum(len(docs) for docs in search_doc_map.values())}"
)
# Process each UserFile and update matching SearchDocs
updated_count = 0
for uf in user_files:
doc_id = uf.document_id
if doc_id.startswith("USER_FILE_CONNECTOR__"):
doc_id = "FILE_CONNECTOR__" + doc_id[len("USER_FILE_CONNECTOR__") :]
task_logger.debug(f"Processing user file {uf.id} with doc_id {doc_id}")
task_logger.debug(
f"doc_id in search_doc_map: {doc_id in search_doc_map}"
# Mark all processed user_files as migrated
db_session.execute(
sa.update(UserFile)
.where(
UserFile.document_id.is_not(None),
UserFile.document_id_migrated.is_(False),
)
if doc_id in search_doc_map:
search_docs = search_doc_map[doc_id]
task_logger.debug(
f"Found {len(search_docs)} search docs to update for user file {uf.id}"
)
# Update the SearchDoc to use the UserFile's UUID
for search_doc in search_docs:
search_doc.document_id = str(uf.id)
db_session.add(search_doc)
# Mark UserFile as migrated
uf.document_id_migrated = True
db_session.add(uf)
updated_count += 1
task_logger.info(
f"Updated {updated_count} SearchDoc records with new UUIDs"
.values(document_id_migrated=True)
)
db_session.commit()

View File

@@ -5,7 +5,6 @@ from sqlalchemy.orm import Session
from onyx.configs.constants import NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexAttemptError
def get_old_index_attempts(
@@ -22,10 +21,6 @@ def get_old_index_attempts(
def cleanup_index_attempts(db_session: Session, index_attempt_ids: list[int]) -> None:
"""Clean up multiple index attempts"""
db_session.query(IndexAttemptError).filter(
IndexAttemptError.index_attempt_id.in_(index_attempt_ids)
).delete(synchronize_session=False)
db_session.query(IndexAttempt).filter(
IndexAttempt.id.in_(index_attempt_ids)
).delete(synchronize_session=False)

View File

@@ -28,7 +28,6 @@ from onyx.configs.constants import OnyxCeleryTask
from onyx.connectors.connector_runner import ConnectorRunner
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.factory import instantiate_connector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
@@ -102,6 +101,7 @@ def _get_connector_runner(
are the complete list of existing documents of the connector. If the task
of type LOAD_STATE, the list will be considered complete and otherwise incomplete.
"""
from onyx.connectors.factory import instantiate_connector
task = attempt.connector_credential_pair.connector.input_type

View File

@@ -138,34 +138,23 @@ def _build_project_llm_docs(
project_file_id_set = set(project_file_ids)
for f in in_memory_user_files:
# Only include files that belong to the project (not ad-hoc uploads)
if project_file_id_set and (f.file_id in project_file_id_set):
def _strip_nuls(s: str) -> str:
return s.replace("\x00", "") if s else s
cleaned_filename = _strip_nuls(f.filename or str(f.file_id))
if f.file_type.is_text_file():
try:
text_content = f.content.decode("utf-8", errors="ignore")
text_content = _strip_nuls(text_content)
except Exception:
text_content = ""
# Build a short blurb from the file content for better UI display
blurb = (
(text_content[:200] + "...")
if len(text_content) > 200
else text_content
)
else:
# Non-text (e.g., images): do not decode bytes; keep empty content but allow citation
try:
text_content = f.content.decode("utf-8", errors="ignore")
except Exception:
text_content = ""
blurb = f"[{f.file_type.value}] {cleaned_filename}"
# Build a short blurb from the file content for better UI display
blurb = (
(text_content[:200] + "...")
if len(text_content) > 200
else text_content
)
# Provide basic metadata to improve SavedSearchDoc display
file_metadata: dict[str, str | list[str]] = {
"filename": cleaned_filename,
"filename": f.filename or str(f.file_id),
"file_type": f.file_type.value,
}
@@ -174,7 +163,7 @@ def _build_project_llm_docs(
document_id=str(f.file_id),
content=text_content,
blurb=blurb,
semantic_identifier=cleaned_filename,
semantic_identifier=f.filename or str(f.file_id),
source_type=DocumentSource.USER_FILE,
metadata=file_metadata,
updated_at=None,
@@ -514,8 +503,7 @@ def stream_chat_message_objects(
for fd in new_msg_req.current_message_files:
uid = fd.get("user_file_id")
if uid is not None:
user_file_id = UUID(uid)
user_file_ids.append(user_file_id)
user_file_ids.append(uid)
# Load in user files into memory and create search tool override kwargs if needed
# if we have enough tokens, we don't need to use search

View File

@@ -100,14 +100,12 @@ def parse_user_files(
persona=persona,
actual_user_input=actual_user_input,
)
uploaded_context_cap = int(available_tokens * 0.5)
logger.debug(
f"Total file tokens: {total_tokens}, Available tokens: {available_tokens},"
f"Allowed uploaded context tokens: {uploaded_context_cap}"
f"Total file tokens: {total_tokens}, Available tokens: {available_tokens}"
)
have_enough_tokens = total_tokens <= uploaded_context_cap
have_enough_tokens = total_tokens <= available_tokens
# If we have enough tokens, we don't need search
# we can just pass them into the prompt directly

View File

@@ -90,7 +90,6 @@ HARD_DELETE_CHATS = os.environ.get("HARD_DELETE_CHATS", "").lower() == "true"
# Internet Search
EXA_API_KEY = os.environ.get("EXA_API_KEY") or None
SERPER_API_KEY = os.environ.get("SERPER_API_KEY") or None
NUM_INTERNET_SEARCH_RESULTS = int(os.environ.get("NUM_INTERNET_SEARCH_RESULTS") or 10)
NUM_INTERNET_SEARCH_CHUNKS = int(os.environ.get("NUM_INTERNET_SEARCH_CHUNKS") or 50)

View File

@@ -41,7 +41,7 @@ All new connectors should have tests added to the `backend/tests/daily/connector
#### Implementing the new Connector
The connector must subclass one or more of LoadConnector, PollConnector, CheckpointedConnector, or CheckpointedConnectorWithPermSync
The connector must subclass one or more of LoadConnector, PollConnector, SlimConnector, or EventConnector.
The `__init__` should take arguments for configuring what documents the connector will and where it finds those
documents. For example, if you have a wiki site, it may include the configuration for the team, topic, folder, etc. of

View File

@@ -25,7 +25,7 @@ from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -56,7 +56,7 @@ class BitbucketConnectorCheckpoint(ConnectorCheckpoint):
class BitbucketConnector(
CheckpointedConnector[BitbucketConnectorCheckpoint],
SlimConnectorWithPermSync,
SlimConnector,
):
"""Connector for indexing Bitbucket Cloud pull requests.
@@ -266,7 +266,7 @@ class BitbucketConnector(
"""Validate and deserialize a checkpoint instance from JSON."""
return BitbucketConnectorCheckpoint.model_validate_json(checkpoint_json)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -5,7 +5,6 @@ from datetime import timezone
from typing import Any
from urllib.parse import quote
from atlassian.errors import ApiError # type: ignore
from requests.exceptions import HTTPError
from typing_extensions import override
@@ -42,7 +41,6 @@ from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
@@ -93,7 +91,6 @@ class ConfluenceCheckpoint(ConnectorCheckpoint):
class ConfluenceConnector(
CheckpointedConnector[ConfluenceCheckpoint],
SlimConnector,
SlimConnectorWithPermSync,
CredentialsConnector,
):
def __init__(
@@ -111,7 +108,6 @@ class ConfluenceConnector(
# pages.
labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
timezone_offset: float = CONFLUENCE_TIMEZONE_OFFSET,
scoped_token: bool = False,
) -> None:
self.wiki_base = wiki_base
self.is_cloud = is_cloud
@@ -122,7 +118,6 @@ class ConfluenceConnector(
self.batch_size = batch_size
self.labels_to_skip = labels_to_skip
self.timezone_offset = timezone_offset
self.scoped_token = scoped_token
self._confluence_client: OnyxConfluence | None = None
self._low_timeout_confluence_client: OnyxConfluence | None = None
self._fetched_titles: set[str] = set()
@@ -200,7 +195,6 @@ class ConfluenceConnector(
is_cloud=self.is_cloud,
url=self.wiki_base,
credentials_provider=credentials_provider,
scoped_token=self.scoped_token,
)
confluence_client._probe_connection(**self.probe_kwargs)
confluence_client._initialize_connection(**self.final_kwargs)
@@ -213,7 +207,6 @@ class ConfluenceConnector(
url=self.wiki_base,
credentials_provider=credentials_provider,
timeout=3,
scoped_token=self.scoped_token,
)
low_timeout_confluence_client._probe_connection(**self.probe_kwargs)
low_timeout_confluence_client._initialize_connection(**self.final_kwargs)
@@ -565,21 +558,7 @@ class ConfluenceConnector(
def validate_checkpoint_json(self, checkpoint_json: str) -> ConfluenceCheckpoint:
return ConfluenceCheckpoint.model_validate_json(checkpoint_json)
@override
def retrieve_all_slim_docs(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
return self._retrieve_all_slim_docs(
start=start,
end=end,
callback=callback,
include_permissions=False,
)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
@@ -589,28 +568,12 @@ class ConfluenceConnector(
Return 'slim' docs (IDs + minimal permission data).
Does not fetch actual text. Used primarily for incremental permission sync.
"""
return self._retrieve_all_slim_docs(
start=start,
end=end,
callback=callback,
include_permissions=True,
)
def _retrieve_all_slim_docs(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
include_permissions: bool = True,
) -> GenerateSlimDocumentOutput:
doc_metadata_list: list[SlimDocument] = []
restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS)
space_level_access_info: dict[str, ExternalAccess] = {}
if include_permissions:
space_level_access_info = get_all_space_permissions(
self.confluence_client, self.is_cloud
)
space_level_access_info = get_all_space_permissions(
self.confluence_client, self.is_cloud
)
def get_external_access(
doc_id: str, restrictions: dict[str, Any], ancestors: list[dict[str, Any]]
@@ -637,10 +600,8 @@ class ConfluenceConnector(
doc_metadata_list.append(
SlimDocument(
id=page_id,
external_access=(
get_external_access(page_id, page_restrictions, page_ancestors)
if include_permissions
else None
external_access=get_external_access(
page_id, page_restrictions, page_ancestors
),
)
)
@@ -675,12 +636,8 @@ class ConfluenceConnector(
doc_metadata_list.append(
SlimDocument(
id=attachment_id,
external_access=(
get_external_access(
attachment_id, attachment_restrictions, []
)
if include_permissions
else None
external_access=get_external_access(
attachment_id, attachment_restrictions, []
),
)
)
@@ -691,10 +648,10 @@ class ConfluenceConnector(
if callback and callback.should_stop():
raise RuntimeError(
"retrieve_all_slim_docs_perm_sync: Stop signal detected"
"retrieve_all_slim_documents: Stop signal detected"
)
if callback:
callback.progress("retrieve_all_slim_docs_perm_sync", 1)
callback.progress("retrieve_all_slim_documents", 1)
yield doc_metadata_list
@@ -719,14 +676,6 @@ class ConfluenceConnector(
f"Unexpected error while validating Confluence settings: {e}"
)
if self.space:
try:
self.low_timeout_confluence_client.get_space(self.space)
except ApiError as e:
raise ConnectorValidationError(
"Invalid Confluence space key provided"
) from e
if not spaces or not spaces.get("results"):
raise ConnectorValidationError(
"No Confluence spaces found. Either your credentials lack permissions, or "
@@ -775,7 +724,7 @@ if __name__ == "__main__":
end = datetime.now().timestamp()
# Fetch all `SlimDocuments`.
for slim_doc in confluence_connector.retrieve_all_slim_docs_perm_sync():
for slim_doc in confluence_connector.retrieve_all_slim_documents():
print(slim_doc)
# Fetch all `Documents`.

View File

@@ -41,7 +41,6 @@ from onyx.connectors.confluence.utils import _handle_http_error
from onyx.connectors.confluence.utils import confluence_refresh_tokens
from onyx.connectors.confluence.utils import get_start_param_from_url
from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.file_processing.html_utils import format_document_soup
from onyx.redis.redis_pool import get_redis_client
@@ -88,20 +87,16 @@ class OnyxConfluence:
url: str,
credentials_provider: CredentialsProviderInterface,
timeout: int | None = None,
scoped_token: bool = False,
# should generally not be passed in, but making it overridable for
# easier testing
confluence_user_profiles_override: list[dict[str, str]] | None = (
CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE
),
) -> None:
self.base_url = url #'/'.join(url.rstrip("/").split("/")[:-1])
url = scoped_url(url, "confluence") if scoped_token else url
self._is_cloud = is_cloud
self._url = url.rstrip("/")
self._credentials_provider = credentials_provider
self.scoped_token = scoped_token
self.redis_client: Redis | None = None
self.static_credentials: dict[str, Any] | None = None
if self._credentials_provider.is_dynamic():
@@ -223,34 +218,6 @@ class OnyxConfluence:
with self._credentials_provider:
credentials, _ = self._renew_credentials()
if self.scoped_token:
# v2 endpoint doesn't always work with scoped tokens, use v1
token = credentials["confluence_access_token"]
probe_url = f"{self.base_url}/rest/api/space?limit=1"
import requests
logger.info(f"First and Last 5 of token: {token[:5]}...{token[-5:]}")
try:
r = requests.get(
probe_url,
headers={"Authorization": f"Bearer {token}"},
timeout=10,
)
r.raise_for_status()
except HTTPError as e:
if e.response.status_code == 403:
logger.warning(
"scoped token authenticated but not valid for probe endpoint (spaces)"
)
else:
if "WWW-Authenticate" in e.response.headers:
logger.warning(
f"WWW-Authenticate: {e.response.headers['WWW-Authenticate']}"
)
logger.warning(f"Full error: {e.response.text}")
raise e
return
# probe connection with direct client, no retries
if "confluence_refresh_token" in credentials:
@@ -269,7 +236,6 @@ class OnyxConfluence:
logger.info("Probing Confluence with Personal Access Token.")
url = self._url
if self._is_cloud:
logger.info("running with cloud client")
confluence_client_with_minimal_retries = Confluence(
url=url,
username=credentials["confluence_username"],
@@ -338,9 +304,7 @@ class OnyxConfluence:
url = f"https://api.atlassian.com/ex/confluence/{credentials['cloud_id']}"
confluence = Confluence(url=url, oauth2=oauth2_dict, **kwargs)
else:
logger.info(
f"Connecting to Confluence with Personal Access Token as user: {credentials['confluence_username']}"
)
logger.info("Connecting to Confluence with Personal Access Token.")
if self._is_cloud:
confluence = Confluence(
url=self._url,

View File

@@ -5,10 +5,7 @@ from datetime import datetime
from datetime import timezone
from typing import Any
from typing import TypeVar
from urllib.parse import urljoin
from urllib.parse import urlparse
import requests
from dateutil.parser import parse
from onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE
@@ -151,17 +148,3 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:
def is_atlassian_date_error(e: Exception) -> bool:
return "field 'updated' is invalid" in str(e)
def get_cloudId(base_url: str) -> str:
tenant_info_url = urljoin(base_url, "/_edge/tenant_info")
response = requests.get(tenant_info_url, timeout=10)
response.raise_for_status()
return response.json()["cloudId"]
def scoped_url(url: str, product: str) -> str:
parsed = urlparse(url)
base_url = parsed.scheme + "://" + parsed.netloc
cloud_id = get_cloudId(base_url)
return f"https://api.atlassian.com/ex/{product}/{cloud_id}{parsed.path}"

View File

@@ -1,4 +1,3 @@
import importlib
from typing import Any
from typing import Type
@@ -7,16 +6,60 @@ from sqlalchemy.orm import Session
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.constants import DocumentSource
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
from onyx.connectors.airtable.airtable_connector import AirtableConnector
from onyx.connectors.asana.connector import AsanaConnector
from onyx.connectors.axero.connector import AxeroConnector
from onyx.connectors.bitbucket.connector import BitbucketConnector
from onyx.connectors.blob.connector import BlobStorageConnector
from onyx.connectors.bookstack.connector import BookstackConnector
from onyx.connectors.clickup.connector import ClickupConnector
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.connectors.discord.connector import DiscordConnector
from onyx.connectors.discourse.connector import DiscourseConnector
from onyx.connectors.document360.connector import Document360Connector
from onyx.connectors.dropbox.connector import DropboxConnector
from onyx.connectors.egnyte.connector import EgnyteConnector
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.file.connector import LocalFileConnector
from onyx.connectors.fireflies.connector import FirefliesConnector
from onyx.connectors.freshdesk.connector import FreshdeskConnector
from onyx.connectors.gitbook.connector import GitbookConnector
from onyx.connectors.github.connector import GithubConnector
from onyx.connectors.gitlab.connector import GitlabConnector
from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.gong.connector import GongConnector
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_site.connector import GoogleSitesConnector
from onyx.connectors.guru.connector import GuruConnector
from onyx.connectors.highspot.connector import HighspotConnector
from onyx.connectors.hubspot.connector import HubSpotConnector
from onyx.connectors.imap.connector import ImapConnector
from onyx.connectors.interfaces import BaseConnector
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import EventConnector
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.jira.connector import JiraConnector
from onyx.connectors.linear.connector import LinearConnector
from onyx.connectors.loopio.connector import LoopioConnector
from onyx.connectors.mediawiki.wiki import MediaWikiConnector
from onyx.connectors.mock_connector.connector import MockConnector
from onyx.connectors.models import InputType
from onyx.connectors.registry import CONNECTOR_CLASS_MAP
from onyx.connectors.notion.connector import NotionConnector
from onyx.connectors.outline.connector import OutlineConnector
from onyx.connectors.productboard.connector import ProductboardConnector
from onyx.connectors.salesforce.connector import SalesforceConnector
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.connectors.slab.connector import SlabConnector
from onyx.connectors.slack.connector import SlackConnector
from onyx.connectors.teams.connector import TeamsConnector
from onyx.connectors.web.connector import WebConnector
from onyx.connectors.wikipedia.connector import WikipediaConnector
from onyx.connectors.xenforo.connector import XenforoConnector
from onyx.connectors.zendesk.connector import ZendeskConnector
from onyx.connectors.zulip.connector import ZulipConnector
from onyx.db.connector import fetch_connector_by_id
from onyx.db.credentials import backend_update_credential_json
from onyx.db.credentials import fetch_credential_by_id
@@ -29,75 +72,101 @@ class ConnectorMissingException(Exception):
pass
# Cache for already imported connector classes
_connector_cache: dict[DocumentSource, Type[BaseConnector]] = {}
def _load_connector_class(source: DocumentSource) -> Type[BaseConnector]:
"""Dynamically load and cache a connector class."""
if source in _connector_cache:
return _connector_cache[source]
if source not in CONNECTOR_CLASS_MAP:
raise ConnectorMissingException(f"Connector not found for source={source}")
mapping = CONNECTOR_CLASS_MAP[source]
try:
module = importlib.import_module(mapping.module_path)
connector_class = getattr(module, mapping.class_name)
_connector_cache[source] = connector_class
return connector_class
except (ImportError, AttributeError) as e:
raise ConnectorMissingException(
f"Failed to import {mapping.class_name} from {mapping.module_path}: {e}"
)
def _validate_connector_supports_input_type(
connector: Type[BaseConnector],
input_type: InputType | None,
source: DocumentSource,
) -> None:
"""Validate that a connector supports the requested input type."""
if input_type is None:
return
# Check each input type requirement separately for clarity
load_state_unsupported = input_type == InputType.LOAD_STATE and not issubclass(
connector, LoadConnector
)
poll_unsupported = (
input_type == InputType.POLL
# Either poll or checkpoint works for this, in the future
# all connectors should be checkpoint connectors
and (
not issubclass(connector, PollConnector)
and not issubclass(connector, CheckpointedConnector)
)
)
event_unsupported = input_type == InputType.EVENT and not issubclass(
connector, EventConnector
)
if any([load_state_unsupported, poll_unsupported, event_unsupported]):
raise ConnectorMissingException(
f"Connector for source={source} does not accept input_type={input_type}"
)
def identify_connector_class(
    source: DocumentSource,
    input_type: InputType | None = None,
) -> Type[BaseConnector]:
    """Resolve the connector class for *source*, optionally checked against *input_type*.

    NOTE(review): this body appears to interleave two revisions (likely a
    merge/diff artifact): the lazily loaded ``connector`` from
    ``_load_connector_class`` is validated and then immediately overwritten by
    the static ``connector_map`` lookup below, and the trailing ``any([...])``
    check duplicates ``_validate_connector_supports_input_type``. Confirm which
    revision is intended before relying on this view.
    """
    # Load the connector class using lazy loading
    connector = _load_connector_class(source)
    # NOTE(review): static map from the pre-lazy-loading revision; its result
    # clobbers the lazily loaded class above.
    connector_map = {
        DocumentSource.WEB: WebConnector,
        DocumentSource.FILE: LocalFileConnector,
        DocumentSource.SLACK: {
            InputType.POLL: SlackConnector,
            InputType.SLIM_RETRIEVAL: SlackConnector,
        },
        DocumentSource.GITHUB: GithubConnector,
        DocumentSource.GMAIL: GmailConnector,
        DocumentSource.GITLAB: GitlabConnector,
        DocumentSource.GITBOOK: GitbookConnector,
        DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
        DocumentSource.BOOKSTACK: BookstackConnector,
        DocumentSource.OUTLINE: OutlineConnector,
        DocumentSource.CONFLUENCE: ConfluenceConnector,
        DocumentSource.JIRA: JiraConnector,
        DocumentSource.PRODUCTBOARD: ProductboardConnector,
        DocumentSource.SLAB: SlabConnector,
        DocumentSource.NOTION: NotionConnector,
        DocumentSource.ZULIP: ZulipConnector,
        DocumentSource.GURU: GuruConnector,
        DocumentSource.LINEAR: LinearConnector,
        DocumentSource.HUBSPOT: HubSpotConnector,
        DocumentSource.DOCUMENT360: Document360Connector,
        DocumentSource.GONG: GongConnector,
        DocumentSource.GOOGLE_SITES: GoogleSitesConnector,
        DocumentSource.ZENDESK: ZendeskConnector,
        DocumentSource.LOOPIO: LoopioConnector,
        DocumentSource.DROPBOX: DropboxConnector,
        DocumentSource.SHAREPOINT: SharepointConnector,
        DocumentSource.TEAMS: TeamsConnector,
        DocumentSource.SALESFORCE: SalesforceConnector,
        DocumentSource.DISCOURSE: DiscourseConnector,
        DocumentSource.AXERO: AxeroConnector,
        DocumentSource.CLICKUP: ClickupConnector,
        DocumentSource.MEDIAWIKI: MediaWikiConnector,
        DocumentSource.WIKIPEDIA: WikipediaConnector,
        DocumentSource.ASANA: AsanaConnector,
        # All blob-storage style sources share one connector implementation.
        DocumentSource.S3: BlobStorageConnector,
        DocumentSource.R2: BlobStorageConnector,
        DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector,
        DocumentSource.OCI_STORAGE: BlobStorageConnector,
        DocumentSource.XENFORO: XenforoConnector,
        DocumentSource.DISCORD: DiscordConnector,
        DocumentSource.FRESHDESK: FreshdeskConnector,
        DocumentSource.FIREFLIES: FirefliesConnector,
        DocumentSource.EGNYTE: EgnyteConnector,
        DocumentSource.AIRTABLE: AirtableConnector,
        DocumentSource.HIGHSPOT: HighspotConnector,
        DocumentSource.IMAP: ImapConnector,
        DocumentSource.BITBUCKET: BitbucketConnector,
        # just for integration tests
        DocumentSource.MOCK_CONNECTOR: MockConnector,
    }
    connector_by_source = connector_map.get(source, {})
    # Validate connector supports the requested input_type
    _validate_connector_supports_input_type(connector, input_type, source)
    if isinstance(connector_by_source, dict):
        # A dict entry maps each supported InputType to its own class.
        if input_type is None:
            # If not specified, default to most exhaustive update
            connector = connector_by_source.get(InputType.LOAD_STATE)
        else:
            connector = connector_by_source.get(input_type)
    else:
        connector = connector_by_source
    if connector is None:
        raise ConnectorMissingException(f"Connector not found for source={source}")
    # NOTE(review): duplicates _validate_connector_supports_input_type, but
    # runs against the statically mapped class chosen above.
    if any(
        [
            (
                input_type == InputType.LOAD_STATE
                and not issubclass(connector, LoadConnector)
            ),
            (
                input_type == InputType.POLL
                # either poll or checkpoint works for this, in the future
                # all connectors should be checkpoint connectors
                and (
                    not issubclass(connector, PollConnector)
                    and not issubclass(connector, CheckpointedConnector)
                )
            ),
            (
                input_type == InputType.EVENT
                and not issubclass(connector, EventConnector)
            ),
        ]
    ):
        raise ConnectorMissingException(
            f"Connector for source={source} does not accept input_type={input_type}"
        )
    return connector

View File

@@ -219,19 +219,12 @@ def _get_batch_rate_limited(
def _get_userinfo(user: NamedUser) -> dict[str, str]:
def _safe_get(attr_name: str) -> str | None:
try:
return cast(str | None, getattr(user, attr_name))
except GithubException:
logger.debug(f"Error getting {attr_name} for user")
return None
return {
k: v
for k, v in {
"login": _safe_get("login"),
"name": _safe_get("name"),
"email": _safe_get("email"),
"login": user.login,
"name": user.name,
"email": user.email,
}.items()
if v is not None
}

View File

@@ -28,7 +28,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import ImageSection
@@ -232,7 +232,7 @@ def thread_to_document(
)
class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
class GmailConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
self.batch_size = batch_size
@@ -397,10 +397,10 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
if callback:
if callback.should_stop():
raise RuntimeError(
"retrieve_all_slim_docs_perm_sync: Stop signal detected"
"retrieve_all_slim_documents: Stop signal detected"
)
callback.progress("retrieve_all_slim_docs_perm_sync", 1)
callback.progress("retrieve_all_slim_documents", 1)
except HttpError as e:
if _is_mail_service_disabled_error(e):
logger.warning(
@@ -431,7 +431,7 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -64,7 +64,7 @@ from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
@@ -153,7 +153,7 @@ class DriveIdStatus(Enum):
class GoogleDriveConnector(
SlimConnectorWithPermSync, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]
SlimConnector, CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint]
):
def __init__(
self,
@@ -1137,9 +1137,7 @@ class GoogleDriveConnector(
convert_func,
(
[file.user_email, self.primary_admin_email]
+ get_file_owners(
file.drive_file, self.primary_admin_email
),
+ get_file_owners(file.drive_file),
file.drive_file,
),
)
@@ -1296,7 +1294,7 @@ class GoogleDriveConnector(
callback.progress("_extract_slim_docs_from_google_drive", 1)
yield slim_batch
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -97,15 +97,14 @@ def _execute_with_retry(request: Any) -> Any:
raise Exception(f"Failed to execute request after {max_attempts} attempts")
def get_file_owners(file: GoogleDriveFileType, primary_admin_email: str) -> list[str]:
def get_file_owners(file: GoogleDriveFileType) -> list[str]:
"""
Get the owners of a file if the attribute is present.
"""
return [
email
owner.get("emailAddress")
for owner in file.get("owners", [])
if (email := owner.get("emailAddress"))
and email.split("@")[-1] == primary_admin_email.split("@")[-1]
if owner.get("emailAddress")
]

View File

@@ -18,7 +18,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
@@ -38,7 +38,7 @@ class HighspotSpot(BaseModel):
name: str
class HighspotConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
class HighspotConnector(LoadConnector, PollConnector, SlimConnector):
"""
Connector for loading data from Highspot.
@@ -362,7 +362,7 @@ class HighspotConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync)
description = item_details.get("description", "")
return title, description
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -1,18 +1,14 @@
import re
from collections.abc import Callable
from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from typing import TypeVar
import requests
from hubspot import HubSpot # type: ignore
from onyx.configs.app_configs import INDEX_BATCH_SIZE
from onyx.configs.constants import DocumentSource
from onyx.connectors.hubspot.rate_limit import HubSpotRateLimiter
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
@@ -29,10 +25,6 @@ HUBSPOT_API_URL = "https://api.hubapi.com/integrations/v1/me"
# Available HubSpot object types
AVAILABLE_OBJECT_TYPES = {"tickets", "companies", "deals", "contacts"}
HUBSPOT_PAGE_SIZE = 100
T = TypeVar("T")
logger = setup_logger()
@@ -46,7 +38,6 @@ class HubSpotConnector(LoadConnector, PollConnector):
self.batch_size = batch_size
self._access_token = access_token
self._portal_id: str | None = None
self._rate_limiter = HubSpotRateLimiter()
# Set object types to fetch, default to all available types
if object_types is None:
@@ -86,37 +77,6 @@ class HubSpotConnector(LoadConnector, PollConnector):
"""Set the portal ID."""
self._portal_id = value
def _call_hubspot(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
return self._rate_limiter.call(func, *args, **kwargs)
def _paginated_results(
self,
fetch_page: Callable[..., Any],
**kwargs: Any,
) -> Generator[Any, None, None]:
base_kwargs = dict(kwargs)
base_kwargs.setdefault("limit", HUBSPOT_PAGE_SIZE)
after: str | None = None
while True:
page_kwargs = base_kwargs.copy()
if after is not None:
page_kwargs["after"] = after
page = self._call_hubspot(fetch_page, **page_kwargs)
results = getattr(page, "results", [])
for result in results:
yield result
paging = getattr(page, "paging", None)
next_page = getattr(paging, "next", None) if paging else None
if next_page is None:
break
after = getattr(next_page, "after", None)
if after is None:
break
def _clean_html_content(self, html_content: str) -> str:
"""Clean HTML content and extract raw text"""
if not html_content:
@@ -190,82 +150,78 @@ class HubSpotConnector(LoadConnector, PollConnector):
) -> list[dict[str, Any]]:
"""Get associated objects for a given object"""
try:
associations_iter = self._paginated_results(
api_client.crm.associations.v4.basic_api.get_page,
associations = api_client.crm.associations.v4.basic_api.get_page(
object_type=from_object_type,
object_id=object_id,
to_object_type=to_object_type,
)
object_ids = [assoc.to_object_id for assoc in associations_iter]
associated_objects = []
if associations.results:
object_ids = [assoc.to_object_id for assoc in associations.results]
associated_objects: list[dict[str, Any]] = []
# Batch get the associated objects
if to_object_type == "contacts":
for obj_id in object_ids:
try:
obj = api_client.crm.contacts.basic_api.get_by_id(
contact_id=obj_id,
properties=[
"firstname",
"lastname",
"email",
"company",
"jobtitle",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch contact {obj_id}: {e}")
if to_object_type == "contacts":
for obj_id in object_ids:
try:
obj = self._call_hubspot(
api_client.crm.contacts.basic_api.get_by_id,
contact_id=obj_id,
properties=[
"firstname",
"lastname",
"email",
"company",
"jobtitle",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch contact {obj_id}: {e}")
elif to_object_type == "companies":
for obj_id in object_ids:
try:
obj = api_client.crm.companies.basic_api.get_by_id(
company_id=obj_id,
properties=[
"name",
"domain",
"industry",
"city",
"state",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch company {obj_id}: {e}")
elif to_object_type == "companies":
for obj_id in object_ids:
try:
obj = self._call_hubspot(
api_client.crm.companies.basic_api.get_by_id,
company_id=obj_id,
properties=[
"name",
"domain",
"industry",
"city",
"state",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch company {obj_id}: {e}")
elif to_object_type == "deals":
for obj_id in object_ids:
try:
obj = api_client.crm.deals.basic_api.get_by_id(
deal_id=obj_id,
properties=[
"dealname",
"amount",
"dealstage",
"closedate",
"pipeline",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch deal {obj_id}: {e}")
elif to_object_type == "deals":
for obj_id in object_ids:
try:
obj = self._call_hubspot(
api_client.crm.deals.basic_api.get_by_id,
deal_id=obj_id,
properties=[
"dealname",
"amount",
"dealstage",
"closedate",
"pipeline",
],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch deal {obj_id}: {e}")
elif to_object_type == "tickets":
for obj_id in object_ids:
try:
obj = self._call_hubspot(
api_client.crm.tickets.basic_api.get_by_id,
ticket_id=obj_id,
properties=["subject", "content", "hs_ticket_priority"],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch ticket {obj_id}: {e}")
elif to_object_type == "tickets":
for obj_id in object_ids:
try:
obj = api_client.crm.tickets.basic_api.get_by_id(
ticket_id=obj_id,
properties=["subject", "content", "hs_ticket_priority"],
)
associated_objects.append(obj.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch ticket {obj_id}: {e}")
return associated_objects
@@ -283,33 +239,33 @@ class HubSpotConnector(LoadConnector, PollConnector):
) -> list[dict[str, Any]]:
"""Get notes associated with a given object"""
try:
associations_iter = self._paginated_results(
api_client.crm.associations.v4.basic_api.get_page,
# Get associations to notes (engagement type)
associations = api_client.crm.associations.v4.basic_api.get_page(
object_type=object_type,
object_id=object_id,
to_object_type="notes",
)
note_ids = [assoc.to_object_id for assoc in associations_iter]
associated_notes = []
if associations.results:
note_ids = [assoc.to_object_id for assoc in associations.results]
for note_id in note_ids:
try:
# Notes are engagements in HubSpot, use the engagements API
note = self._call_hubspot(
api_client.crm.objects.notes.basic_api.get_by_id,
note_id=note_id,
properties=[
"hs_note_body",
"hs_timestamp",
"hs_created_by",
"hubspot_owner_id",
],
)
associated_notes.append(note.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch note {note_id}: {e}")
# Batch get the associated notes
for note_id in note_ids:
try:
# Notes are engagements in HubSpot, use the engagements API
note = api_client.crm.objects.notes.basic_api.get_by_id(
note_id=note_id,
properties=[
"hs_note_body",
"hs_timestamp",
"hs_created_by",
"hubspot_owner_id",
],
)
associated_notes.append(note.to_dict())
except Exception as e:
logger.warning(f"Failed to fetch note {note_id}: {e}")
return associated_notes
@@ -402,9 +358,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
api_client = HubSpot(access_token=self.access_token)
tickets_iter = self._paginated_results(
api_client.crm.tickets.basic_api.get_page,
all_tickets = api_client.crm.tickets.get_all(
properties=[
"subject",
"content",
@@ -417,7 +371,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
doc_batch: list[Document] = []
for ticket in tickets_iter:
for ticket in all_tickets:
updated_at = ticket.updated_at.replace(tzinfo=None)
if start is not None and updated_at < start.replace(tzinfo=None):
continue
@@ -505,9 +459,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
api_client = HubSpot(access_token=self.access_token)
companies_iter = self._paginated_results(
api_client.crm.companies.basic_api.get_page,
all_companies = api_client.crm.companies.get_all(
properties=[
"name",
"domain",
@@ -523,7 +475,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
doc_batch: list[Document] = []
for company in companies_iter:
for company in all_companies:
updated_at = company.updated_at.replace(tzinfo=None)
if start is not None and updated_at < start.replace(tzinfo=None):
continue
@@ -630,9 +582,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
api_client = HubSpot(access_token=self.access_token)
deals_iter = self._paginated_results(
api_client.crm.deals.basic_api.get_page,
all_deals = api_client.crm.deals.get_all(
properties=[
"dealname",
"amount",
@@ -648,7 +598,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
doc_batch: list[Document] = []
for deal in deals_iter:
for deal in all_deals:
updated_at = deal.updated_at.replace(tzinfo=None)
if start is not None and updated_at < start.replace(tzinfo=None):
continue
@@ -753,9 +703,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
api_client = HubSpot(access_token=self.access_token)
contacts_iter = self._paginated_results(
api_client.crm.contacts.basic_api.get_page,
all_contacts = api_client.crm.contacts.get_all(
properties=[
"firstname",
"lastname",
@@ -773,7 +721,7 @@ class HubSpotConnector(LoadConnector, PollConnector):
doc_batch: list[Document] = []
for contact in contacts_iter:
for contact in all_contacts:
updated_at = contact.updated_at.replace(tzinfo=None)
if start is not None and updated_at < start.replace(tzinfo=None):
continue

View File

@@ -1,145 +0,0 @@
from __future__ import annotations
import time
from collections.abc import Callable
from typing import Any
from typing import TypeVar
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
rate_limit_builder,
)
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
RateLimitTriedTooManyTimesError,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()

T = TypeVar("T")

# HubSpot exposes a ten second rolling window (x-hubspot-ratelimit-interval-milliseconds)
# with a maximum of 190 requests, and a per-second limit of 19 requests.
_HUBSPOT_TEN_SECOND_LIMIT = 190
_HUBSPOT_TEN_SECOND_PERIOD = 10  # seconds
_HUBSPOT_SECONDLY_LIMIT = 19
_HUBSPOT_SECONDLY_PERIOD = 1  # second
# Fallback sleep when the rejection carries no usable timing headers.
_DEFAULT_SLEEP_SECONDS = 10
# Safety margin added on top of every computed/derived sleep.
_SLEEP_PADDING_SECONDS = 1.0
# Consecutive rate-limit rejections tolerated before giving up.
_MAX_RATE_LIMIT_RETRIES = 5
def _extract_header(headers: Any, key: str) -> str | None:
if headers is None:
return None
getter = getattr(headers, "get", None)
if callable(getter):
value = getter(key)
if value is not None:
return value
if isinstance(headers, dict):
value = headers.get(key)
if value is not None:
return value
return None
def is_rate_limit_error(exception: Exception) -> bool:
    """Return True when *exception* looks like a HubSpot rate-limit rejection."""
    # An HTTP 429 status is the canonical signal.
    if getattr(exception, "status", None) == 429:
        return True

    # Exhausted-quota headers also count, even without an attached status.
    headers = getattr(exception, "headers", None)
    if headers is not None:
        for header_name in (
            "x-hubspot-ratelimit-remaining",
            "x-hubspot-ratelimit-secondly-remaining",
        ):
            if _extract_header(headers, header_name) == "0":
                return True

    # Last resort: rate-limit wording in the exception message itself.
    text = str(exception)
    return any(marker in text for marker in ("RATE_LIMIT", "Too Many Requests"))
def get_rate_limit_retry_delay_seconds(exception: Exception) -> float:
    """Derive how long to sleep before retrying a rate-limited HubSpot call.

    Preference order: the ``Retry-After`` header, then HubSpot's rolling-window
    interval header, then the per-second quota header; otherwise a fixed
    default. A small padding is always added to the result.
    """
    headers = getattr(exception, "headers", None)

    # (header name, raw-value -> delay transform, parse-failure log format)
    strategies = (
        (
            "Retry-After",
            lambda raw: float(raw) + _SLEEP_PADDING_SECONDS,
            "Failed to parse Retry-After header '%s' as float",
        ),
        (
            "x-hubspot-ratelimit-interval-milliseconds",
            lambda raw: float(raw) / 1000.0 + _SLEEP_PADDING_SECONDS,
            "Failed to parse x-hubspot-ratelimit-interval-milliseconds '%s' as float",
        ),
        (
            # Derive a minimal spacing from the per-second quota.
            "x-hubspot-ratelimit-secondly",
            lambda raw: (1.0 / max(float(raw), 1.0)) + _SLEEP_PADDING_SECONDS,
            "Failed to parse x-hubspot-ratelimit-secondly '%s' as float",
        ),
    )

    for header_name, to_delay, parse_error_format in strategies:
        raw_value = _extract_header(headers, header_name)
        if not raw_value:
            continue
        try:
            return to_delay(raw_value)
        except ValueError:
            logger.debug(parse_error_format, raw_value)

    return _DEFAULT_SLEEP_SECONDS + _SLEEP_PADDING_SECONDS
class HubSpotRateLimiter:
    """Client-side throttle for HubSpot API calls.

    Wraps callables so they respect both of HubSpot's quotas (per-second and
    rolling ten-second window, see the module constants) and retries with a
    header-derived sleep when the API still reports a rate limit.
    """

    def __init__(
        self,
        *,
        ten_second_limit: int = _HUBSPOT_TEN_SECOND_LIMIT,
        ten_second_period: int = _HUBSPOT_TEN_SECOND_PERIOD,
        secondly_limit: int = _HUBSPOT_SECONDLY_LIMIT,
        secondly_period: int = _HUBSPOT_SECONDLY_PERIOD,
        max_retries: int = _MAX_RATE_LIMIT_RETRIES,
    ) -> None:
        self._max_retries = max_retries

        # Stacked decorators enforce BOTH quotas on every execution: the
        # decorator nearest the function meters the ten-second window, and
        # the outer one meters the per-second budget. Built per instance
        # (presumably so each limiter keeps independent call counters —
        # depends on rate_limit_builder internals; confirm if sharing one
        # limiter across instances ever matters).
        @rate_limit_builder(max_calls=secondly_limit, period=secondly_period)
        @rate_limit_builder(max_calls=ten_second_limit, period=ten_second_period)
        def _execute(callable_: Callable[[], T]) -> T:
            return callable_()

        self._execute = _execute

    def call(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
        """Invoke ``func(*args, **kwargs)`` under the configured rate limits.

        Rate-limit rejections are retried (after a server-suggested sleep) up
        to ``max_retries`` times; any other exception propagates unchanged.

        Raises:
            RateLimitTriedTooManyTimesError: after exhausting the retry budget.
        """
        attempts = 0
        while True:
            try:
                # Fresh zero-arg closure per attempt; the limiter only ever
                # sees a no-argument callable.
                return self._execute(lambda: func(*args, **kwargs))
            except Exception as exc:  # pylint: disable=broad-except
                # Only rate-limit errors are retried; re-raise everything else.
                if not is_rate_limit_error(exc):
                    raise
                attempts += 1
                if attempts > self._max_retries:
                    raise RateLimitTriedTooManyTimesError(
                        "Exceeded configured HubSpot rate limit retries"
                    ) from exc
                wait_time = get_rate_limit_retry_delay_seconds(exc)
                logger.notice(
                    "HubSpot rate limit reached. Sleeping %.2f seconds before retrying.",
                    wait_time,
                )
                time.sleep(wait_time)

View File

@@ -97,20 +97,11 @@ class PollConnector(BaseConnector):
raise NotImplementedError
# Slim connectors retrieve just the ids of documents
# Slim connectors can retrieve just the ids and
# permission syncing information for connected documents
class SlimConnector(BaseConnector):
@abc.abstractmethod
def retrieve_all_slim_docs(
self,
) -> GenerateSlimDocumentOutput:
raise NotImplementedError
# Slim connectors retrieve both the ids AND
# permission syncing information for connected documents
class SlimConnectorWithPermSync(BaseConnector):
@abc.abstractmethod
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -25,11 +25,11 @@ from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.jira.access import get_project_permissions
from onyx.connectors.jira.utils import best_effort_basic_expert_info
from onyx.connectors.jira.utils import best_effort_get_field_from_issue
@@ -247,7 +247,7 @@ def _perform_jql_search_v2(
def process_jira_issue(
jira_base_url: str,
jira_client: JIRA,
issue: Issue,
comment_email_blacklist: tuple[str, ...] = (),
labels_to_skip: set[str] | None = None,
@@ -281,7 +281,7 @@ def process_jira_issue(
)
return None
page_url = build_jira_url(jira_base_url, issue.key)
page_url = build_jira_url(jira_client, issue.key)
metadata_dict: dict[str, str | list[str]] = {}
people = set()
@@ -359,10 +359,7 @@ class JiraConnectorCheckpoint(ConnectorCheckpoint):
offset: int | None = None
class JiraConnector(
CheckpointedConnectorWithPermSync[JiraConnectorCheckpoint],
SlimConnectorWithPermSync,
):
class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnector):
def __init__(
self,
jira_base_url: str,
@@ -375,23 +372,15 @@ class JiraConnector(
labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP,
# Custom JQL query to filter Jira issues
jql_query: str | None = None,
scoped_token: bool = False,
) -> None:
self.batch_size = batch_size
# dealing with scoped tokens is a bit tricky becasue we need to hit api.atlassian.net
# when making jira requests but still want correct links to issues in the UI.
# So, the user's base url is stored here, but converted to a scoped url when passed
# to the jira client.
self.jira_base = jira_base_url.rstrip("/") # Remove trailing slash if present
self.jira_project = project_key
self._comment_email_blacklist = comment_email_blacklist or []
self.labels_to_skip = set(labels_to_skip)
self.jql_query = jql_query
self.scoped_token = scoped_token
self._jira_client: JIRA | None = None
# Cache project permissions to avoid fetching them repeatedly across runs
self._project_permissions_cache: dict[str, Any] = {}
@property
def comment_email_blacklist(self) -> tuple:
@@ -410,26 +399,10 @@ class JiraConnector(
return ""
return f'"{self.jira_project}"'
def _get_project_permissions(self, project_key: str) -> Any:
"""Get project permissions with caching.
Args:
project_key: The Jira project key
Returns:
The external access permissions for the project
"""
if project_key not in self._project_permissions_cache:
self._project_permissions_cache[project_key] = get_project_permissions(
jira_client=self.jira_client, jira_project=project_key
)
return self._project_permissions_cache[project_key]
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
self._jira_client = build_jira_client(
credentials=credentials,
jira_base=self.jira_base,
scoped_token=self.scoped_token,
)
return None
@@ -469,37 +442,15 @@ class JiraConnector(
) -> CheckpointOutput[JiraConnectorCheckpoint]:
jql = self._get_jql_query(start, end)
try:
return self._load_from_checkpoint(
jql, checkpoint, include_permissions=False
)
return self._load_from_checkpoint(jql, checkpoint)
except Exception as e:
if is_atlassian_date_error(e):
jql = self._get_jql_query(start - ONE_HOUR, end)
return self._load_from_checkpoint(
jql, checkpoint, include_permissions=False
)
raise e
def load_from_checkpoint_with_perm_sync(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: JiraConnectorCheckpoint,
) -> CheckpointOutput[JiraConnectorCheckpoint]:
"""Load documents from checkpoint with permission information included."""
jql = self._get_jql_query(start, end)
try:
return self._load_from_checkpoint(jql, checkpoint, include_permissions=True)
except Exception as e:
if is_atlassian_date_error(e):
jql = self._get_jql_query(start - ONE_HOUR, end)
return self._load_from_checkpoint(
jql, checkpoint, include_permissions=True
)
return self._load_from_checkpoint(jql, checkpoint)
raise e
def _load_from_checkpoint(
self, jql: str, checkpoint: JiraConnectorCheckpoint, include_permissions: bool
self, jql: str, checkpoint: JiraConnectorCheckpoint
) -> CheckpointOutput[JiraConnectorCheckpoint]:
# Get the current offset from checkpoint or start at 0
starting_offset = checkpoint.offset or 0
@@ -521,25 +472,18 @@ class JiraConnector(
issue_key = issue.key
try:
if document := process_jira_issue(
jira_base_url=self.jira_base,
jira_client=self.jira_client,
issue=issue,
comment_email_blacklist=self.comment_email_blacklist,
labels_to_skip=self.labels_to_skip,
):
# Add permission information to the document if requested
if include_permissions:
project_key = get_jira_project_key_from_issue(issue=issue)
if project_key:
document.external_access = self._get_project_permissions(
project_key
)
yield document
except Exception as e:
yield ConnectorFailure(
failed_document=DocumentFailure(
document_id=issue_key,
document_link=build_jira_url(self.jira_base, issue_key),
document_link=build_jira_url(self.jira_client, issue_key),
),
failure_message=f"Failed to process Jira issue: {str(e)}",
exception=e,
@@ -571,7 +515,7 @@ class JiraConnector(
# if we didn't retrieve a full batch, we're done
checkpoint.has_more = current_offset - starting_offset == page_size
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
@@ -590,7 +534,6 @@ class JiraConnector(
prev_offset = 0
current_offset = 0
slim_doc_batch = []
while checkpoint.has_more:
for issue in _perform_jql_search(
jira_client=self.jira_client,
@@ -607,12 +550,13 @@ class JiraConnector(
continue
issue_key = best_effort_get_field_from_issue(issue, _FIELD_KEY)
id = build_jira_url(self.jira_base, issue_key)
id = build_jira_url(self.jira_client, issue_key)
slim_doc_batch.append(
SlimDocument(
id=id,
external_access=self._get_project_permissions(project_key),
external_access=get_project_permissions(
jira_client=self.jira_client, jira_project=project_key
),
)
)
current_offset += 1
@@ -757,7 +701,7 @@ if __name__ == "__main__":
start = 0
end = datetime.now().timestamp()
for slim_doc in connector.retrieve_all_slim_docs_perm_sync(
for slim_doc in connector.retrieve_all_slim_documents(
start=start,
end=end,
):

View File

@@ -10,7 +10,6 @@ from jira.resources import CustomFieldOption
from jira.resources import Issue
from jira.resources import User
from onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url
from onyx.connectors.models import BasicExpertInfo
from onyx.utils.logger import setup_logger
@@ -75,18 +74,11 @@ def extract_text_from_adf(adf: dict | None) -> str:
return " ".join(texts)
def build_jira_url(jira_base_url: str, issue_key: str) -> str:
"""
Get the url used to access an issue in the UI.
"""
return f"{jira_base_url}/browse/{issue_key}"
def build_jira_url(jira_client: JIRA, issue_key: str) -> str:
return f"{jira_client.client_info()}/browse/{issue_key}"
def build_jira_client(
credentials: dict[str, Any], jira_base: str, scoped_token: bool = False
) -> JIRA:
jira_base = scoped_url(jira_base, "jira") if scoped_token else jira_base
def build_jira_client(credentials: dict[str, Any], jira_base: str) -> JIRA:
api_token = credentials["jira_api_token"]
# if user provide an email we assume it's cloud
if "jira_user_email" in credentials:

View File

@@ -1,208 +0,0 @@
"""Registry mapping for connector classes."""
from pydantic import BaseModel
from onyx.configs.constants import DocumentSource
class ConnectorMapping(BaseModel):
    """Location of a connector class, used for lazy (deferred) importing."""

    # Dotted import path of the module that defines the connector class.
    module_path: str
    # Name of the connector class inside that module.
    class_name: str
# Mapping of DocumentSource to connector details for lazy loading.
# Keeping module paths as strings (instead of importing the classes here)
# defers each connector's import — and its dependencies — until first use.
CONNECTOR_CLASS_MAP: dict[DocumentSource, ConnectorMapping] = {
    DocumentSource.WEB: ConnectorMapping(
        module_path="onyx.connectors.web.connector",
        class_name="WebConnector",
    ),
    DocumentSource.FILE: ConnectorMapping(
        module_path="onyx.connectors.file.connector",
        class_name="LocalFileConnector",
    ),
    DocumentSource.SLACK: ConnectorMapping(
        module_path="onyx.connectors.slack.connector",
        class_name="SlackConnector",
    ),
    DocumentSource.GITHUB: ConnectorMapping(
        module_path="onyx.connectors.github.connector",
        class_name="GithubConnector",
    ),
    DocumentSource.GMAIL: ConnectorMapping(
        module_path="onyx.connectors.gmail.connector",
        class_name="GmailConnector",
    ),
    DocumentSource.GITLAB: ConnectorMapping(
        module_path="onyx.connectors.gitlab.connector",
        class_name="GitlabConnector",
    ),
    DocumentSource.GITBOOK: ConnectorMapping(
        module_path="onyx.connectors.gitbook.connector",
        class_name="GitbookConnector",
    ),
    DocumentSource.GOOGLE_DRIVE: ConnectorMapping(
        module_path="onyx.connectors.google_drive.connector",
        class_name="GoogleDriveConnector",
    ),
    DocumentSource.BOOKSTACK: ConnectorMapping(
        module_path="onyx.connectors.bookstack.connector",
        class_name="BookstackConnector",
    ),
    DocumentSource.OUTLINE: ConnectorMapping(
        module_path="onyx.connectors.outline.connector",
        class_name="OutlineConnector",
    ),
    DocumentSource.CONFLUENCE: ConnectorMapping(
        module_path="onyx.connectors.confluence.connector",
        class_name="ConfluenceConnector",
    ),
    DocumentSource.JIRA: ConnectorMapping(
        module_path="onyx.connectors.jira.connector",
        class_name="JiraConnector",
    ),
    DocumentSource.PRODUCTBOARD: ConnectorMapping(
        module_path="onyx.connectors.productboard.connector",
        class_name="ProductboardConnector",
    ),
    DocumentSource.SLAB: ConnectorMapping(
        module_path="onyx.connectors.slab.connector",
        class_name="SlabConnector",
    ),
    DocumentSource.NOTION: ConnectorMapping(
        module_path="onyx.connectors.notion.connector",
        class_name="NotionConnector",
    ),
    DocumentSource.ZULIP: ConnectorMapping(
        module_path="onyx.connectors.zulip.connector",
        class_name="ZulipConnector",
    ),
    DocumentSource.GURU: ConnectorMapping(
        module_path="onyx.connectors.guru.connector",
        class_name="GuruConnector",
    ),
    DocumentSource.LINEAR: ConnectorMapping(
        module_path="onyx.connectors.linear.connector",
        class_name="LinearConnector",
    ),
    DocumentSource.HUBSPOT: ConnectorMapping(
        module_path="onyx.connectors.hubspot.connector",
        class_name="HubSpotConnector",
    ),
    DocumentSource.DOCUMENT360: ConnectorMapping(
        module_path="onyx.connectors.document360.connector",
        class_name="Document360Connector",
    ),
    DocumentSource.GONG: ConnectorMapping(
        module_path="onyx.connectors.gong.connector",
        class_name="GongConnector",
    ),
    DocumentSource.GOOGLE_SITES: ConnectorMapping(
        module_path="onyx.connectors.google_site.connector",
        class_name="GoogleSitesConnector",
    ),
    DocumentSource.ZENDESK: ConnectorMapping(
        module_path="onyx.connectors.zendesk.connector",
        class_name="ZendeskConnector",
    ),
    DocumentSource.LOOPIO: ConnectorMapping(
        module_path="onyx.connectors.loopio.connector",
        class_name="LoopioConnector",
    ),
    DocumentSource.DROPBOX: ConnectorMapping(
        module_path="onyx.connectors.dropbox.connector",
        class_name="DropboxConnector",
    ),
    DocumentSource.SHAREPOINT: ConnectorMapping(
        module_path="onyx.connectors.sharepoint.connector",
        class_name="SharepointConnector",
    ),
    DocumentSource.TEAMS: ConnectorMapping(
        module_path="onyx.connectors.teams.connector",
        class_name="TeamsConnector",
    ),
    DocumentSource.SALESFORCE: ConnectorMapping(
        module_path="onyx.connectors.salesforce.connector",
        class_name="SalesforceConnector",
    ),
    DocumentSource.DISCOURSE: ConnectorMapping(
        module_path="onyx.connectors.discourse.connector",
        class_name="DiscourseConnector",
    ),
    DocumentSource.AXERO: ConnectorMapping(
        module_path="onyx.connectors.axero.connector",
        class_name="AxeroConnector",
    ),
    DocumentSource.CLICKUP: ConnectorMapping(
        module_path="onyx.connectors.clickup.connector",
        class_name="ClickupConnector",
    ),
    DocumentSource.MEDIAWIKI: ConnectorMapping(
        module_path="onyx.connectors.mediawiki.wiki",
        class_name="MediaWikiConnector",
    ),
    DocumentSource.WIKIPEDIA: ConnectorMapping(
        module_path="onyx.connectors.wikipedia.connector",
        class_name="WikipediaConnector",
    ),
    DocumentSource.ASANA: ConnectorMapping(
        module_path="onyx.connectors.asana.connector",
        class_name="AsanaConnector",
    ),
    # The four blob-storage sources below all share a single connector
    # implementation (BlobStorageConnector).
    DocumentSource.S3: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.R2: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.GOOGLE_CLOUD_STORAGE: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.OCI_STORAGE: ConnectorMapping(
        module_path="onyx.connectors.blob.connector",
        class_name="BlobStorageConnector",
    ),
    DocumentSource.XENFORO: ConnectorMapping(
        module_path="onyx.connectors.xenforo.connector",
        class_name="XenforoConnector",
    ),
    DocumentSource.DISCORD: ConnectorMapping(
        module_path="onyx.connectors.discord.connector",
        class_name="DiscordConnector",
    ),
    DocumentSource.FRESHDESK: ConnectorMapping(
        module_path="onyx.connectors.freshdesk.connector",
        class_name="FreshdeskConnector",
    ),
    DocumentSource.FIREFLIES: ConnectorMapping(
        module_path="onyx.connectors.fireflies.connector",
        class_name="FirefliesConnector",
    ),
    DocumentSource.EGNYTE: ConnectorMapping(
        module_path="onyx.connectors.egnyte.connector",
        class_name="EgnyteConnector",
    ),
    DocumentSource.AIRTABLE: ConnectorMapping(
        module_path="onyx.connectors.airtable.airtable_connector",
        class_name="AirtableConnector",
    ),
    DocumentSource.HIGHSPOT: ConnectorMapping(
        module_path="onyx.connectors.highspot.connector",
        class_name="HighspotConnector",
    ),
    DocumentSource.IMAP: ConnectorMapping(
        module_path="onyx.connectors.imap.connector",
        class_name="ImapConnector",
    ),
    DocumentSource.BITBUCKET: ConnectorMapping(
        module_path="onyx.connectors.bitbucket.connector",
        class_name="BitbucketConnector",
    ),
    # just for integration tests
    DocumentSource.MOCK_CONNECTOR: ConnectorMapping(
        module_path="onyx.connectors.mock_connector.connector",
        class_name="MockConnector",
    ),
}

View File

@@ -16,7 +16,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -151,7 +151,7 @@ def _validate_custom_query_config(config: dict[str, Any]) -> None:
)
class SalesforceConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
class SalesforceConnector(LoadConnector, PollConnector, SlimConnector):
"""Approach outline
Goal
@@ -1119,7 +1119,7 @@ class SalesforceConnector(LoadConnector, PollConnector, SlimConnectorWithPermSyn
with tempfile.TemporaryDirectory() as temp_dir:
return self._delta_sync(temp_dir, start, end)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -41,7 +41,7 @@ from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import IndexingHeartbeatInterface
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
@@ -73,8 +73,7 @@ class SiteDescriptor(BaseModel):
"""Data class for storing SharePoint site information.
Args:
url: The base site URL (e.g. https://danswerai.sharepoint.com/sites/sharepoint-tests
or https://danswerai.sharepoint.com/teams/team-name)
url: The base site URL (e.g. https://danswerai.sharepoint.com/sites/sharepoint-tests)
drive_name: The name of the drive to access (e.g. "Shared Documents", "Other Library")
If None, all drives will be accessed.
folder_path: The folder path within the drive to access (e.g. "test/nested with spaces")
@@ -673,7 +672,7 @@ def _convert_sitepage_to_slim_document(
class SharepointConnector(
SlimConnectorWithPermSync,
SlimConnector,
CheckpointedConnectorWithPermSync[SharepointConnectorCheckpoint],
):
def __init__(
@@ -704,11 +703,9 @@ class SharepointConnector(
# Ensure sites are sharepoint urls
for site_url in self.sites:
if not site_url.startswith("https://") or not (
"/sites/" in site_url or "/teams/" in site_url
):
if not site_url.startswith("https://") or "/sites/" not in site_url:
raise ConnectorValidationError(
"Site URLs must be full Sharepoint URLs (e.g. https://your-tenant.sharepoint.com/sites/your-site or https://your-tenant.sharepoint.com/teams/your-team)"
"Site URLs must be full Sharepoint URLs (e.g. https://your-tenant.sharepoint.com/sites/your-site)"
)
@property
@@ -723,17 +720,10 @@ class SharepointConnector(
site_data_list = []
for url in site_urls:
parts = url.strip().split("/")
site_type_index = None
if "sites" in parts:
site_type_index = parts.index("sites")
elif "teams" in parts:
site_type_index = parts.index("teams")
if site_type_index is not None:
# Extract the base site URL (up to and including the site/team name)
site_url = "/".join(parts[: site_type_index + 2])
remaining_parts = parts[site_type_index + 2 :]
sites_index = parts.index("sites")
site_url = "/".join(parts[: sites_index + 2])
remaining_parts = parts[sites_index + 2 :]
# Extract drive name and folder path
if remaining_parts:
@@ -755,9 +745,7 @@ class SharepointConnector(
)
)
else:
logger.warning(
f"Site URL '{url}' is not a valid Sharepoint URL (must contain /sites/ or /teams/)"
)
logger.warning(f"Site URL '{url}' is not a valid Sharepoint URL")
return site_data_list
def _get_drive_items_for_drive_name(
@@ -1609,7 +1597,7 @@ class SharepointConnector(
) -> SharepointConnectorCheckpoint:
return SharepointConnectorCheckpoint.model_validate_json(checkpoint_json)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -16,7 +16,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import SlimDocument
@@ -164,7 +164,7 @@ def get_slab_url_from_title_id(base_url: str, title: str, page_id: str) -> str:
return urljoin(urljoin(base_url, "posts/"), url_id)
class SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
class SlabConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
self,
base_url: str,
@@ -239,7 +239,7 @@ class SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
time_filter=lambda t: start_time <= t <= end_time
)
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -42,7 +42,7 @@ from onyx.connectors.interfaces import CredentialsConnector
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
@@ -581,7 +581,7 @@ def _process_message(
class SlackConnector(
SlimConnectorWithPermSync,
SlimConnector,
CredentialsConnector,
CheckpointedConnectorWithPermSync[SlackCheckpoint],
):
@@ -732,7 +732,7 @@ class SlackConnector(
self.text_cleaner = SlackTextCleaner(client=self.client)
self.credentials_provider = credentials_provider
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -22,7 +22,7 @@ from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -51,7 +51,7 @@ class TeamsCheckpoint(ConnectorCheckpoint):
class TeamsConnector(
CheckpointedConnector[TeamsCheckpoint],
SlimConnectorWithPermSync,
SlimConnector,
):
MAX_WORKERS = 10
AUTHORITY_URL_PREFIX = "https://login.microsoftonline.com/"
@@ -228,9 +228,9 @@ class TeamsConnector(
has_more=bool(todos),
)
# impls for SlimConnectorWithPermSync
# impls for SlimConnector
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
@@ -572,7 +572,7 @@ if __name__ == "__main__":
)
teams_connector.validate_connector_settings()
for slim_doc in teams_connector.retrieve_all_slim_docs_perm_sync():
for slim_doc in teams_connector.retrieve_all_slim_documents():
...
for doc in load_everything_from_checkpoint_connector(

View File

@@ -219,25 +219,6 @@ def is_valid_url(url: str) -> bool:
return False
def _same_site(base_url: str, candidate_url: str) -> bool:
base, candidate = urlparse(base_url), urlparse(candidate_url)
base_netloc = base.netloc.lower().removeprefix("www.")
candidate_netloc = candidate.netloc.lower().removeprefix("www.")
if base_netloc != candidate_netloc:
return False
base_path = (base.path or "/").rstrip("/")
if base_path in ("", "/"):
return True
candidate_path = candidate.path or "/"
if candidate_path == base_path:
return True
boundary = f"{base_path}/"
return candidate_path.startswith(boundary)
def get_internal_links(
base_url: str, url: str, soup: BeautifulSoup, should_ignore_pound: bool = True
) -> set[str]:
@@ -258,7 +239,7 @@ def get_internal_links(
# Relative path handling
href = urljoin(url, href)
if _same_site(base_url, href):
if urlparse(href).netloc == urlparse(url).netloc and base_url in href:
internal_links.add(href)
return internal_links

View File

@@ -26,7 +26,7 @@ from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import ConnectorFailure
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import Document
@@ -376,7 +376,7 @@ class ZendeskConnectorCheckpoint(ConnectorCheckpoint):
class ZendeskConnector(
SlimConnectorWithPermSync, CheckpointedConnector[ZendeskConnectorCheckpoint]
SlimConnector, CheckpointedConnector[ZendeskConnectorCheckpoint]
):
def __init__(
self,
@@ -565,7 +565,7 @@ class ZendeskConnector(
)
return checkpoint
def retrieve_all_slim_docs_perm_sync(
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,

View File

@@ -62,14 +62,6 @@ class MCPAuthenticationType(str, PyEnum):
OAUTH = "OAUTH"
class MCPTransport(str, PyEnum):
"""MCP transport types"""
STDIO = "STDIO" # TODO: currently unsupported, need to add a user guide for setup
SSE = "SSE" # Server-Sent Events (deprecated but still used)
STREAMABLE_HTTP = "STREAMABLE_HTTP" # Modern HTTP streaming
class MCPAuthenticationPerformer(str, PyEnum):
ADMIN = "ADMIN"
PER_USER = "PER_USER"

132
backend/onyx/db/folder.py Normal file
View File

@@ -0,0 +1,132 @@
from uuid import UUID
from sqlalchemy.orm import Session
from onyx.db.chat import delete_chat_session
from onyx.db.models import ChatFolder
from onyx.db.models import ChatSession
from onyx.utils.logger import setup_logger
logger = setup_logger()
def get_user_folders(
    user_id: UUID | None,
    db_session: Session,
) -> list[ChatFolder]:
    """Fetch every chat folder owned by the given user."""
    owned = db_session.query(ChatFolder).filter(ChatFolder.user_id == user_id)
    return owned.all()
def update_folder_display_priority(
    user_id: UUID | None,
    display_priority_map: dict[int, int],
    db_session: Session,
) -> None:
    """Reassign display priorities for all of a user's folders.

    The map must cover exactly the user's folder ids; otherwise a
    ValueError is raised and nothing is committed.
    """
    user_folders = get_user_folders(user_id=user_id, db_session=db_session)
    if {f.id for f in user_folders} != set(display_priority_map):
        raise ValueError("Invalid Folder IDs provided")
    for user_folder in user_folders:
        user_folder.display_priority = display_priority_map[user_folder.id]
    db_session.commit()
def get_folder_by_id(
    user_id: UUID | None,
    folder_id: int,
    db_session: Session,
) -> ChatFolder:
    """Look up a folder by id and verify it belongs to the user.

    Raises:
        ValueError: no folder exists with the given id.
        PermissionError: the folder is owned by a different user.
    """
    match = (
        db_session.query(ChatFolder)
        .filter(ChatFolder.id == folder_id)
        .one_or_none()
    )
    if match is None:
        raise ValueError("Folder by specified id does not exist")
    if match.user_id != user_id:
        raise PermissionError(f"Folder does not belong to user: {user_id}")
    return match
def create_folder(
    user_id: UUID | None, folder_name: str | None, db_session: Session
) -> int:
    """Create a chat folder for the user and return the new folder's id."""
    folder = ChatFolder(user_id=user_id, name=folder_name)
    db_session.add(folder)
    # Commit so the autogenerated primary key is populated before returning.
    db_session.commit()
    return folder.id
def rename_folder(
    user_id: UUID | None, folder_id: int, folder_name: str | None, db_session: Session
) -> None:
    """Rename a folder owned by the user (ownership enforced by lookup)."""
    target = get_folder_by_id(
        user_id=user_id, folder_id=folder_id, db_session=db_session
    )
    target.name = folder_name
    db_session.commit()
def add_chat_to_folder(
    user_id: UUID | None, folder_id: int, chat_session: ChatSession, db_session: Session
) -> None:
    """Move the chat session into the folder (ownership enforced by lookup)."""
    destination = get_folder_by_id(
        user_id=user_id, folder_id=folder_id, db_session=db_session
    )
    chat_session.folder_id = destination.id
    db_session.commit()
def remove_chat_from_folder(
    user_id: UUID | None, folder_id: int, chat_session: ChatSession, db_session: Session
) -> None:
    """Detach a chat session from a folder owned by the user.

    Raises:
        ValueError: if the chat session is not in the specified folder.
        PermissionError: (via get_folder_by_id) if the folder belongs to
            another user.
    """
    folder = get_folder_by_id(
        user_id=user_id, folder_id=folder_id, db_session=db_session
    )
    if chat_session.folder_id != folder.id:
        raise ValueError("The chat session is not in the specified folder.")
    if folder.user_id != user_id:
        # Defensive double-check — get_folder_by_id already enforces ownership,
        # so this branch should be unreachable.
        # Fixed typo in the message: "below" -> "belong".
        raise ValueError(
            f"Tried to remove a chat session from a folder that does not belong to "
            f"this user, user id: {user_id}"
        )
    chat_session.folder_id = None
    if chat_session in folder.chat_sessions:
        folder.chat_sessions.remove(chat_session)
    db_session.commit()
def delete_folder(
    user_id: UUID | None,
    folder_id: int,
    including_chats: bool,
    db_session: Session,
) -> None:
    """Delete a folder owned by the user, optionally deleting its chats too."""
    target = get_folder_by_id(
        user_id=user_id, folder_id=folder_id, db_session=db_session
    )
    # Assuming there will not be a massive number of chats in any given folder
    if including_chats:
        for session_in_folder in target.chat_sessions:
            delete_chat_session(
                user_id=user_id,
                chat_session_id=session_in_folder.id,
                db_session=db_session,
            )
    db_session.delete(target)
    db_session.commit()

View File

@@ -112,7 +112,6 @@ def upsert_llm_provider(
name=model_configuration.name,
is_visible=model_configuration.is_visible,
max_input_tokens=model_configuration.max_input_tokens,
supports_image_input=model_configuration.supports_image_input,
)
.on_conflict_do_nothing()
)

View File

@@ -4,10 +4,8 @@ from uuid import UUID
from sqlalchemy import and_
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.orm.attributes import flag_modified
from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPTransport
from onyx.db.models import MCPAuthenticationType
from onyx.db.models import MCPConnectionConfig
from onyx.db.models import MCPServer
@@ -91,8 +89,6 @@ def create_mcp_server__no_commit(
description: str | None,
server_url: str,
auth_type: MCPAuthenticationType,
transport: MCPTransport,
auth_performer: MCPAuthenticationPerformer,
db_session: Session,
admin_connection_config_id: int | None = None,
) -> MCPServer:
@@ -102,9 +98,7 @@ def create_mcp_server__no_commit(
name=name,
description=description,
server_url=server_url,
transport=transport,
auth_type=auth_type,
auth_performer=auth_performer,
admin_connection_config_id=admin_connection_config_id,
)
db_session.add(new_server)
@@ -120,8 +114,6 @@ def update_mcp_server__no_commit(
server_url: str | None = None,
auth_type: MCPAuthenticationType | None = None,
admin_connection_config_id: int | None = None,
auth_performer: MCPAuthenticationPerformer | None = None,
transport: MCPTransport | None = None,
) -> MCPServer:
"""Update an existing MCP server"""
server = get_mcp_server_by_id(server_id, db_session)
@@ -136,10 +128,6 @@ def update_mcp_server__no_commit(
server.auth_type = auth_type
if admin_connection_config_id is not None:
server.admin_connection_config_id = admin_connection_config_id
if auth_performer is not None:
server.auth_performer = auth_performer
if transport is not None:
server.transport = transport
db_session.flush() # Don't commit yet, let caller decide when to commit
return server
@@ -159,6 +147,18 @@ def delete_mcp_server(server_id: int, db_session: Session) -> None:
logger.info(f"Successfully deleted MCP server {server_id} and its tools")
# TODO: this is pretty hacky
def get_mcp_server_auth_performer(mcp_server: MCPServer) -> MCPAuthenticationPerformer:
"""Get the authentication performer for an MCP server"""
if mcp_server.auth_type == MCPAuthenticationType.OAUTH:
return MCPAuthenticationPerformer.PER_USER
if not mcp_server.admin_connection_config:
return MCPAuthenticationPerformer.ADMIN
if not mcp_server.admin_connection_config.config.get("header_substitutions"):
return MCPAuthenticationPerformer.ADMIN
return MCPAuthenticationPerformer.PER_USER
def get_all_mcp_tools_for_server(server_id: int, db_session: Session) -> list[Tool]:
"""Get all MCP tools for a server"""
return list(
@@ -259,8 +259,6 @@ def update_connection_config(
if config_data is not None:
config.config = config_data
# Force SQLAlchemy to detect the change by marking the field as modified
flag_modified(config, "config")
db_session.commit()
return config
@@ -297,7 +295,7 @@ def get_server_auth_template(
if not server.admin_connection_config_id:
return None
if server.auth_performer == MCPAuthenticationPerformer.ADMIN:
if get_mcp_server_auth_performer(server) == MCPAuthenticationPerformer.ADMIN:
return None # admin server implies no template
return server.admin_connection_config

View File

@@ -63,8 +63,6 @@ from onyx.db.enums import (
SyncStatus,
MCPAuthenticationType,
UserFileStatus,
MCPAuthenticationPerformer,
MCPTransport,
)
from onyx.configs.constants import NotificationType
from onyx.configs.constants import SearchFeedbackType
@@ -2353,8 +2351,6 @@ class ModelConfiguration(Base):
# - The end-user is configuring a model and chooses not to set a max-input-tokens limit.
max_input_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
supports_image_input: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
llm_provider: Mapped["LLMProvider"] = relationship(
"LLMProvider",
back_populates="model_configurations",
@@ -3472,18 +3468,10 @@ class MCPServer(Base):
name: Mapped[str] = mapped_column(String, nullable=False)
description: Mapped[str | None] = mapped_column(String, nullable=True)
server_url: Mapped[str] = mapped_column(String, nullable=False)
# Transport type for connecting to the MCP server
transport: Mapped[MCPTransport] = mapped_column(
Enum(MCPTransport, native_enum=False), nullable=False
)
# Auth type: "none", "api_token", or "oauth"
auth_type: Mapped[MCPAuthenticationType] = mapped_column(
Enum(MCPAuthenticationType, native_enum=False), nullable=False
)
# Who performs authentication for this server (ADMIN or PER_USER)
auth_performer: Mapped[MCPAuthenticationPerformer] = mapped_column(
Enum(MCPAuthenticationPerformer, native_enum=False), nullable=False
)
# Admin connection config - used for the config page
# and (when applicable) admin-managed auth
# and (when applicable) per-user auth

View File

@@ -0,0 +1,478 @@
import datetime
import time
from typing import List
from uuid import UUID
from fastapi import UploadFile
from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session
from onyx.auth.users import get_current_tenant_id
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.credentials import create_credential
from onyx.db.enums import AccessType
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import Document
from onyx.db.models import DocumentByConnectorCredentialPair
from onyx.db.models import Persona
from onyx.db.models import Persona__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
from onyx.server.documents.connector import upload_files
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.models import StatusResponse
# Prefix used to build the synthetic document id for user-uploaded files.
USER_FILE_CONSTANT: str = "USER_FILE_CONNECTOR"
def create_user_files(
    files: List[UploadFile],
    folder_id: int | None,
    user: User | None,
    db_session: Session,
    link_url: str | None = None,
) -> list[UserFile]:
    """Persist one UserFile row per uploaded file and return the new rows.

    NOTE(rkuo): folder_id can be -1 (RECENT_DOCS_FOLDER_ID) — TODO: document
    what that special value means here.

    Args:
        files: uploads to store; the raw bytes are handled by upload_files.
        folder_id: destination folder, or None for no folder.
        user: owner of the files; None for anonymous/system ownership.
        db_session: active database session (committed before returning).
        link_url: optional source URL to associate with every file.
    """
    # NOTE: At the moment, zip metadata is not used for user files.
    # Should revisit to decide whether this should be a feature.
    upload_response = upload_files(files)
    user_files = []
    for file_path, file in zip(upload_response.file_paths, files):
        new_file = UserFile(
            user_id=user.id if user else None,
            folder_id=folder_id,
            file_id=file_path,
            # Use the shared prefix constant instead of a duplicated literal so
            # the document id stays in sync with code matching on USER_FILE_CONSTANT.
            document_id=f"{USER_FILE_CONSTANT}__{file_path}",
            name=file.filename,
            token_count=None,  # filled in later, after tokenization
            link_url=link_url,
            content_type=file.content_type,
        )
        db_session.add(new_file)
        user_files.append(new_file)
    db_session.commit()
    return user_files
def upload_files_to_user_files_with_indexing(
    files: List[UploadFile],
    folder_id: int | None,
    user: User,
    db_session: Session,
    trigger_index: bool = True,
) -> list[UserFile]:
    """Create user files, back each with a connector/credential pair, and
    optionally trigger immediate indexing.

    NOTE(rkuo): folder_id can be -1 (RECENT_DOCS_FOLDER_ID) — TODO: document
    what that special value means here.
    """
    # Create the user files first
    user_files = create_user_files(files, folder_id, user, db_session)
    # Create connector and credential for each file
    for user_file in user_files:
        cc_pair = create_file_connector_credential(user_file, user, db_session)
        # NOTE(review): cc_pair is a StatusResponse; presumably .data carries
        # the new cc-pair id — confirm against add_credential_to_connector.
        user_file.cc_pair_id = cc_pair.data
    db_session.commit()
    # Trigger immediate high-priority indexing for all created files
    if trigger_index:
        tenant_id = get_current_tenant_id()
        for user_file in user_files:
            # Use the existing trigger_indexing_for_cc_pair function but with highest priority
            if user_file.cc_pair_id:
                trigger_indexing_for_cc_pair(
                    [],
                    user_file.cc_pair.connector_id,
                    False,
                    tenant_id,
                    db_session,
                    is_user_file=True,
                )
    return user_files
def create_file_connector_credential(
    user_file: UserFile, user: User, db_session: Session
) -> StatusResponse:
    """Provision a connector and credential for one user file and link them.

    Returns the StatusResponse produced by add_credential_to_connector.
    """
    new_connector = create_connector(
        db_session=db_session,
        connector_data=ConnectorBase(
            name=f"UserFile-{user_file.file_id}-{int(time.time())}",
            source=DocumentSource.FILE,
            input_type=InputType.LOAD_STATE,
            connector_specific_config={
                "file_locations": [user_file.file_id],
                "file_names": [user_file.name],
                "zip_metadata": {},
            },
            refresh_freq=None,
            prune_freq=None,
            indexing_start=None,
        ),
    )
    new_credential = create_credential(
        CredentialBase(
            credential_json={},
            admin_public=True,
            source=DocumentSource.FILE,
            curator_public=True,
            groups=[],
            name=f"UserFileCredential-{user_file.file_id}-{int(time.time())}",
            is_user_file=True,
        ),
        user,
        db_session,
    )
    return add_credential_to_connector(
        db_session=db_session,
        user=user,
        connector_id=new_connector.id,
        credential_id=new_credential.id,
        cc_pair_name=f"UserFileCCPair-{user_file.file_id}-{int(time.time())}",
        access_type=AccessType.PRIVATE,
        auto_sync_options=None,
        groups=[],
        is_user_file=True,
    )
def get_user_file_indexing_status(
    file_ids: list[int], db_session: Session
) -> dict[int, bool]:
    """Map each found user-file id to whether its cc_pair has a successful index run.

    Ids not present in the database are omitted from the result.
    """
    rows = (
        db_session.query(UserFile)
        .filter(UserFile.id.in_(file_ids))
        .options(joinedload(UserFile.cc_pair))
        .all()
    )
    return {
        row.id: bool(row.cc_pair and row.cc_pair.last_successful_index_time)
        for row in rows
    }
def calculate_user_files_token_count(
    file_ids: list[int], folder_ids: list[int], db_session: Session
) -> int:
    """Sum token counts over the given files plus all files in the given folders."""

    def _sum_tokens(predicate) -> int:
        # func.sum returns None when no rows match; coerce that to 0.
        return (
            db_session.query(func.sum(UserFile.token_count))
            .filter(predicate)
            .scalar()
            or 0
        )

    total = 0
    if file_ids:
        total += _sum_tokens(UserFile.id.in_(file_ids))
    if folder_ids:
        total += _sum_tokens(UserFile.folder_id.in_(folder_ids))
    return total
def load_all_user_files(
    file_ids: list[int], folder_ids: list[int], db_session: Session
) -> list[UserFile]:
    """Collect UserFile rows referenced directly by id or via folder membership."""
    collected: list[UserFile] = []
    if file_ids:
        collected += (
            db_session.query(UserFile).filter(UserFile.id.in_(file_ids)).all()
        )
    if folder_ids:
        collected += (
            db_session.query(UserFile)
            .filter(UserFile.folder_id.in_(folder_ids))
            .all()
        )
    return collected
def get_user_files_from_folder(folder_id: int, db_session: Session) -> list[UserFile]:
    """Return every user file stored in the given folder."""
    in_folder = db_session.query(UserFile).filter(UserFile.folder_id == folder_id)
    return in_folder.all()
def share_file_with_assistant(
    file_id: int, assistant_id: int, db_session: Session
) -> None:
    """Attach a user file to an assistant; silently no-ops if either is missing."""
    target_file = db_session.query(UserFile).filter(UserFile.id == file_id).first()
    target_persona = (
        db_session.query(Persona).filter(Persona.id == assistant_id).first()
    )
    if target_file is None or target_persona is None:
        return
    target_file.assistants.append(target_persona)
    db_session.commit()
def unshare_file_with_assistant(
    file_id: int, assistant_id: int, db_session: Session
) -> None:
    """Delete the persona<->file association row, if one exists."""
    association_filter = and_(
        Persona__UserFile.user_file_id == file_id,
        Persona__UserFile.persona_id == assistant_id,
    )
    db_session.query(Persona__UserFile).filter(association_filter).delete()
    db_session.commit()
def share_folder_with_assistant(
    folder_id: int, assistant_id: int, db_session: Session
) -> None:
    """Share every file in the folder with the assistant.

    No-ops if the folder or assistant does not exist.
    """
    target_folder = (
        db_session.query(UserFolder).filter(UserFolder.id == folder_id).first()
    )
    target_persona = (
        db_session.query(Persona).filter(Persona.id == assistant_id).first()
    )
    if target_folder is None or target_persona is None:
        return
    for contained_file in target_folder.files:
        share_file_with_assistant(contained_file.id, assistant_id, db_session)
def unshare_folder_with_assistant(
    folder_id: int, assistant_id: int, db_session: Session
) -> None:
    """Unshare every file in the folder from the assistant; no-op if folder missing."""
    target_folder = (
        db_session.query(UserFolder).filter(UserFolder.id == folder_id).first()
    )
    if target_folder is None:
        return
    for contained_file in target_folder.files:
        unshare_file_with_assistant(contained_file.id, assistant_id, db_session)
def fetch_user_files_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, int | None]:
    """
    Fetches user file IDs for the given document IDs.

    Args:
        document_ids: List of document IDs to fetch user files for
        db_session: Database session

    Returns:
        Dictionary mapping document IDs to user file IDs (or None if no user
        file exists for that document)
    """
    # Every requested document starts out unmapped; matches fill in below.
    result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
    if not document_ids:
        return result

    # Map each document to the cc_pair that indexed it, joining through the
    # DocumentByConnectorCredentialPair association table.
    doc_cc_pairs = (
        db_session.query(Document.id, ConnectorCredentialPair.id)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .filter(Document.id.in_(document_ids))
        .all()
    )
    if not doc_cc_pairs:
        # Nothing matched; skip the second query rather than issue an empty IN ().
        return result

    # Map each relevant cc_pair to its user file. De-duplicate the cc_pair IDs
    # so the IN clause contains each ID only once.
    cc_pair_ids = {cc_pair_id for _, cc_pair_id in doc_cc_pairs}
    cc_pair_to_user_file = (
        db_session.query(ConnectorCredentialPair.id, UserFile.id)
        .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id)
        .filter(ConnectorCredentialPair.id.in_(cc_pair_ids))
        .all()
    )
    cc_pair_to_user_file_dict = dict(cc_pair_to_user_file)

    # Stitch the two mappings together: document -> cc_pair -> user file.
    for doc_id, cc_pair_id in doc_cc_pairs:
        if cc_pair_id in cc_pair_to_user_file_dict:
            result[doc_id] = cc_pair_to_user_file_dict[cc_pair_id]
    return result
def fetch_user_folders_for_documents(
    document_ids: list[str],
    db_session: Session,
) -> dict[str, int | None]:
    """
    Fetches user folder IDs for the given document IDs.

    For each document, returns the folder ID that the document's associated
    user file belongs to.

    Args:
        document_ids: List of document IDs to fetch user folders for
        db_session: Database session

    Returns:
        Dictionary mapping document IDs to user folder IDs (or None if no user
        folder exists for that document)
    """
    # Every requested document starts out unmapped; matches fill in below.
    result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
    if not document_ids:
        return result

    # Map each document to the cc_pair that indexed it, joining through the
    # DocumentByConnectorCredentialPair association table.
    doc_cc_pairs = (
        db_session.query(Document.id, ConnectorCredentialPair.id)
        .join(
            DocumentByConnectorCredentialPair,
            Document.id == DocumentByConnectorCredentialPair.id,
        )
        .join(
            ConnectorCredentialPair,
            and_(
                DocumentByConnectorCredentialPair.connector_id
                == ConnectorCredentialPair.connector_id,
                DocumentByConnectorCredentialPair.credential_id
                == ConnectorCredentialPair.credential_id,
            ),
        )
        .filter(Document.id.in_(document_ids))
        .all()
    )
    if not doc_cc_pairs:
        # Nothing matched; skip the second query rather than issue an empty IN ().
        return result

    # Map each relevant cc_pair to the folder of its user file. De-duplicate
    # the cc_pair IDs so the IN clause contains each ID only once.
    cc_pair_ids = {cc_pair_id for _, cc_pair_id in doc_cc_pairs}
    cc_pair_to_folder = (
        db_session.query(ConnectorCredentialPair.id, UserFile.folder_id)
        .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id)
        .filter(ConnectorCredentialPair.id.in_(cc_pair_ids))
        .all()
    )
    cc_pair_to_folder_dict = dict(cc_pair_to_folder)

    # Stitch the two mappings together: document -> cc_pair -> folder.
    for doc_id, cc_pair_id in doc_cc_pairs:
        if cc_pair_id in cc_pair_to_folder_dict:
            result[doc_id] = cc_pair_to_folder_dict[cc_pair_id]
    return result
def get_user_file_from_id(db_session: Session, user_file_id: int) -> UserFile | None:
    """Look up a single UserFile by its ID; returns None when no row matches."""
    matching = db_session.query(UserFile).filter(UserFile.id == user_file_id)
    return matching.first()
# def fetch_user_files_for_documents(
# # document_ids: list[str],
# # db_session: Session,
# # ) -> dict[str, int | None]:
# # # Query UserFile objects for the given document_ids
# # user_files = (
# # db_session.query(UserFile).filter(UserFile.document_id.in_(document_ids)).all()
# # )
# # # Create a dictionary mapping document_ids to UserFile objects
# # result: dict[str, int | None] = {doc_id: None for doc_id in document_ids}
# # for user_file in user_files:
# # result[user_file.document_id] = user_file.id
# # return result
def upsert_user_folder(
    db_session: Session,
    id: int | None = None,
    user_id: UUID | None = None,
    name: str | None = None,
    description: str | None = None,
    created_at: datetime.datetime | None = None,
    user: User | None = None,
    files: list[UserFile] | None = None,
    assistants: list[Persona] | None = None,
) -> UserFolder:
    """Create or update a UserFolder.

    Lookup is by ``id`` when provided, otherwise by the ``(name, user_id)``
    pair. When a folder is found, only the arguments that are not None are
    applied, so callers can patch individual fields. When no folder matches,
    a new one is created (keeping ``id`` if it was given). The session is
    flushed but NOT committed; the caller owns the transaction.
    """
    # Prefer primary-key lookup; fall back to the (name, user_id) pair.
    if id is not None:
        user_folder = db_session.query(UserFolder).filter_by(id=id).first()
    else:
        user_folder = (
            db_session.query(UserFolder).filter_by(name=name, user_id=user_id).first()
        )
    if user_folder:
        # Partial update: None means "leave this field unchanged".
        if user_id is not None:
            user_folder.user_id = user_id
        if name is not None:
            user_folder.name = name
        if description is not None:
            user_folder.description = description
        if created_at is not None:
            user_folder.created_at = created_at
        if user is not None:
            user_folder.user = user
        if files is not None:
            user_folder.files = files
        if assistants is not None:
            user_folder.assistants = assistants
    else:
        # NOTE(review): utcnow() is deprecated and returns a naive datetime;
        # presumably the column stores naive UTC — confirm before switching
        # to datetime.now(timezone.utc).
        user_folder = UserFolder(
            id=id,
            user_id=user_id,
            name=name,
            description=description,
            created_at=created_at or datetime.datetime.utcnow(),
            user=user,
            files=files or [],
            assistants=assistants or [],
        )
        db_session.add(user_folder)
    # Flush so the caller can read generated fields (e.g. the new ID) before commit.
    db_session.flush()
    return user_folder
def get_user_folder_by_name(db_session: Session, name: str) -> UserFolder | None:
    """Return the first UserFolder with the given name, or None if absent."""
    by_name = db_session.query(UserFolder).filter(UserFolder.name == name)
    return by_name.first()
def update_user_file_token_count__no_commit(
    user_file_id_to_token_count: dict[int, int | None],
    db_session: Session,
) -> None:
    """Set token_count for each listed user file; the caller must commit."""
    for file_id, count in user_file_id_to_token_count.items():
        (
            db_session.query(UserFile)
            .filter(UserFile.id == file_id)
            .update({UserFile.token_count: count})
        )

View File

@@ -1,52 +1,15 @@
"""Factory for creating federated connector instances."""
import importlib
from typing import Any
from typing import Type
from onyx.configs.constants import FederatedConnectorSource
from onyx.federated_connectors.interfaces import FederatedConnector
from onyx.federated_connectors.registry import FEDERATED_CONNECTOR_CLASS_MAP
from onyx.federated_connectors.slack.federated_connector import SlackFederatedConnector
from onyx.utils.logger import setup_logger
logger = setup_logger()
class FederatedConnectorMissingException(Exception):
pass
# Cache for already imported federated connector classes
_federated_connector_cache: dict[FederatedConnectorSource, Type[FederatedConnector]] = (
{}
)
def _load_federated_connector_class(
source: FederatedConnectorSource,
) -> Type[FederatedConnector]:
"""Dynamically load and cache a federated connector class."""
if source in _federated_connector_cache:
return _federated_connector_cache[source]
if source not in FEDERATED_CONNECTOR_CLASS_MAP:
raise FederatedConnectorMissingException(
f"Federated connector not found for source={source}"
)
mapping = FEDERATED_CONNECTOR_CLASS_MAP[source]
try:
module = importlib.import_module(mapping.module_path)
connector_class = getattr(module, mapping.class_name)
_federated_connector_cache[source] = connector_class
return connector_class
except (ImportError, AttributeError) as e:
raise FederatedConnectorMissingException(
f"Failed to import {mapping.class_name} from {mapping.module_path}: {e}"
)
def get_federated_connector(
source: FederatedConnectorSource,
credentials: dict[str, Any],
@@ -58,6 +21,9 @@ def get_federated_connector(
def get_federated_connector_cls(
source: FederatedConnectorSource,
) -> Type[FederatedConnector]:
) -> type[FederatedConnector]:
"""Get the class of the appropriate federated connector."""
return _load_federated_connector_class(source)
if source == FederatedConnectorSource.FEDERATED_SLACK:
return SlackFederatedConnector
else:
raise ValueError(f"Unsupported federated connector source: {source}")

View File

@@ -135,16 +135,12 @@ def get_federated_retrieval_functions(
# At this point, user_id is guaranteed to be not None since we're in the else branch
assert user_id is not None
# If no source types are specified, don't use any federated connectors
if source_types is None:
logger.info("No source types specified, skipping all federated connectors")
return []
federated_retrieval_infos: list[FederatedRetrievalInfo] = []
federated_oauth_tokens = list_federated_connector_oauth_tokens(db_session, user_id)
for oauth_token in federated_oauth_tokens:
if (
oauth_token.federated_connector.source.to_non_federated_source()
source_types is not None
and oauth_token.federated_connector.source.to_non_federated_source()
not in source_types
):
continue

View File

@@ -1,19 +0,0 @@
"""Registry mapping for federated connector classes."""
from pydantic import BaseModel
from onyx.configs.constants import FederatedConnectorSource
class FederatedConnectorMapping(BaseModel):
module_path: str
class_name: str
# Mapping of FederatedConnectorSource to connector details for lazy loading
FEDERATED_CONNECTOR_CLASS_MAP = {
FederatedConnectorSource.FEDERATED_SLACK: FederatedConnectorMapping(
module_path="onyx.federated_connectors.slack.federated_connector",
class_name="SlackFederatedConnector",
),
}

View File

@@ -54,7 +54,6 @@ ACCEPTED_PLAIN_TEXT_FILE_EXTENSIONS = [
".xml",
".yml",
".yaml",
".sql",
]
ACCEPTED_DOCUMENT_FILE_EXTENSIONS = [

View File

@@ -2,6 +2,7 @@ from typing import Any
from typing import cast
from typing import IO
from unstructured.staging.base import dict_to_elements
from unstructured_client import UnstructuredClient # type: ignore
from unstructured_client.models import operations # type: ignore
from unstructured_client.models import shared
@@ -51,8 +52,6 @@ def _sdk_partition_request(
def unstructured_to_text(file: IO[Any], file_name: str) -> str:
from unstructured.staging.base import dict_to_elements
logger.debug(f"Starting to read file: {file_name}")
req = _sdk_partition_request(file, file_name, strategy="fast")

View File

@@ -1,6 +1,7 @@
import base64
from enum import Enum
from typing import NotRequired
from uuid import UUID
from typing_extensions import TypedDict # noreorder
from pydantic import BaseModel
@@ -35,7 +36,7 @@ class FileDescriptor(TypedDict):
id: str
type: ChatFileType
name: NotRequired[str | None]
user_file_id: NotRequired[str | None]
user_file_id: NotRequired[UUID | None]
class InMemoryChatFile(BaseModel):
@@ -57,5 +58,5 @@ class InMemoryChatFile(BaseModel):
"id": str(self.file_id),
"type": self.file_type,
"name": self.filename,
"user_file_id": str(self.file_id) if self.file_id else None,
"user_file_id": UUID(str(self.file_id)) if self.file_id else None,
}

View File

@@ -5,9 +5,8 @@ from collections.abc import Iterator
from collections.abc import Sequence
from typing import Any
from typing import cast
from typing import TYPE_CHECKING
from typing import Union
import litellm # type: ignore
from httpx import RemoteProtocolError
from langchain.schema.language_model import LanguageModelInput
from langchain_core.messages import AIMessage
@@ -25,7 +24,9 @@ from langchain_core.messages import SystemMessageChunk
from langchain_core.messages.tool import ToolCallChunk
from langchain_core.messages.tool import ToolMessage
from langchain_core.prompt_values import PromptValue
from litellm.utils import get_supported_openai_params
from onyx.configs.app_configs import BRAINTRUST_ENABLED
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.configs.chat_configs import QA_TIMEOUT
@@ -44,9 +45,13 @@ from onyx.utils.long_term_log import LongTermLogger
logger = setup_logger()
if TYPE_CHECKING:
from litellm import ModelResponse, CustomStreamWrapper, Message
# If a user configures a different model and it doesn't support all the same
# parameters like frequency and presence, just ignore them
litellm.drop_params = True
litellm.telemetry = False
if BRAINTRUST_ENABLED:
litellm.callbacks = ["braintrust"]
_LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"
VERTEX_CREDENTIALS_FILE_KWARG = "vertex_credentials"
@@ -80,10 +85,8 @@ def _base_msg_to_role(msg: BaseMessage) -> str:
def _convert_litellm_message_to_langchain_message(
litellm_message: "Message",
litellm_message: litellm.Message,
) -> BaseMessage:
from onyx.llm.litellm_singleton import litellm
# Extracting the basic attributes from the litellm message
content = litellm_message.content or ""
role = litellm_message.role
@@ -173,15 +176,15 @@ def _convert_delta_to_message_chunk(
curr_msg: BaseMessage | None,
stop_reason: str | None = None,
) -> BaseMessageChunk:
from litellm.utils import ChatCompletionDeltaToolCall
"""Adapted from langchain_community.chat_models.litellm._convert_delta_to_message_chunk"""
role = _dict.get("role") or (_base_msg_to_role(curr_msg) if curr_msg else "unknown")
content = _dict.get("content") or ""
additional_kwargs = {}
if _dict.get("function_call"):
additional_kwargs.update({"function_call": dict(_dict["function_call"])})
tool_calls = cast(list[ChatCompletionDeltaToolCall] | None, _dict.get("tool_calls"))
tool_calls = cast(
list[litellm.utils.ChatCompletionDeltaToolCall] | None, _dict.get("tool_calls")
)
if role == "user":
return HumanMessageChunk(content=content)
@@ -318,8 +321,6 @@ class DefaultMultiLLM(LLM):
self._max_token_param = LEGACY_MAX_TOKENS_KWARG
try:
from litellm.utils import get_supported_openai_params
params = get_supported_openai_params(model_name, model_provider)
if STANDARD_MAX_TOKENS_KWARG in (params or []):
self._max_token_param = STANDARD_MAX_TOKENS_KWARG
@@ -387,12 +388,11 @@ class DefaultMultiLLM(LLM):
structured_response_format: dict | None = None,
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> Union["ModelResponse", "CustomStreamWrapper"]:
) -> litellm.ModelResponse | litellm.CustomStreamWrapper:
# litellm doesn't accept LangChain BaseMessage objects, so we need to convert them
# to a dict representation
processed_prompt = _prompt_to_dict(prompt)
self._record_call(processed_prompt)
from onyx.llm.litellm_singleton import litellm
try:
return litellm.completion(
@@ -437,16 +437,6 @@ class DefaultMultiLLM(LLM):
]
else {}
), # TODO: remove once LITELLM has patched
**(
{"reasoning_effort": "minimal"}
if self.config.model_name
in [
"gpt-5",
"gpt-5-mini",
"gpt-5-nano",
]
else {}
), # TODO: remove once LITELLM has better support/we change API
**(
{"response_format": structured_response_format}
if structured_response_format
@@ -495,13 +485,11 @@ class DefaultMultiLLM(LLM):
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> BaseMessage:
from litellm import ModelResponse
if LOG_ONYX_MODEL_INTERACTIONS:
self.log_model_configs()
response = cast(
ModelResponse,
litellm.ModelResponse,
self._completion(
prompt=prompt,
tools=tools,
@@ -530,8 +518,6 @@ class DefaultMultiLLM(LLM):
timeout_override: int | None = None,
max_tokens: int | None = None,
) -> Iterator[BaseMessage]:
from litellm import CustomStreamWrapper
if LOG_ONYX_MODEL_INTERACTIONS:
self.log_model_configs()
@@ -548,7 +534,7 @@ class DefaultMultiLLM(LLM):
output = None
response = cast(
CustomStreamWrapper,
litellm.CustomStreamWrapper,
self._completion(
prompt=prompt,
tools=tools,

View File

@@ -1,5 +1,8 @@
from typing import Any
from onyx.chat.models import PersonaOverrideConfig
from onyx.configs.app_configs import DISABLE_GENERATIVE_AI
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.llm import fetch_default_provider
@@ -10,8 +13,6 @@ from onyx.db.models import Persona
from onyx.llm.chat_llm import DefaultMultiLLM
from onyx.llm.exceptions import GenAIDisabledException
from onyx.llm.interfaces import LLM
from onyx.llm.llm_provider_options import OLLAMA_API_KEY_CONFIG_KEY
from onyx.llm.llm_provider_options import OLLAMA_PROVIDER_NAME
from onyx.llm.override_models import LLMOverride
from onyx.llm.utils import get_max_input_tokens_from_llm_provider
from onyx.llm.utils import model_supports_image_input
@@ -23,22 +24,13 @@ from onyx.utils.long_term_log import LongTermLogger
logger = setup_logger()
def _build_provider_extra_headers(
provider: str, custom_config: dict[str, str] | None
) -> dict[str, str]:
if provider != OLLAMA_PROVIDER_NAME or not custom_config:
return {}
def _build_extra_model_kwargs(provider: str) -> dict[str, Any]:
"""Ollama requires us to specify the max context window.
raw_api_key = custom_config.get(OLLAMA_API_KEY_CONFIG_KEY)
api_key = raw_api_key.strip() if raw_api_key else None
if not api_key:
return {}
if not api_key.lower().startswith("bearer "):
api_key = f"Bearer {api_key}"
return {"Authorization": api_key}
For now, just using the GEN_AI_MODEL_FALLBACK_MAX_TOKENS value.
TODO: allow model-specific values to be configured via the UI.
"""
return {"num_ctx": GEN_AI_MODEL_FALLBACK_MAX_TOKENS} if provider == "ollama" else {}
def get_main_llm_from_tuple(
@@ -280,16 +272,6 @@ def get_llm(
) -> LLM:
if temperature is None:
temperature = GEN_AI_TEMPERATURE
extra_headers = build_llm_extra_headers(additional_headers)
# NOTE: this is needed since Ollama API key is optional
# User may access Ollama cloud via locally hosted instance (logged in)
# or just via the cloud API (not logged in, using API key)
provider_extra_headers = _build_provider_extra_headers(provider, custom_config)
if provider_extra_headers:
extra_headers.update(provider_extra_headers)
return DefaultMultiLLM(
model_provider=provider,
model_name=model,
@@ -300,8 +282,8 @@ def get_llm(
timeout=timeout,
temperature=temperature,
custom_config=custom_config,
extra_headers=extra_headers,
model_kwargs={},
extra_headers=build_llm_extra_headers(additional_headers),
model_kwargs=_build_extra_model_kwargs(provider),
long_term_logger=long_term_logger,
max_input_tokens=max_input_tokens,
)

View File

@@ -1,23 +0,0 @@
"""
Singleton module for litellm configuration.
This ensures litellm is configured exactly once when first imported.
All other modules should import litellm from here instead of directly.
"""
import litellm
from onyx.configs.app_configs import BRAINTRUST_ENABLED
# Import litellm
# Configure litellm settings immediately on import
# If a user configures a different model and it doesn't support all the same
# parameters like frequency and presence, just ignore them
litellm.drop_params = True
litellm.telemetry = False
if BRAINTRUST_ENABLED:
litellm.callbacks = ["braintrust"]
# Export the configured litellm module
__all__ = ["litellm"]

View File

@@ -39,7 +39,6 @@ class WellKnownLLMProviderDescriptor(BaseModel):
model_configurations: list[ModelConfigurationView]
default_model: str | None = None
default_fast_model: str | None = None
default_api_base: str | None = None
# set for providers like Azure, which require a deployment name.
deployment_name_required: bool = False
# set for providers like Azure, which support a single model per deployment.
@@ -96,9 +95,7 @@ BEDROCK_MODEL_NAMES = [
for model in list(litellm.bedrock_models.union(litellm.bedrock_converse_models))
if "/" not in model and "embed" not in model
][::-1]
OLLAMA_PROVIDER_NAME = "ollama"
OLLAMA_API_KEY_CONFIG_KEY = "OLLAMA_API_KEY"
BEDROCK_DEFAULT_MODEL = "anthropic.claude-3-5-sonnet-20241022-v2:0"
IGNORABLE_ANTHROPIC_MODELS = [
"claude-2",
@@ -112,8 +109,8 @@ ANTHROPIC_MODEL_NAMES = [
if model not in IGNORABLE_ANTHROPIC_MODELS
][::-1]
ANTHROPIC_VISIBLE_MODEL_NAMES = [
"claude-sonnet-4-5-20250929",
"claude-sonnet-4-20250514",
"claude-3-5-sonnet-20241022",
"claude-3-7-sonnet-20250219",
]
AZURE_PROVIDER_NAME = "azure"
@@ -163,15 +160,13 @@ _PROVIDER_TO_MODELS_MAP = {
BEDROCK_PROVIDER_NAME: BEDROCK_MODEL_NAMES,
ANTHROPIC_PROVIDER_NAME: ANTHROPIC_MODEL_NAMES,
VERTEXAI_PROVIDER_NAME: VERTEXAI_MODEL_NAMES,
OLLAMA_PROVIDER_NAME: [],
}
_PROVIDER_TO_VISIBLE_MODELS_MAP = {
OPENAI_PROVIDER_NAME: OPEN_AI_VISIBLE_MODEL_NAMES,
BEDROCK_PROVIDER_NAME: [],
BEDROCK_PROVIDER_NAME: [BEDROCK_DEFAULT_MODEL],
ANTHROPIC_PROVIDER_NAME: ANTHROPIC_VISIBLE_MODEL_NAMES,
VERTEXAI_PROVIDER_NAME: VERTEXAI_VISIBLE_MODEL_NAMES,
OLLAMA_PROVIDER_NAME: [],
}
@@ -190,28 +185,6 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:
default_model="gpt-4o",
default_fast_model="gpt-4o-mini",
),
WellKnownLLMProviderDescriptor(
name=OLLAMA_PROVIDER_NAME,
display_name="Ollama",
api_key_required=False,
api_base_required=True,
api_version_required=False,
custom_config_keys=[
CustomConfigKey(
name=OLLAMA_API_KEY_CONFIG_KEY,
display_name="Ollama API Key",
description="Optional API key used when connecting to Ollama Cloud (i.e. API base is https://ollama.com).",
is_required=False,
is_secret=True,
)
],
model_configurations=fetch_model_configurations_for_provider(
OLLAMA_PROVIDER_NAME
),
default_model=None,
default_fast_model=None,
default_api_base="http://127.0.0.1:11434",
),
WellKnownLLMProviderDescriptor(
name=ANTHROPIC_PROVIDER_NAME,
display_name="Anthropic",
@@ -222,8 +195,8 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:
model_configurations=fetch_model_configurations_for_provider(
ANTHROPIC_PROVIDER_NAME
),
default_model="claude-sonnet-4-5-20250929",
default_fast_model="claude-sonnet-4-20250514",
default_model="claude-3-7-sonnet-20250219",
default_fast_model="claude-3-5-sonnet-20241022",
),
WellKnownLLMProviderDescriptor(
name=AZURE_PROVIDER_NAME,
@@ -275,7 +248,7 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]:
model_configurations=fetch_model_configurations_for_provider(
BEDROCK_PROVIDER_NAME
),
default_model=None,
default_model=BEDROCK_DEFAULT_MODEL,
default_fast_model=None,
),
WellKnownLLMProviderDescriptor(

View File

@@ -16,7 +16,6 @@ from langchain.schema.messages import AIMessage
from langchain.schema.messages import BaseMessage
from langchain.schema.messages import HumanMessage
from langchain.schema.messages import SystemMessage
from sqlalchemy import select
from onyx.configs.app_configs import LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS
from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
@@ -27,9 +26,6 @@ from onyx.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from onyx.configs.model_configs import GEN_AI_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import LLMProvider
from onyx.db.models import ModelConfiguration
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.llm.interfaces import LLM
@@ -464,7 +460,6 @@ def get_llm_contextual_cost(
this does not account for the cost of documents that fit within a single chunk
which do not get contextualized.
"""
import litellm
# calculate input costs
@@ -644,30 +639,6 @@ def get_max_input_tokens_from_llm_provider(
def model_supports_image_input(model_name: str, model_provider: str) -> bool:
# TODO: Add support to check model config for any provider
# TODO: Circular import means OLLAMA_PROVIDER_NAME is not available here
if model_provider == "ollama":
try:
with get_session_with_current_tenant() as db_session:
model_config = db_session.scalar(
select(ModelConfiguration)
.join(
LLMProvider,
ModelConfiguration.llm_provider_id == LLMProvider.id,
)
.where(
ModelConfiguration.name == model_name,
LLMProvider.provider == model_provider,
)
)
if model_config and model_config.supports_image_input is not None:
return model_config.supports_image_input
except Exception as e:
logger.warning(
f"Failed to query database for {model_provider} model {model_name} image support: {e}"
)
model_map = get_model_map()
try:
model_obj = find_model_obj(

View File

@@ -714,15 +714,13 @@ information that will be necessary to provide a succinct answer to the specific
the documents. Again, start out here as well with a brief statement whether the SPECIFIC CONTEXT is \
mentioned in the documents. (Example: 'I was not able to find information about yellow curry specifically, \
but I found information about curry...'). But this should be be precise and concise, and specifically \
answer the question. Please cite the document sources inline in format [[1]][[7]], etc., where it \
is essential that the document NUMBERS are in the brackets, not any titles.>",
answer the question. Please cite the document sources inline in format [[1]][[7]], etc.>",
"claims": "<a list of short claims discussed in the documents as they pertain to the query and/or \
the original question. These will later be used for follow-up questions and verifications. Note that \
these may not actually be in the succinct answer above. Note also that each claim \
should include ONE fact that contains enough context to be verified/questioned by a different system \
without the need for going back to these documents for additional context. Also here, please cite the \
document sources inline in format [[1]][[7]], etc., where it is essential that the document NUMBERS are \
in the brackets, not any titles. So this should have format like \
document sources inline in format [[1]][[7]], etc.. So this should have format like \
[<claim 1>, <claim 2>, <claim 3>, ...], each with citations.>"
}
"""
@@ -1045,9 +1043,8 @@ find information about yellow curry specifically, but here is what I found about
- do not make anything up! Only use the information provided in the documents, or, \
if no documents are provided for a sub-answer, in the actual sub-answer.
- Provide a thoughtful answer that is concise and to the point, but that is detailed.
- Please cite your sources INLINE in format [[2]][[4]], etc! The NUMBERS of the documents \
are provided above, and the NUMBERS need to be in the brackets. And the appropriate citation \
should be close to the corresponding /
- Please cite your sources INLINE in format [[2]][[4]], etc! The numbers of the documents \
are provided above. So the appropriate citation number should be close to the corresponding /
information it supports!
- If you are not that certain that the information does relate to the question topic, \
point out the ambiguity in your answer. But DO NOT say something like 'I was not able to find \
@@ -1101,16 +1098,14 @@ find information about yellow curry specifically, but here is what I found about
- do not make anything up! Only use the information provided in the documents, or, \
if no documents are provided for a sub-answer, in the actual sub-answer.
- Provide a thoughtful answer that is concise and to the point, but that is detailed.
- Please cite your sources inline in format [[2]][[4]], etc! The NUMBERS of the documents \
are provided above, and the NUMBERS need to be in the brackets. And the appropriate citation \
should be close to the corresponding /
- Please cite your sources inline in format [[2]][[4]], etc! The numbers of the documents \
are provided above. So the appropriate citation number should be close to the corresponding /
information it supports!
- If you are not that certain that the information does relate to the question topic, \
point out the ambiguity in your answer. But DO NOT say something like 'I was not able to find \
information on <X> specifically, but here is what I found about <X> generally....'. Rather say, \
'Here is what I found about <X> and I hope this is the <X> you were looking for...', or similar.
- Again... CITE YOUR SOURCES INLINE IN FORMAT [[2]][[4]], etc! This is CRITICAL! Note that \
the DOCUMENT NUMBERS need to be in the brackets.
- Again... CITE YOUR SOURCES INLINE IN FORMAT [[2]][[4]], etc! This is CRITICAL!
ANSWER:
"""
@@ -1155,9 +1150,8 @@ find information about yellow curry specifically, but here is what I found about
- do not make anything up! Only use the information provided in the documents, or, \
if no documents are provided for a sub-answer, in the actual sub-answer.
- Provide a thoughtful answer that is concise and to the point, but that is detailed.
- THIS IS VERY IMPORTANT: Please cite your sources inline in format [[2]][[4]], etc! \
The NUMBERS of the documents - provided above -need to be in the brackets. \
Also, if you refer to sub-answers, the provided reference numbers \
- THIS IS VERY IMPORTANT: Please cite your sources inline in format [[2]][[4]], etc! The numbers of the documents \
are provided above. Also, if you refer to sub-answers, the provided reference numbers \
in the sub-answers are the same as the ones provided for the documents!
ANSWER:

View File

@@ -3,6 +3,7 @@ from typing import Any
from typing import cast
from typing import List
from litellm import get_supported_openai_params
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import NUM_PERSONA_PROMPT_GENERATION_CHUNKS
@@ -122,8 +123,6 @@ def generate_starter_messages(
"""
_, fast_llm = get_default_llms(temperature=0.5)
from litellm.utils import get_supported_openai_params
provider = fast_llm.config.model_provider
model = fast_llm.config.model_name

View File

@@ -184,7 +184,7 @@ def seed_initial_documents(
"base_url": "https://docs.onyx.app/",
"web_connector_type": "recursive",
},
refresh_freq=3600, # 1 hour
refresh_freq=None, # Never refresh by default
prune_freq=None,
indexing_start=None,
)

View File

@@ -0,0 +1,6 @@
user_folders:
- id: -1
name: "Recent Documents"
description: "Documents uploaded by the user"
files: []
assistants: []

View File

@@ -22,7 +22,7 @@ from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import CredentialBase
from onyx.utils.logger import setup_logger
from onyx.utils.subclasses import find_all_subclasses_in_package
from onyx.utils.subclasses import find_all_subclasses_in_dir
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
@@ -44,8 +44,7 @@ def _discover_oauth_connectors() -> dict[DocumentSource, type[OAuthConnector]]:
if _OAUTH_CONNECTORS: # Return cached connectors if already discovered
return _OAUTH_CONNECTORS
# Import submodules using package-based discovery to avoid sys.path mutations
oauth_connectors = find_all_subclasses_in_package(
oauth_connectors = find_all_subclasses_in_dir(
cast(type[OAuthConnector], OAuthConnector), "onyx.connectors"
)

View File

@@ -0,0 +1,177 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Path
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.db.chat import get_chat_session_by_id
from onyx.db.engine.sql_engine import get_session
from onyx.db.folder import add_chat_to_folder
from onyx.db.folder import create_folder
from onyx.db.folder import delete_folder
from onyx.db.folder import get_user_folders
from onyx.db.folder import remove_chat_from_folder
from onyx.db.folder import rename_folder
from onyx.db.folder import update_folder_display_priority
from onyx.db.models import User
from onyx.server.features.folder.models import DeleteFolderOptions
from onyx.server.features.folder.models import FolderChatSessionRequest
from onyx.server.features.folder.models import FolderCreationRequest
from onyx.server.features.folder.models import FolderUpdateRequest
from onyx.server.features.folder.models import GetUserFoldersResponse
from onyx.server.features.folder.models import UserFolderSnapshot
from onyx.server.models import DisplayPriorityRequest
from onyx.server.query_and_chat.models import ChatSessionDetails
router = APIRouter(prefix="/folder")
@router.get("")
def get_folders(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> GetUserFoldersResponse:
folders = get_user_folders(
user_id=user.id if user else None,
db_session=db_session,
)
folders.sort()
return GetUserFoldersResponse(
folders=[
UserFolderSnapshot(
folder_id=folder.id,
folder_name=folder.name,
display_priority=folder.display_priority,
chat_sessions=[
ChatSessionDetails(
id=chat_session.id,
name=chat_session.description,
persona_id=chat_session.persona_id,
time_created=chat_session.time_created.isoformat(),
time_updated=chat_session.time_updated.isoformat(),
shared_status=chat_session.shared_status,
folder_id=folder.id,
)
for chat_session in folder.chat_sessions
if not chat_session.deleted
],
)
for folder in folders
]
)
@router.put("/reorder")
def put_folder_display_priority(
    display_priority_request: DisplayPriorityRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Persist a new display ordering for the current user's folders."""
    requesting_user_id = user.id if user else None
    update_folder_display_priority(
        user_id=requesting_user_id,
        display_priority_map=display_priority_request.display_priority_map,
        db_session=db_session,
    )
@router.post("")
def create_folder_endpoint(
    request: FolderCreationRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> int:
    """Create a new chat folder for the current user and return its ID."""
    owner_id = user.id if user else None
    new_folder_id = create_folder(
        user_id=owner_id,
        folder_name=request.folder_name,
        db_session=db_session,
    )
    return new_folder_id
@router.patch("/{folder_id}")
def patch_folder_endpoint(
    request: FolderUpdateRequest,
    folder_id: int = Path(..., description="The ID of the folder to rename"),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Rename an existing folder.

    Raises:
        HTTPException: 400 if the rename fails (e.g. folder missing or not
            owned by the requesting user).
    """
    try:
        rename_folder(
            user_id=user.id if user else None,
            folder_id=folder_id,
            folder_name=request.folder_name,
            db_session=db_session,
        )
    except Exception as e:
        # Chain the cause so the original error survives in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
@router.delete("/{folder_id}")
def delete_folder_endpoint(
    request: DeleteFolderOptions,
    folder_id: int = Path(..., description="The ID of the folder to delete"),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Delete a folder, optionally deleting the chats it contains.

    Raises:
        HTTPException: 400 if the delete fails (e.g. folder missing or not
            owned by the requesting user).
    """
    user_id = user.id if user else None
    try:
        delete_folder(
            user_id=user_id,
            folder_id=folder_id,
            including_chats=request.including_chats,
            db_session=db_session,
        )
    except Exception as e:
        # Chain the cause so the original error survives in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
@router.post("/{folder_id}/add-chat-session")
def add_chat_to_folder_endpoint(
    request: FolderChatSessionRequest,
    folder_id: int = Path(
        ..., description="The ID of the folder in which to add the chat session"
    ),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Attach an existing chat session to a folder.

    Raises:
        HTTPException: 400 if the session lookup or the folder update fails.
    """
    user_id = user.id if user else None
    try:
        chat_session = get_chat_session_by_id(
            chat_session_id=request.chat_session_id,
            user_id=user_id,
            db_session=db_session,
        )
        add_chat_to_folder(
            # Reuse the user_id computed above (was redundantly recomputed).
            user_id=user_id,
            folder_id=folder_id,
            chat_session=chat_session,
            db_session=db_session,
        )
    except Exception as e:
        # Chain the cause so the original error survives in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
@router.post("/{folder_id}/remove-chat-session")
def remove_chat_from_folder_endpoint(
    request: FolderChatSessionRequest,
    folder_id: int = Path(
        ..., description="The ID of the folder from which to remove the chat session"
    ),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Detach a chat session from a folder.

    Raises:
        HTTPException: 400 if the session lookup or the folder update fails.
    """
    user_id = user.id if user else None
    try:
        chat_session = get_chat_session_by_id(
            chat_session_id=request.chat_session_id,
            user_id=user_id,
            db_session=db_session,
        )
        remove_chat_from_folder(
            user_id=user_id,
            folder_id=folder_id,
            chat_session=chat_session,
            db_session=db_session,
        )
    except Exception as e:
        # Chain the cause so the original error survives in tracebacks (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e

View File

@@ -0,0 +1,32 @@
from uuid import UUID
from pydantic import BaseModel
from onyx.server.query_and_chat.models import ChatSessionDetails
class UserFolderSnapshot(BaseModel):
    # Serialized view of one chat folder plus its visible chat sessions.
    folder_id: int
    folder_name: str | None
    display_priority: int
    chat_sessions: list[ChatSessionDetails]


class GetUserFoldersResponse(BaseModel):
    # Response body for GET /folder: all folders owned by the requesting user.
    folders: list[UserFolderSnapshot]


class FolderCreationRequest(BaseModel):
    # Name for the new folder; None lets the backend choose a default.
    folder_name: str | None = None


class FolderUpdateRequest(BaseModel):
    # New name for an existing folder.
    folder_name: str | None = None


class FolderChatSessionRequest(BaseModel):
    # Chat session to add to / remove from a folder.
    chat_session_id: UUID


class DeleteFolderOptions(BaseModel):
    # When True, chats inside the folder are deleted along with the folder.
    including_chats: bool = False

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,3 @@
from enum import Enum
from typing import Any
from typing import List
from typing import NotRequired
from typing import Optional
@@ -12,35 +10,20 @@ from pydantic import model_validator
from onyx.db.enums import MCPAuthenticationPerformer
from onyx.db.enums import MCPAuthenticationType
from onyx.db.enums import MCPTransport
# This should be updated along with MCPConnectionData
class MCPOAuthKeys(str, Enum):
"""MCP OAuth keys types"""
CLIENT_INFO = "client_info"
TOKENS = "tokens"
METADATA = "metadata"
class MCPConnectionData(TypedDict):
"""TypedDict to allow use as a type hint for a JSONB column
in Postgres"""
refresh_token: NotRequired[str]
access_token: NotRequired[str]
headers: dict[str, str]
header_substitutions: NotRequired[dict[str, str]]
# For OAuth only
# Note: Update MCPOAuthKeys if necessary when modifying these
# Unfortunately we can't use the actual models here because basemodels aren't compatible
# with SQLAlchemy
client_info: NotRequired[dict[str, Any]] # OAuthClientInformationFull
tokens: NotRequired[dict[str, Any]] # OAuthToken
metadata: NotRequired[dict[str, Any]] # OAuthClientMetadata
# the actual models are defined in mcp.shared.auth
# from mcp.shared.auth import OAuthClientInformationFull, OAuthClientMetadata, OAuthToken
client_id: NotRequired[str]
client_secret: NotRequired[str]
registration_access_token: NotRequired[str]
registration_client_uri: NotRequired[str]
class MCPAuthTemplate(BaseModel):
@@ -65,17 +48,14 @@ class MCPToolCreateRequest(BaseModel):
description: Optional[str] = Field(None, description="Description of the MCP tool")
server_url: str = Field(..., description="URL of the MCP server")
auth_type: MCPAuthenticationType = Field(..., description="Authentication type")
auth_performer: MCPAuthenticationPerformer = Field(
..., description="Who performs authentication"
auth_performer: Optional[MCPAuthenticationPerformer] = Field(
None, description="Who performs authentication"
)
api_token: Optional[str] = Field(
None, description="API token for api_token auth type"
)
oauth_client_id: Optional[str] = Field(None, description="OAuth client ID")
oauth_client_secret: Optional[str] = Field(None, description="OAuth client secret")
transport: MCPTransport | None = Field(
None, description="MCP transport type (STREAMABLE_HTTP or SSE)"
)
auth_template: Optional[MCPAuthTemplate] = Field(
None, description="Template configuration for per-user authentication"
)
@@ -124,9 +104,15 @@ class MCPToolCreateRequest(BaseModel):
"admin_credentials is required when auth_performer is 'per_user'"
)
# OAuth client ID/secret are optional. If provided, they will seed the
# OAuth client info; otherwise, the MCP client will attempt dynamic
# client registration.
if self.auth_type == MCPAuthenticationType.OAUTH and not self.oauth_client_id:
raise ValueError("oauth_client_id is required when auth_type is 'oauth'")
if (
self.auth_type == MCPAuthenticationType.OAUTH
and not self.oauth_client_secret
):
raise ValueError(
"oauth_client_secret is required when auth_type is 'oauth'"
)
return self
@@ -154,7 +140,7 @@ class MCPToolResponse(BaseModel):
is_authenticated: bool
class MCPOAuthConnectRequest(BaseModel):
class MCPOAuthInitiateRequest(BaseModel):
name: str = Field(..., description="Name of the MCP tool")
description: Optional[str] = Field(None, description="Description of the MCP tool")
server_url: str = Field(..., description="URL of the MCP server")
@@ -166,33 +152,32 @@ class MCPOAuthConnectRequest(BaseModel):
)
class MCPOAuthConnectResponse(BaseModel):
class MCPOAuthInitiateResponse(BaseModel):
oauth_url: str = Field(..., description="OAuth URL to redirect user to")
state: str = Field(..., description="OAuth state parameter")
pending_tool: dict = Field(..., description="Pending tool configuration")
class MCPUserOAuthConnectRequest(BaseModel):
class MCPUserOAuthInitiateRequest(BaseModel):
server_id: int = Field(..., description="ID of the MCP server")
return_path: str = Field(..., description="Path to redirect to after callback")
include_resource_param: bool = Field(..., description="Include resource parameter")
oauth_client_id: str | None = Field(
None, description="OAuth client ID (optional for DCR)"
)
oauth_client_secret: str | None = Field(
None, description="OAuth client secret (optional for DCR)"
)
@model_validator(mode="after")
def validate_return_path(self) -> "MCPUserOAuthConnectRequest":
def validate_return_path(self) -> "MCPUserOAuthInitiateRequest":
if not self.return_path.startswith("/"):
raise ValueError("return_path must start with a slash")
return self
class MCPUserOAuthConnectResponse(BaseModel):
server_id: int
class MCPUserOAuthInitiateResponse(BaseModel):
oauth_url: str = Field(..., description="OAuth URL to redirect user to")
state: str = Field(..., description="OAuth state parameter")
server_id: int = Field(..., description="Server ID")
server_name: str = Field(..., description="Server name")
code_verifier: Optional[str] = Field(
None, description="PKCE code verifier to be used at callback"
)
class MCPOAuthCallbackRequest(BaseModel):
@@ -209,6 +194,7 @@ class MCPOAuthCallbackResponse(BaseModel):
message: str
server_id: int
server_name: str
authenticated: bool
redirect_url: str
@@ -269,7 +255,6 @@ class MCPServer(BaseModel):
name: str
description: Optional[str] = None
server_url: str
transport: MCPTransport
auth_type: MCPAuthenticationType
auth_performer: MCPAuthenticationPerformer
is_authenticated: bool

View File

@@ -162,7 +162,7 @@ def unlink_user_file_from_project(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
priority=OnyxCeleryPriority.HIGHEST,
priority=OnyxCeleryPriority.HIGH,
)
logger.info(
f"Triggered project sync for user_file_id={user_file.id} with task_id={task.id}"
@@ -210,7 +210,7 @@ def link_user_file_to_project(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
priority=OnyxCeleryPriority.HIGHEST,
priority=OnyxCeleryPriority.HIGH,
)
logger.info(
f"Triggered project sync for user_file_id={user_file.id} with task_id={task.id}"

View File

@@ -4,7 +4,6 @@ from datetime import datetime
from datetime import timezone
import boto3
import httpx
from botocore.exceptions import BotoCoreError
from botocore.exceptions import ClientError
from botocore.exceptions import NoCredentialsError
@@ -12,12 +11,10 @@ from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from pydantic import ValidationError
from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_chat_accessible_user
from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
from onyx.db.engine.sql_engine import get_session
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import fetch_existing_llm_providers
@@ -43,9 +40,6 @@ from onyx.server.manage.llm.models import LLMProviderDescriptor
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import LLMProviderView
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.manage.llm.models import OllamaFinalModelResponse
from onyx.server.manage.llm.models import OllamaModelDetails
from onyx.server.manage.llm.models import OllamaModelsRequest
from onyx.server.manage.llm.models import TestLLMRequest
from onyx.server.manage.llm.models import VisionProviderResponse
from onyx.utils.logger import setup_logger
@@ -480,100 +474,3 @@ def get_bedrock_available_models(
raise HTTPException(
status_code=500, detail=f"Unexpected error fetching Bedrock models: {e}"
)
def _get_ollama_available_model_names(api_base: str) -> set[str]:
"""Fetch available model names from Ollama server."""
tags_url = f"{api_base}/api/tags"
try:
response = httpx.get(tags_url, timeout=5.0)
response.raise_for_status()
response_json = response.json()
except Exception as e:
raise HTTPException(
status_code=400,
detail=f"Failed to fetch Ollama models: {e}",
)
models = response_json.get("models", [])
return {model.get("name") for model in models if model.get("name")}
@admin_router.post("/ollama/available-models")
def get_ollama_available_models(
request: OllamaModelsRequest,
_: User | None = Depends(current_admin_user),
) -> list[OllamaFinalModelResponse]:
"""Fetch the list of available models from an Ollama server."""
cleaned_api_base = request.api_base.strip().rstrip("/")
if not cleaned_api_base:
raise HTTPException(
status_code=400, detail="API base URL is required to fetch Ollama models."
)
model_names = _get_ollama_available_model_names(cleaned_api_base)
if not model_names:
raise HTTPException(
status_code=400,
detail="No models found from your Ollama server",
)
all_models_with_context_size_and_vision: list[OllamaFinalModelResponse] = []
show_url = f"{cleaned_api_base}/api/show"
for model_name in model_names:
context_limit: int | None = None
supports_image_input: bool | None = None
try:
show_response = httpx.post(
show_url,
json={"model": model_name},
timeout=5.0,
)
show_response.raise_for_status()
show_response_json = show_response.json()
# Parse the response into the expected format
ollama_model_details = OllamaModelDetails.model_validate(show_response_json)
# Check if this model supports completion/chat
if not ollama_model_details.supports_completion():
continue
# Optimistically access. Context limit is stored as "model_architecture.context" = int
architecture = ollama_model_details.model_info.get(
"general.architecture", ""
)
context_limit = ollama_model_details.model_info.get(
architecture + ".context_length", None
)
supports_image_input = ollama_model_details.supports_image_input()
except ValidationError as e:
logger.warning(
"Invalid model details from Ollama server",
extra={"model": model_name, "validation_error": str(e)},
)
except Exception as e:
logger.warning(
"Failed to fetch Ollama model details",
extra={"model": model_name, "error": str(e)},
)
# If we fail at any point attempting to extract context limit,
# still allow this model to be used with a fallback max context size
if not context_limit:
context_limit = GEN_AI_MODEL_FALLBACK_MAX_TOKENS
if not supports_image_input:
supports_image_input = False
all_models_with_context_size_and_vision.append(
OllamaFinalModelResponse(
name=model_name,
max_input_tokens=context_limit,
supports_image_input=supports_image_input,
)
)
return all_models_with_context_size_and_vision

View File

@@ -1,4 +1,3 @@
from typing import Any
from typing import TYPE_CHECKING
from pydantic import BaseModel
@@ -139,9 +138,8 @@ class LLMProviderView(LLMProvider):
class ModelConfigurationUpsertRequest(BaseModel):
name: str
is_visible: bool
is_visible: bool | None = False
max_input_tokens: int | None = None
supports_image_input: bool | None = None
@classmethod
def from_model(
@@ -151,13 +149,12 @@ class ModelConfigurationUpsertRequest(BaseModel):
name=model_configuration_model.name,
is_visible=model_configuration_model.is_visible,
max_input_tokens=model_configuration_model.max_input_tokens,
supports_image_input=model_configuration_model.supports_image_input,
)
class ModelConfigurationView(BaseModel):
name: str
is_visible: bool
is_visible: bool | None = False
max_input_tokens: int | None = None
supports_image_input: bool
@@ -199,28 +196,3 @@ class BedrockModelsRequest(BaseModel):
aws_secret_access_key: str | None = None
aws_bearer_token_bedrock: str | None = None
provider_name: str | None = None # Optional: to save models to existing provider
class OllamaModelsRequest(BaseModel):
api_base: str
class OllamaFinalModelResponse(BaseModel):
name: str
max_input_tokens: int
supports_image_input: bool
class OllamaModelDetails(BaseModel):
"""Response model for Ollama /api/show endpoint"""
model_info: dict[str, Any]
capabilities: list[str] = []
def supports_completion(self) -> bool:
"""Check if this model supports completion/chat"""
return "completion" in self.capabilities
def supports_image_input(self) -> bool:
"""Check if this model supports image input"""
return "vision" in self.capabilities

View File

@@ -1,5 +1,3 @@
import csv
import io
import re
from datetime import datetime
from datetime import timedelta
@@ -16,7 +14,6 @@ from fastapi import Depends
from fastapi import HTTPException
from fastapi import Query
from fastapi import Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
@@ -300,43 +297,6 @@ def list_all_users(
)
@router.get("/manage/users/download")
def download_users_csv(
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse:
"""Download all users as a CSV file."""
# Get all users from the database
users = get_all_users(db_session)
# Create CSV content in memory
output = io.StringIO()
writer = csv.writer(output)
# Write CSV header
writer.writerow(["Email", "Role", "Status"])
# Write user data
for user in users:
writer.writerow(
[
user.email,
user.role.value if user.role else "",
"Active" if user.is_active else "Inactive",
]
)
# Prepare the CSV content for download
csv_content = output.getvalue()
output.close()
return StreamingResponse(
io.BytesIO(csv_content.encode("utf-8")),
media_type="text/csv",
headers={"Content-Disposition": "attachment;"},
)
@router.put("/manage/admin/users")
def bulk_invite_users(
emails: list[str] = Body(..., embed=True),

View File

@@ -0,0 +1,596 @@
import io
import time
from datetime import datetime
from datetime import timedelta
from typing import List
import requests
import sqlalchemy.exc
from bs4 import BeautifulSoup
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Form
from fastapi import HTTPException
from fastapi import Query
from fastapi import UploadFile
from pydantic import BaseModel
from sqlalchemy.orm import Session
from onyx.auth.users import current_user
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import InputType
from onyx.db.connector import create_connector
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.credentials import create_credential
from onyx.db.engine.sql_engine import get_session
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserFolder
from onyx.db.user_documents import calculate_user_files_token_count
from onyx.db.user_documents import create_user_files
from onyx.db.user_documents import get_user_file_indexing_status
from onyx.db.user_documents import share_file_with_assistant
from onyx.db.user_documents import share_folder_with_assistant
from onyx.db.user_documents import unshare_file_with_assistant
from onyx.db.user_documents import unshare_folder_with_assistant
from onyx.db.user_documents import upload_files_to_user_files_with_indexing
from onyx.file_processing.html_utils import web_html_cleanup
from onyx.server.documents.connector import trigger_indexing_for_cc_pair
from onyx.server.documents.models import ConnectorBase
from onyx.server.documents.models import CredentialBase
from onyx.server.query_and_chat.chat_backend import RECENT_DOCS_FOLDER_ID
from onyx.server.user_documents.models import MessageResponse
from onyx.server.user_documents.models import UserFileSnapshot
from onyx.server.user_documents.models import UserFolderSnapshot
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()

# No shared prefix here: each handler declares its full path
# (e.g. /user/folder, /user/file/...).
router = APIRouter()
class FolderCreationRequest(BaseModel):
    # Name and description for a new user-documents folder (both required).
    name: str
    description: str
@router.post("/user/folder")
def create_folder(
    request: FolderCreationRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
    """Create a user-owned documents folder and return its snapshot."""
    folder = UserFolder(
        user_id=user.id if user else None,
        name=request.name,
        description=request.description,
    )
    try:
        db_session.add(folder)
        db_session.commit()
    except sqlalchemy.exc.DataError as e:
        # Postgres rejects over-long values; turn that specific failure into a
        # friendly 400 and let everything else propagate unchanged.
        if "StringDataRightTruncation" not in str(e):
            raise
        raise HTTPException(
            status_code=400,
            detail="Folder name or description is too long. Please use a shorter name or description.",
        )
    return UserFolderSnapshot.from_model(folder)
@router.get(
    "/user/folder",
)
def user_get_folders(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFolderSnapshot]:
    """List the caller's folders plus the shared recent-docs folder, with each
    folder's file list restricted to files owned by the caller."""
    user_id = user.id if user else None
    owned_or_recent = (UserFolder.user_id == user_id) | (
        UserFolder.id == RECENT_DOCS_FOLDER_ID
    )
    folders = db_session.query(UserFolder).filter(owned_or_recent).all()

    snapshots: list[UserFolderSnapshot] = []
    for folder in folders:
        snapshot = UserFolderSnapshot.from_model(folder)
        # Hide other users' files (relevant for the shared recent-docs folder).
        snapshot.files = [f for f in snapshot.files if f.user_id == user_id]
        snapshots.append(snapshot)
    return snapshots
@router.get("/user/folder/{folder_id}")
def get_folder(
    folder_id: int,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
    """Fetch one folder the caller may access; 404 if missing/inaccessible."""
    user_id = user.id if user else None
    accessible = (UserFolder.user_id == user_id) | (
        UserFolder.id == RECENT_DOCS_FOLDER_ID
    )
    folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, accessible)
        .first()
    )
    if folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")

    snapshot = UserFolderSnapshot.from_model(folder)
    # Only expose the caller's own files (the recent-docs folder is shared).
    snapshot.files = [f for f in snapshot.files if f.user_id == user_id]
    return snapshot
@router.post("/user/file/upload")
def upload_user_files(
    files: List[UploadFile] = File(...),
    folder_id: int | None = Form(None),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    """Upload files into a folder (defaulting to recent-docs) and start indexing.

    Raises:
        HTTPException: 500 on unexpected upload/indexing failures.
    """
    # A folder_id of 0 means "no folder specified".
    if folder_id == 0:
        folder_id = None
    try:
        # Consolidated helper that stores the files and triggers indexing.
        user_files = upload_files_to_user_files_with_indexing(
            files, folder_id or RECENT_DOCS_FOLDER_ID, user, db_session
        )
        return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
    except HTTPException:
        # Don't mask deliberate HTTP errors from downstream code as 500s.
        raise
    except Exception as e:
        logger.error(f"Error uploading files: {str(e)}")
        raise HTTPException(
            status_code=500, detail=f"Failed to upload files: {str(e)}"
        ) from e
class FolderUpdateRequest(BaseModel):
    # Partial update: fields left as None (or empty) are not modified.
    name: str | None = None
    description: str | None = None
@router.put("/user/folder/{folder_id}")
def update_folder(
    folder_id: int,
    request: FolderUpdateRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFolderSnapshot:
    """Rename and/or re-describe a folder owned by the caller."""
    owner_id = user.id if user else None
    folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, UserFolder.user_id == owner_id)
        .first()
    )
    if folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")

    # Only overwrite fields that were provided with a truthy value.
    for attribute, value in (
        ("name", request.name),
        ("description", request.description),
    ):
        if value:
            setattr(folder, attribute, value)
    db_session.commit()
    return UserFolderSnapshot.from_model(folder)
@router.delete("/user/folder/{folder_id}")
def delete_folder(
    folder_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Delete a folder owned by the caller; 404 if it isn't theirs."""
    owner_id = user.id if user else None
    target = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, UserFolder.user_id == owner_id)
        .first()
    )
    if target is None:
        raise HTTPException(status_code=404, detail="Folder not found")
    db_session.delete(target)
    db_session.commit()
    return MessageResponse(message="Folder deleted successfully")
@router.delete("/user/file/{file_id}")
def delete_file(
    file_id: int,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Delete one of the caller's files; 404 if it isn't theirs."""
    owner_id = user.id if user else None
    target_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if target_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    db_session.delete(target_file)
    db_session.commit()
    return MessageResponse(message="File deleted successfully")
class FileMoveRequest(BaseModel):
    # Destination folder; None moves the file out of any folder.
    new_folder_id: int | None
@router.put("/user/file/{file_id}/move")
def move_file(
    file_id: int,
    request: FileMoveRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
    """Move a caller-owned file into a different folder (or out of all)."""
    owner_id = user.id if user else None
    target_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if target_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    target_file.folder_id = request.new_folder_id
    db_session.commit()
    return UserFileSnapshot.from_model(target_file)
@router.get("/user/file-system")
def get_file_system(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFolderSnapshot]:
    """Return snapshots of every folder owned by the caller."""
    owner_id = user.id if user else None
    owned_folders = (
        db_session.query(UserFolder).filter(UserFolder.user_id == owner_id).all()
    )
    return [UserFolderSnapshot.from_model(f) for f in owned_folders]
@router.put("/user/file/{file_id}/rename")
def rename_file(
    file_id: int,
    name: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> UserFileSnapshot:
    """Rename one of the caller's files; 404 if it isn't theirs."""
    owner_id = user.id if user else None
    target_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if target_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    target_file.name = name
    db_session.commit()
    return UserFileSnapshot.from_model(target_file)
class ShareRequest(BaseModel):
    # Assistant to (un)share a file or folder with.
    assistant_id: int
@router.post("/user/file/{file_id}/share")
def share_file(
    file_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Grant an assistant access to one of the caller's files."""
    owner_id = user.id if user else None
    owned_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if owned_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    share_file_with_assistant(file_id, request.assistant_id, db_session)
    return MessageResponse(message="File shared successfully with the assistant")
@router.post("/user/file/{file_id}/unshare")
def unshare_file(
    file_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Revoke an assistant's access to one of the caller's files."""
    owner_id = user.id if user else None
    owned_file = (
        db_session.query(UserFile)
        .filter(UserFile.id == file_id, UserFile.user_id == owner_id)
        .first()
    )
    if owned_file is None:
        raise HTTPException(status_code=404, detail="File not found")
    unshare_file_with_assistant(file_id, request.assistant_id, db_session)
    return MessageResponse(message="File unshared successfully from the assistant")
@router.post("/user/folder/{folder_id}/share")
def share_folder(
    folder_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Share a caller-owned folder (and the files in it) with an assistant."""
    owner_id = user.id if user else None
    owned_folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, UserFolder.user_id == owner_id)
        .first()
    )
    if owned_folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")
    share_folder_with_assistant(folder_id, request.assistant_id, db_session)
    return MessageResponse(
        message="Folder and its files shared successfully with the assistant"
    )
@router.post("/user/folder/{folder_id}/unshare")
def unshare_folder(
    folder_id: int,
    request: ShareRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Revoke an assistant's access to a caller-owned folder and its files."""
    owner_id = user.id if user else None
    owned_folder = (
        db_session.query(UserFolder)
        .filter(UserFolder.id == folder_id, UserFolder.user_id == owner_id)
        .first()
    )
    if owned_folder is None:
        raise HTTPException(status_code=404, detail="Folder not found")
    unshare_folder_with_assistant(folder_id, request.assistant_id, db_session)
    return MessageResponse(
        message="Folder and its files unshared successfully from the assistant"
    )
class CreateFileFromLinkRequest(BaseModel):
    # URL of the web page to ingest as a text file.
    url: str
    # Destination folder; None falls back to the default (-1) folder sentinel.
    folder_id: int | None
@router.post("/user/file/create-from-link")
def create_file_from_link(
    request: CreateFileFromLinkRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> list[UserFileSnapshot]:
    """Fetch a web page, clean its HTML down to text, store it as a user file,
    and wire up a connector/credential pair so it gets indexed immediately.

    Raises:
        HTTPException: 400 if the URL cannot be fetched.
    """
    try:
        # Bound the fetch so an unresponsive server can't hang this endpoint.
        response = requests.get(request.url, timeout=30)
        response.raise_for_status()
        content = response.text
        soup = BeautifulSoup(content, "html.parser")
        parsed_html = web_html_cleanup(soup, mintlify_cleanup_enabled=False)
        file_name = f"{parsed_html.title or 'Untitled'}.txt"
        file_content = parsed_html.cleaned_text.encode()
        file = UploadFile(filename=file_name, file=io.BytesIO(file_content))
        user_files = create_user_files(
            [file], request.folder_id or -1, user, db_session, link_url=request.url
        )
        # Create connector and credential (same as in upload_user_files)
        for user_file in user_files:
            connector_base = ConnectorBase(
                name=f"UserFile-{user_file.file_id}-{int(time.time())}",
                source=DocumentSource.FILE,
                input_type=InputType.LOAD_STATE,
                connector_specific_config={
                    "file_locations": [user_file.file_id],
                    "file_names": [user_file.name],
                    "zip_metadata": {},
                },
                refresh_freq=None,
                prune_freq=None,
                indexing_start=None,
            )
            connector = create_connector(
                db_session=db_session,
                connector_data=connector_base,
            )
            credential_info = CredentialBase(
                credential_json={},
                admin_public=True,
                source=DocumentSource.FILE,
                curator_public=True,
                groups=[],
                name=f"UserFileCredential-{user_file.file_id}-{int(time.time())}",
            )
            credential = create_credential(credential_info, user, db_session)
            cc_pair = add_credential_to_connector(
                db_session=db_session,
                user=user,
                connector_id=connector.id,
                credential_id=credential.id,
                cc_pair_name=f"UserFileCCPair-{int(time.time())}",
                access_type=AccessType.PRIVATE,
                auto_sync_options=None,
                groups=[],
                is_user_file=True,
            )
            user_file.cc_pair_id = cc_pair.data
            db_session.commit()
            # Trigger immediate indexing with highest priority
            tenant_id = get_current_tenant_id()
            trigger_indexing_for_cc_pair(
                [], connector.id, False, tenant_id, db_session, is_user_file=True
            )
        db_session.commit()
        return [UserFileSnapshot.from_model(user_file) for user_file in user_files]
    except requests.RequestException as e:
        # Chain the cause so the original network error is preserved (B904).
        raise HTTPException(
            status_code=400, detail=f"Failed to fetch URL: {str(e)}"
        ) from e
@router.get("/user/file/indexing-status")
def get_files_indexing_status(
    file_ids: list[int] = Query(...),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict[int, bool]:
    """Get indexing status for multiple files.

    Returns a mapping of file ID -> whether that file has been indexed.
    """
    return get_user_file_indexing_status(file_ids, db_session)
@router.get("/user/file/token-estimate")
def get_files_token_estimate(
    file_ids: list[int] = Query([]),
    folder_ids: list[int] = Query([]),
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> dict:
    """Estimate the combined token count of the given files and folders."""
    return {
        "total_tokens": calculate_user_files_token_count(
            file_ids, folder_ids, db_session
        )
    }
class ReindexFileRequest(BaseModel):
    # ID of the user file whose content should be re-indexed.
    file_id: int
@router.post("/user/file/reindex")
def reindex_file(
    request: ReindexFileRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Force a re-index of one user file via its connector-credential pair.

    Raises:
        HTTPException: 404 if the file or its cc-pair cannot be found,
            400 if the file has no cc-pair, 500 if triggering indexing fails.
    """
    user_id = user.id if user else None
    # Only files owned by the requesting user may be reindexed.
    user_file_to_reindex = (
        db_session.query(UserFile)
        .filter(UserFile.id == request.file_id, UserFile.user_id == user_id)
        .first()
    )
    if not user_file_to_reindex:
        raise HTTPException(status_code=404, detail="File not found")
    if not user_file_to_reindex.cc_pair_id:
        raise HTTPException(
            status_code=400,
            detail="File does not have an associated connector-credential pair",
        )
    # Get the connector id from the cc_pair
    cc_pair = (
        db_session.query(ConnectorCredentialPair)
        .filter_by(id=user_file_to_reindex.cc_pair_id)
        .first()
    )
    if not cc_pair:
        raise HTTPException(
            status_code=404, detail="Associated connector-credential pair not found"
        )
    # Trigger immediate reindexing with highest priority
    tenant_id = get_current_tenant_id()
    # Update the cc_pair status to ACTIVE to ensure it's processed
    cc_pair.status = ConnectorCredentialPairStatus.ACTIVE
    # Commit first so the indexing task observes the ACTIVE status.
    db_session.commit()
    try:
        trigger_indexing_for_cc_pair(
            [], cc_pair.connector_id, True, tenant_id, db_session, is_user_file=True
        )
        return MessageResponse(
            message="File reindexing has been triggered successfully"
        )
    except Exception as e:
        logger.error(
            f"Error triggering reindexing for file {request.file_id}: {str(e)}"
        )
        raise HTTPException(
            status_code=500, detail=f"Failed to trigger reindexing: {str(e)}"
        )
class BulkCleanupRequest(BaseModel):
    # Folder to clean up; -2 is treated as "all folders" by the handler.
    folder_id: int
    # Only delete files created more than this many days ago; None deletes all.
    days_older_than: int | None = None
@router.post("/user/file/bulk-cleanup")
def bulk_cleanup_files(
    request: BulkCleanupRequest,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> MessageResponse:
    """Bulk delete files older than specified days in a folder.

    Raises:
        HTTPException: 404 if a user-owned folder was targeted but not found.
    """
    user_id = user.id if user else None
    logger.info(
        f"Bulk cleanup request: folder_id={request.folder_id}, days_older_than={request.days_older_than}"
    )
    # Check if folder exists
    # (the shared recent-docs folder is not user-owned, so skip the check for it)
    if request.folder_id != RECENT_DOCS_FOLDER_ID:
        folder = (
            db_session.query(UserFolder)
            .filter(UserFolder.id == request.folder_id, UserFolder.user_id == user_id)
            .first()
        )
        if not folder:
            raise HTTPException(status_code=404, detail="Folder not found")
    filter_criteria = [UserFile.user_id == user_id]
    # Filter by folder
    if request.folder_id != -2:  # -2 means all folders
        filter_criteria.append(UserFile.folder_id == request.folder_id)
    # Filter by date if days_older_than is provided
    if request.days_older_than is not None:
        # NOTE(review): utcnow() is naive (and deprecated in 3.12); this assumes
        # UserFile.created_at is also stored as naive UTC — confirm before changing.
        cutoff_date = datetime.utcnow() - timedelta(days=request.days_older_than)
        logger.info(f"Filtering files older than {cutoff_date} (UTC)")
        filter_criteria.append(UserFile.created_at < cutoff_date)
    # Get all files matching the criteria
    files_to_delete = db_session.query(UserFile).filter(*filter_criteria).all()
    logger.info(f"Found {len(files_to_delete)} files to delete")
    # Delete files
    delete_count = 0
    for file in files_to_delete:
        logger.debug(
            f"Deleting file: id={file.id}, name={file.name}, created_at={file.created_at}"
        )
        db_session.delete(file)
        delete_count += 1
    db_session.commit()
    return MessageResponse(message=f"Successfully deleted {delete_count} files")

Some files were not shown because too many files have changed in this diff Show More