Compare commits

304 Commits

Author SHA1 Message Date
pablodanswer
d97e96b3f0 build org 2024-12-01 17:20:43 -08:00
pablodanswer
911fbfa5a6 k 2024-12-01 17:14:09 -08:00
pablodanswer
d02305671a k 2024-12-01 17:12:54 -08:00
pablodanswer
bdfa29dcb5 slack chat 2024-12-01 15:06:32 -08:00
pablodanswer
897ed03c19 fix memoization 2024-12-01 15:02:46 -08:00
pablodanswer
49f0c4f1f8 fix memoization 2024-12-01 15:02:28 -08:00
pablodanswer
338c02171b rm shs 2024-12-01 12:46:07 -08:00
pablodanswer
ef1ade84b6 k 2024-12-01 12:46:07 -08:00
pablodanswer
7c81566c54 k 2024-12-01 12:46:07 -08:00
pablodanswer
c9df0aea47 k 2024-12-01 12:46:07 -08:00
pablodanswer
92e0aeecba k 2024-12-01 12:46:07 -08:00
pablodanswer
30c7e07783 update for all screen sizes 2024-12-01 12:46:07 -08:00
pablodanswer
e99704e9bd update sidebar line 2024-12-01 12:46:07 -08:00
pablodanswer
7f36387f7f k 2024-12-01 12:46:07 -08:00
pablodanswer
407592445b minor nit 2024-12-01 12:46:07 -08:00
pablodanswer
2e533d8188 minor date range clarity 2024-12-01 12:46:07 -08:00
pablodanswer
5b56869937 quick unification of icons 2024-12-01 12:46:07 -08:00
pablodanswer
7baeab54e2 address comments 2024-12-01 12:46:07 -08:00
pablodanswer
aefcfb75ef k 2024-12-01 12:46:07 -08:00
pablodanswer
e5adcb457d k 2024-12-01 12:46:07 -08:00
pablodanswer
db6463644a small nit 2024-12-01 12:46:07 -08:00
pablodanswer
e26ba70cc6 update filters 2024-12-01 12:46:07 -08:00
pablodanswer
66ff723c94 badge up 2024-12-01 12:46:07 -08:00
pablodanswer
dda66f2178 finalize changes 2024-12-01 12:46:07 -08:00
pablodanswer
0a27f72d20 cleanup complete 2024-12-01 12:46:07 -08:00
pablodanswer
fe397601ed minor cleanup 2024-12-01 12:46:07 -08:00
pablodanswer
3bc187c1d1 clean up unused components 2024-12-01 12:46:07 -08:00
pablodanswer
9a0b9eecf0 source types update 2024-12-01 12:46:07 -08:00
pablodanswer
e08db414c0 viewport height update 2024-12-01 12:46:07 -08:00
pablodanswer
b5734057b7 various updates 2024-12-01 12:46:07 -08:00
pablodanswer
56beb3ec82 k 2024-12-01 12:46:07 -08:00
pablodanswer
9f2c8118d7 updates 2024-12-01 12:46:07 -08:00
pablodanswer
6e4a3d5d57 finalize tags 2024-12-01 12:46:07 -08:00
pablodanswer
5b3dcf718f scroll nit 2024-12-01 12:46:07 -08:00
pablodanswer
07bd20b5b9 push fade 2024-12-01 12:46:07 -08:00
pablodanswer
eb01b175ae update logs 2024-12-01 12:46:06 -08:00
pablodanswer
6f55e5fe56 default 2024-12-01 12:46:06 -08:00
pablodanswer
18e7609bfc update scroll 2024-12-01 12:46:06 -08:00
pablodanswer
dd69ec6cdb cleanup 2024-12-01 12:46:06 -08:00
pablodanswer
e961fa2820 fix mystery reorg 2024-12-01 12:46:06 -08:00
pablodanswer
d41bf9a3ff clean up 2024-12-01 12:46:06 -08:00
pablodanswer
e3a6c76d51 k 2024-12-01 12:46:06 -08:00
pablodanswer
719c2aa0df update 2024-12-01 12:46:06 -08:00
pablodanswer
09f487e402 updates 2024-12-01 12:46:06 -08:00
pablodanswer
33a1548fc1 k 2024-12-01 12:46:06 -08:00
pablodanswer
e87c93226a updated chat flow 2024-12-01 12:46:06 -08:00
pablodanswer
5e11a79593 proper no assistant typing + no assistant modal 2024-12-01 12:46:06 -08:00
Chris Weaver
f12eb4a5cf Fix assistant prompt zero-ing (#3293) 2024-11-30 04:45:40 +00:00
Chris Weaver
16863de0aa Improve model token limit detection (#3292)
* Properly find context window for ollama llama

* Better ollama support + upgrade litellm

* Ugprade OpenAI as well

* Fix mypy
2024-11-30 04:42:56 +00:00
Weves
63d1eefee5 Add read_only=True for xlsx parsing 2024-11-28 16:02:02 -08:00
pablodanswer
e338677896 order seeding 2024-11-28 15:41:10 -08:00
hagen-danswer
7be80c4af9 increased the pagination limit for confluence spaces (#3288) 2024-11-28 19:04:38 +00:00
rkuo-danswer
7f1e4a02bf Feature/kill indexing (#3213)
* checkpoint

* add celery termination of the task

* rename to RedisConnectorPermissionSyncPayload, add RedisLock to more places, add get_active_search_settings

* rename payload

* pretty sure these weren't named correctly

* testing in progress

* cleanup

* remove space

* merge fix

* three dots animation on Pausing

* improve messaging when connector is stopped or killed and animate buttons

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-28 05:32:45 +00:00
rkuo-danswer
5be7d27285 use indexing flag in db for manually triggering indexing (#3264)
* use indexing flag in db for manually trigger indexing

* add comment.

* only try to release the lock if we actually succeeded with the lock

* ensure we don't trigger manual indexing on anything but the primary search settings

* comment usage of primary search settings

* run check for indexing immediately after indexing triggers are set

* reorder fix
2024-11-28 01:34:34 +00:00
Weves
fd84b7a768 Remove duplicate API key router 2024-11-27 16:30:59 -08:00
Subash-Mohan
36941ae663 fix: Cannot configure API keys #3191 2024-11-27 16:25:00 -08:00
Matthew Holland
212353ed4a Fixed default feedback options 2024-11-27 16:23:52 -08:00
Richard Kuo (Danswer)
eb8708f770 the word "error" might be throwing off sentry 2024-11-27 14:31:21 -08:00
Chris Weaver
ac448956e9 Add handling for rate limiting (#3280) 2024-11-27 14:22:15 -08:00
pablodanswer
634a0b9398 no stack by default (#3278) 2024-11-27 20:58:21 +00:00
hagen-danswer
09d3e47c03 Perm sync behavior change (#3262)
* Change external permissions behavior

* fixed behavior

* added error handling

* LLM the goat

* comment

* simplify

* fixed

* done

* limits increased

* added a ton of logging

* uhhhh
2024-11-27 20:04:15 +00:00
pablodanswer
9c0cc94f15 refresh router -> refresh assistants (#3271) 2024-11-27 19:11:58 +00:00
hagen-danswer
07dfde2209 add continue in danswer button to slack bot responses (#3239)
* all done except routing

* fixed initial changes

* added backend endpoint for duplicating a chat session from Slack

* got chat duplication routing done

* got login routing working

* improved answer handling

* finished all checks

* finished all!

* made sure it works with google oauth

* dont remove that lol

* fixed weird thing

* bad comments
2024-11-27 18:25:38 +00:00
pablodanswer
28e2b78b2e Fix search dropdown (#3269)
* validate dropdown

* validate

* update organization

* move to utils
2024-11-27 16:10:07 +00:00
Emerson Gomes
0553062ac6 Adds icons for Google Gemini models and custom model icons for L… (#3218)
* Add description for Google Gemini models and custom model icons for LiteLLM (OpenAI) proxied models

* Adds Vertex AI aliases for Claude

---------

Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com>
2024-11-26 10:13:21 -08:00
hagen-danswer
284e375ba3 Merge pull request #3257 from danswer-ai/minor-perm-sync
Improved logging for confluence doc sync and robust user creation
2024-11-26 09:59:38 -08:00
hagen-danswer
1f2f7d0ac2 Improved logging for confluence doc sync and robust user creation 2024-11-26 08:51:15 -08:00
pablodanswer
2ecc28b57d remove unused stripe promise (#3248) 2024-11-26 01:50:39 +00:00
rkuo-danswer
77cf9b3539 improve messaging and UI around cleanup of leftover index attempts (#3247)
* improve messaging and UI around cleanup of leftover index attempts

* add tag on init
2024-11-25 22:27:14 +00:00
Weves
076ce2ebd0 Saml fix 2024-11-25 09:12:43 -08:00
pablodanswer
b625ee32a7 File handling cleanup (#3240)
* fix google sites connector

* minior cleanup

* rm comments
2024-11-25 04:06:47 +00:00
Richard Kuo (Danswer)
c32b93fcc3 increase indexing worker concurrency to 3 2024-11-24 18:11:58 -08:00
pablodanswer
1c8476072e Assistant cleanup (#3236)
* minor cleanup

* ensure users don't modify built-in attributes of assistants

* update sidebar

* k

* update update flow + assistant creation
2024-11-25 00:13:34 +00:00
Chris Weaver
7573416ca1 Fix API keys for MIT users (#3237) 2024-11-24 16:55:19 -08:00
Yuhong Sun
86d8666481 Add Test Case 2024-11-24 15:42:14 -08:00
Yuhong Sun
8abcde91d4 Fix Test (#3242) 2024-11-24 14:31:28 -08:00
Yuhong Sun
3466451d51 Fix Prompt for Non Function Calling LLMs (#3241) 2024-11-24 14:16:57 -08:00
Yuhong Sun
413891f143 Token Level Log (#3238) 2024-11-23 18:41:50 -08:00
Yuhong Sun
7a0a4d4b79 Remove Deprecated Endpoints (#3235) 2024-11-23 14:44:23 -08:00
Yuhong Sun
a3439605a5 Remove Dead Code (#3234) 2024-11-23 14:31:59 -08:00
pablodanswer
694e79f5e1 minor enforcement of CSV length for internal processing (#3109) 2024-11-23 21:05:30 +00:00
pablodanswer
5dfafc8612 minor calendar cleanup (#3219) 2024-11-23 21:01:05 +00:00
Yuhong Sun
62a4aa10db Refactor Search (#3233) 2024-11-23 13:42:54 -08:00
Yuhong Sun
a357cdc4c9 Remove Dead Code (#3232) 2024-11-23 13:21:27 -08:00
Yuhong Sun
84615abfdd Seeding (#3231) 2024-11-23 13:12:42 -08:00
pablodanswer
8ae6b1960b Bugfix/usage report (#3075)
* fix pagination

* update side

* fixed query history

* minor update

* minor update

* typing
2024-11-23 20:11:39 +00:00
James Jordan
d9b87bbbc2 Fixed 400 error when author of ticket is no longer an active user in a Zendesk account. (#3168) 2024-11-23 12:15:38 -08:00
Sanju Lokuhitige
a0065b01af Update CONTRIBUTING.md (#3112)
fix Formatting and Linting hyperlink
2024-11-23 12:13:23 -08:00
pablodanswer
c5306148a3 Ensure daterange not consistently re rendered (#3229)
* ensure daterange not consistently re rendered

* minor clean up
2024-11-23 19:35:00 +00:00
hagen-danswer
1e17934de4 Merge pull request #3214 from danswer-ai/fix-slack-ui
cleaned up new slack bot creation
2024-11-23 10:53:47 -08:00
pablodanswer
93add96ccc Various Nits (#3228) 2024-11-23 10:53:24 -08:00
rkuo-danswer
3a466a4b08 add minimal retries to confluence probe (#3222)
* add minimal retries to confluence probe

* name variable correctly
2024-11-23 17:11:15 +00:00
hagen-danswer
85cbd9caed Increased slim doc batch size for confluence connector (#3221) 2024-11-23 00:42:15 +00:00
pablodanswer
9dc23bf3e7 revert to previous doc select logic (#3217)
* revert to previous doc select logic

* k
2024-11-22 23:26:53 +00:00
hagen-danswer
e32809f7ca moved it outside 2024-11-22 14:59:58 -08:00
hagen-danswer
3e58f9f8ab fixed ugly stuff 2024-11-22 14:39:55 -08:00
pablodanswer
2381c8d498 Refresh all assistants on assistant refresh (#3216)
* k

* k
2024-11-22 22:38:23 +00:00
hagen-danswer
c6dadb24dc cleaned up new slack bot creation 2024-11-22 11:53:51 -08:00
hagen-danswer
5dc07d4178 Each section is now cleaned before being chunked (#3210)
* Each section is now cleaned before being chunked

* k

---------

Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2024-11-22 19:06:19 +00:00
Chris Weaver
129c8f8faf Add start/end date ability for query history as CSV endpoint (#3211) 2024-11-22 18:29:13 +00:00
pablodanswer
67bfcabbc5 llm provider causing re render in effect (#3205)
* llm provider causing re render in effect

* clean

* unused

* k
2024-11-22 16:53:24 +00:00
rkuo-danswer
9819aa977a implement double check pattern for error conditions (#3201)
* Move unfenced check to check_for_indexing. implement a double check pattern for all indexing error checks

* improved commenting

* exclusions
2024-11-22 04:21:02 +00:00
hagen-danswer
8d5b8a4028 Merge pull request #3202 from danswer-ai/toggled_chat_default
Update default sidebar toggle
2024-11-21 19:53:05 -08:00
pablodanswer
682319d2e9 Bugfix/curator interface (#3198)
* mystery solved

* update config

* update

* update

* update user role

* remove values
2024-11-22 02:33:09 +00:00
hagen-danswer
fe1400aa36 replace deprecated confluence group api endpoint (#3197)
* replace deprecated confluence group api endpoint

* reworked it

* properly escaped the user query

* less passing around is_cloud

* done
2024-11-22 01:51:29 +00:00
pablodanswer
e3573b2bc1 add comment 2024-11-21 17:11:11 -08:00
pablodanswer
35b5c44cc7 update default sidebar toggle 2024-11-21 17:09:56 -08:00
rkuo-danswer
5eddc89b5a merge indexing and heartbeat callbacks (and associated lock reacquisi… (#3178)
* merge indexing and heartbeat callbacks (and associated lock reacquisition). no db updates

* review fixes
2024-11-21 23:48:58 +00:00
hagen-danswer
9a492ceb6d admins cant be set as curator on backend (#3194)
* set-curator

* updated error
2024-11-21 23:33:29 +00:00
rkuo-danswer
3c54ae9de9 Bugfix/redis wait (#3169)
* rename to payload

* log redis info replication on primary worker startup

* fix mypy

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-21 23:11:00 +00:00
pablodanswer
13f08f3ebb Horizontal scrollbar (#3195)
* clean horizontal scrollbar

* account for additional edge case
2024-11-21 22:08:21 +00:00
pablodanswer
bd9f15854f provider fix (#3187)
* clean horizontal scrollbar

* provider fix

* ensure proper migration

* k

* update migration

* Revert "clean horizontal scrollbar"

This reverts commit fa592a1b7a.
2024-11-21 22:08:16 +00:00
pablodanswer
366aa2a8ea quick fix (#3200) 2024-11-21 14:07:55 -08:00
pablodanswer
deee237c7e Sheet update (#3189)
* quick pass

* k

* update sheet

* add multiple sheet stuff

* k

* finalized

* update configuration
2024-11-21 18:07:00 +00:00
hagen-danswer
100b4a0d16 Added Slim connector for Jira (#3181)
* Added Slim connector for Jira

* fixed testing

* more cleanup of Jira connector

* cleanup
2024-11-21 17:00:20 +00:00
rkuo-danswer
70207b4b39 improve web testing (#3162)
* shared admin level test dependency

* change to on - push (recommended by chromatic)

* change playwright reporter to list, name test jobs

* use test tags ... much cleaner

* test vs prod

* try copying templates

* run with localhost?

* revert to dev

* new tests and a bit of refactoring

* add additional checks so that page snapshots reflect loaded state

* more admin tests

* User Management tests

* remaining admin pages

* test search and chat

* await fix and exclude UI that changes with dates.
2024-11-21 04:01:15 +00:00
pablodanswer
50826b6bef Formatting Niceties (#3183)
* search bar formatting

* update styling
2024-11-21 03:11:26 +00:00
pablodanswer
3f648cbc31 Folder clarity (#3180)
* folder clarity

* k
2024-11-21 03:11:17 +00:00
pablodanswer
c875a4774f valid props (#3186) 2024-11-21 01:13:54 +00:00
hagen-danswer
049091eb01 decreased confluence retry times and added more logging (#3184)
* decreased confluence retry times and added more logging

* added check on connector startup

* no retries!

* fr no retries
2024-11-21 00:00:14 +00:00
pablodanswer
3dac24542b silence small error (#3182) 2024-11-20 22:46:38 +00:00
pablodanswer
194dcb593d update slack redirect + token missing check (#3179)
* update slack redirect + token missing check

* reset time
2024-11-20 21:42:54 +00:00
pablodanswer
bf291d0c0a Fix missing json (#3177)
* initial steps

* k

* remove logs

* k

* k
2024-11-20 21:24:43 +00:00
rkuo-danswer
8309f4a802 test overlapping connectors (but using a source that is way too big a… (#3152)
* test overlapping connectors (but using a source that is way too big and slow, fix that next)

* pass thru secrets

* rename

* rename again

* now we are fixing it

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-20 21:12:01 +00:00
pablodanswer
0ff2565125 ensure margin properly applied (#3176)
* ensure margin properly applied

* formatting
2024-11-20 20:04:45 +00:00
hagen-danswer
e89dcd7f84 added logging and bugfixing to conf (#3167)
* standardized escaping of CQL strings

* think i found it

* fix

* should be fixed

* added handling for special linking behavior in confluence

* Update onyx_confluence.py

* Update onyx_confluence.py

---------

Co-authored-by: rkuo-danswer <rkuo@danswer.ai>
2024-11-20 18:40:21 +00:00
pablodanswer
645e7e828e Add Google Tag Manager for Web Cloud Build (#3173)
* add gtm for cloud build

* update github workflow
2024-11-20 17:38:33 +00:00
pablodanswer
2a54f14195 ensure everythigng has a default max height in selectorformfield (#3174) 2024-11-20 17:26:22 +00:00
hagen-danswer
9209fc804b multiple slackbot support (#3077)
* multiple slackbot support

* app_id + tenant_id key

* removed kv store stuff

* fixed up mypy and migration

* got frontend working for multiple slack bots

* some frontend stuff

* alembic fix

* might be valid

* refactor dun

* alembic stuff

* temp frontend stuff

* alembic stuff

* maybe fixed alembic

* maybe dis fix

* im getting mad

* api names changed

* tested

* almost done

* done

* routing nonsense

* done!

* done!!

* fr done

* doneski

* fix alembic migration

* getting mad again

* PLEASE IM BEGGING YOU
2024-11-20 01:49:43 +00:00
rkuo-danswer
b712877701 Merge pull request #3165 from danswer-ai/bugfix/pruning_logs
improve logging around pruning
2024-11-19 13:19:31 -08:00
Richard Kuo (Danswer)
e6df32dcc3 improve logging around pruning 2024-11-19 12:41:21 -08:00
Chris Weaver
eb81258a23 Update README.md
Fix slack link
2024-11-19 08:02:35 -08:00
hagen-danswer
487ef4acc0 Merge pull request #3160 from danswer-ai/add-to-admin-chat-sessions-api
Extend query history API
2024-11-19 07:28:12 -08:00
pablodanswer
9b7cc83eae add new date search filter (#3065)
* add new complicated filters

* clarity updates

* update date range filter
2024-11-19 03:42:42 +00:00
Weves
ce3124f9e4 Extend query history API 2024-11-18 17:50:21 -08:00
rkuo-danswer
e69303e309 add helpful hint on 507 (#3157)
* add helpful hint on 507

* add helpful hint to the direct exception in _index_vespa_chunk
2024-11-19 01:08:32 +00:00
rkuo-danswer
6e698ac84a Hardening deletion when cc pair relationships are left over (#3154)
* more logs

* this fence should be set to None

* type hinting

* reset deletion attempt if conditions are inconsistent

* always clean up in db if we reach reconciliation

* add reset method

* more logging

* harden up error checking
2024-11-19 01:07:59 +00:00
pablodanswer
d69180aeb8 add additional theming options (#3155)
* add additional theming options

* nit

* Update Filters.tsx
2024-11-18 22:56:48 +00:00
rkuo-danswer
aa37051be9 Bugfix/indexing redux (#3151)
* raise indexing lock timeout

* refactor unknown index attempts and redis lock
2024-11-18 22:47:31 +00:00
pablodanswer
a7d95661b3 Add assistant categories (#3064)
* add assistant categories v1

* functionality finalized

* finalize

* update assistant category display

* nit

* add tests

* post rebase update

* minor update to tests

* update typing

* finalize

* typing

* nit

* alembic

* alembic (once again)
2024-11-18 20:33:48 +00:00
Chris Weaver
33ee899408 Long term logs (#3150) 2024-11-18 10:48:03 -08:00
hagen-danswer
954b5b2a56 Made external permissioned users and slack users show diff (#3147)
* Made external permissioned users and slack users show diff

* finished

* Fix typing

* k

* Fix

* k

---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
2024-11-17 01:13:47 +00:00
pablodanswer
521425a4f2 nits + pricing 2024-11-16 16:28:37 -08:00
hagen-danswer
618bc02d54 Fixed int test (#3148) 2024-11-16 18:13:06 +00:00
rkuo-danswer
b7de74fdf8 Feature/playwright tests (#3129)
* initial PoC

* preliminary working config

* first cut at chromatic tests

* first cut at chromatic tests

* fix yaml

* fix yaml again

* use workingDir

* adapt playwright example

* remove env

* fix working directory

* fix more paths

* fix dir

* add playwright setup

* accidentally deleted a step

* update test

* think we don't need home.png right now

* remove unused home.png

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-16 04:26:17 +00:00
hagen-danswer
6e83fe3a39 reworked drive+confluence frontend and implied backend changes (#3143)
* reworked drive+confluence frontend and implied backend changes

* fixed oauth admin tests

* fixed service account tests

* frontend cleanup

* copy change

* details!

* added key

* so good

* whoops!

* fixed mnore treljsertjoslijt

* has issue with boolean form

* should be done
2024-11-16 03:38:30 +00:00
Weves
259fc049b7 Add error message on JSON decode error in CustomTool 2024-11-15 20:00:12 -08:00
rkuo-danswer
7015e6f2ab Bugfix/overlapping connectors (#3138)
* fix tenant logging

* upsert only new/updated docs, but always upsert document to cc pair relationship

* better logging and rough cut at testing
2024-11-16 00:47:52 +00:00
pablodanswer
24be13c015 Improved tokenizer fallback (#3132)
* silence warning

* improved fallback logic

* k

* minor cosmetic update

* minor logic update

* nit
2024-11-14 20:13:29 -08:00
pablodanswer
ddff7ecc3f minor configuration updates (#3134) 2024-11-14 18:09:30 -08:00
Yuhong Sun
97932dc44b Fix Quotes Prompting (#3137) 2024-11-14 17:28:03 -08:00
rkuo-danswer
637b6d9e75 Merge pull request #3135 from danswer-ai/bugfix/helm_ct_python_setup
unnecessary python setup
2024-11-14 14:57:12 -08:00
Richard Kuo (Danswer)
54dc1ac917 unnecessary python setup 2024-11-14 11:14:12 -08:00
rkuo-danswer
21d5cc43f8 Merge pull request #3131 from danswer-ai/bugfix/session_text
use text()
2024-11-13 20:24:14 -08:00
pablodanswer
7c841051ed Cohere (#3111)
* add cohere default

* finalize

* minor improvement

* update

* update

* update configs

* ensure we properly expose name(space) for slackbot

* update config

* config
2024-11-14 01:58:54 +00:00
pablodanswer
6e91964924 minor clarity (#3116) 2024-11-14 01:42:21 +00:00
pablodanswer
facf1d55a0 Cloud improvements (#3099)
* add improved cloud configuration

* fix typing

* finalize slackbot improvements

* minor update

* finalized keda

* moderate slackbot switch

* update some configs

* revert

* include reset engine!
2024-11-13 23:52:52 +00:00
rkuo-danswer
d68f8d6fbc scale indexing sql pool based on concurrency (#3130) 2024-11-13 23:26:13 +00:00
Richard Kuo (Danswer)
65a205d488 use text() 2024-11-13 15:03:21 -08:00
hagen-danswer
485f3f72fa Updated google copy and added non admin oauth support (#3120)
* Updated google copy and added non admin oauth support

* backend update

* accounted for oauth

* further removed class variables

* updated sets
2024-11-13 20:07:10 +00:00
rkuo-danswer
dcbea883ae add creator id to cc pair (#3121)
* add creator id to cc pair

* fix alembic head

* show email instead of UUID

* safer check on email

* make foreign key relationships optional

* always allow creator to edit (per hagen)

* use primary join

* no index_doc_batch spam

* try this again

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-13 19:35:08 +00:00
hagen-danswer
a50a3944b3 Make curators able to create permission synced connectors (#3126)
* Make curators able to create permission synced connectors

* removed editing permission synced connectors for curators

* updated tests to use access type instead of is_public

* update copy
2024-11-13 18:58:23 +00:00
hagen-danswer
60471b6a73 Added support for page within a page in Confluence (#3125) 2024-11-13 16:39:00 +00:00
rkuo-danswer
d703e694ce limited role api keys (#3115)
* in progress PoC

* working limited user, needs routes to be marked next

* make selected endpoint available to limited user role

* xfail on test_slack_prune

* add comment to sync function

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-13 16:15:43 +00:00
hagen-danswer
6066042fef Merge pull request #3124 from danswer-ai/fix-doc-sync
quick fix for google doc sync
2024-11-13 07:30:52 -08:00
hagen-danswer
eb0e20b9e4 quick fix for google doc sync 2024-11-13 07:24:29 -08:00
pablodanswer
490a68773b update organization (#3118)
* update organization

* minor clean up

* add minor clarity

* k

* slight rejigger

* alembic fix

* update paradigm

* delete code!

* delete code

* minor update
2024-11-13 06:45:32 +00:00
rkuo-danswer
227aff1e47 clean up logging in light worker (#3072) 2024-11-13 03:42:02 +00:00
Weves
6e29d1944c Fix widget example 2024-11-12 18:48:44 -08:00
pablodanswer
22189f02c6 Add referral source to cloud on data plane (#3096)
* cloud auth referral source

* minor clarity

* k

* minor modification to be best practice

* typing

* Update ReferralSourceSelector.tsx

* Update ReferralSourceSelector.tsx

---------

Co-authored-by: hagen-danswer <hagen@danswer.ai>
2024-11-13 00:42:25 +00:00
hagen-danswer
fdc4811fce doc sync celery refactor (#3084)
* doc_sync is refactored

* maybe this works

* tested to work!

* mypy fixes

* enabled integration tests

* fixed the test

* added external group sync

* testing should work now

* mypy

* confluence doc id fix

* got group sync working

* addressed feedback

* renamed some vars and fixed mypy

* conf fix?

* added wiki handling to confluence connector

* test fixes

* revert google drive connector

* fixed groups

* hotfix
2024-11-12 23:57:14 +00:00
Chris Weaver
021d0cf314 Support LITELLM_EXTRA_BODY env variable (#3119)
* Support LITELLM_EXTRA_BODY env variable

* Remove unused param

* Add comment
2024-11-12 23:17:44 +00:00
pablodanswer
942e47db29 improved mobile scroll (#3110) 2024-11-12 01:57:49 +00:00
pablodanswer
f4a020b599 moderate component fixes (#3095)
* moderate component fixes

* nit

* nit

* update colors

* k
2024-11-12 00:47:35 +00:00
pablodanswer
5166649eae Cleaner EE fallback for no op (#3106)
* treat async values differently

* cleaner approach

* spacing

* typing
2024-11-11 17:42:14 +00:00
Chris Weaver
ba805f766f New assistants api (#3097) 2024-11-11 07:55:23 -08:00
rkuo-danswer
9d57f34c34 re-enable helm (#3053)
* re-enable helm

* allow manual triggering

* change vespa host

* change vespa chart location

* update Chart.lock

* update ct.yaml with new vespa chart repo

* bump vespa to 0.2.5

* update Chart.lock

* update to vespa 0.2.6

* bump vespa to 0.2.7

* bump to 0.2.8

* bump version

* try appending the ordinal

* try new configmap

* bump vespa

* bump vespa

* add debug to see if we can figure out what ct install thinks is failing

* add debug flag to helm

* try disabling nginx because of KinD

* use helm-extra-set-args

* try command line

* try pointing test connection to the correct service name

* bump vespa to 0.2.12

* update chart.lock

* bump vespa to 0.2.13

* bump vespa to 0.2.14

* bump vespa

* bump vespa

* re-enable chart testing only on changes

* name the check more specifically than "lint-test"

* add some debugging

* try setting remote

* might have to specify chart dirs directly

* add comments

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-10 01:28:39 +00:00
pablodanswer
cc2f584321 Silence auth logs (#3098)
* silence auth logs

* remove unnecessary line

* k
2024-11-09 21:41:11 +00:00
pablodanswer
a1b95df3b8 Robustify cloud deployment + include initial KEDA configuration (#3094)
* robustify cloud deployment + include initial KEDA configuration

* ensure .github changes are passed

* raise exits
2024-11-09 21:26:51 +00:00
pablodanswer
9272d6ebfe Remove ee (#3093)
* move api key to non-ee

* finalize previous migration

* move token rate limit to non-ee

* general cleanup

* update

* update

* finalize

* finalize

* ensure callable

* k
2024-11-09 20:51:36 +00:00
Yuhong Sun
4fb65dcf73 Reenable OpenAI Tokenizer (#3062)
* k

* clean up test embeddings

* nit

* minor update to ensure consistency

* minor organizational update

* minor updates

---------

Co-authored-by: pablodanswer <pablo@danswer.ai>
2024-11-08 22:54:15 +00:00
rkuo-danswer
2bbc5d5d07 fix saving docker logs (#3090) 2024-11-08 19:54:48 +00:00
rkuo-danswer
950b1c38f2 Merge pull request #3080 from danswer-ai/robust_assistant_description
Account for malformatted starter messages
2024-11-08 11:28:19 -08:00
Yuhong Sun
99fbfba32f File Connector Metadata (#3089) 2024-11-08 10:49:59 -08:00
pablodanswer
0a59efe64a account for malformatted starter messages 2024-11-08 10:21:04 -08:00
pablodanswer
cf5d394d39 adjust default postgres schema for slack listener (#3088) 2024-11-08 18:00:44 +00:00
pablodanswer
f6d8f5ca89 Migrate tenant upgrades to data plane (#3051)
* add provisioning on data plane

* functional but scrappy

* minor cleanup

* minor clean up

* k

* simplify

* update provisioning

* improve import logic

* ensure proper conditional

* minor pydantic update

* minor config update

* nit
2024-11-08 17:13:29 +00:00
hagen-danswer
1fb4cdfcc3 Merge pull request #3073 from skylares/fireflies-dev
Fireflies connector
2024-11-08 06:50:22 -08:00
hagen-danswer
ac51469bcb Merge branch 'main' into fireflies-dev 2024-11-07 18:56:37 -08:00
Skylar Kesselring
c25f164e28 Remove linux 2024-11-07 21:51:58 -05:00
Skylar Kesselring
813720905b Fix failure cases 2024-11-07 21:37:41 -05:00
rkuo-danswer
0c45488ac6 wait for db before allowing worker to proceed (reduces error spam on … (#3079)
* wait for db before allowing worker to proceed (reduces error spam on container startup)

* fix session usage

* rework readiness probe logic to be less confusing and word ongoing probes better

* add vespa probe too

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-08 01:25:09 +00:00
Skylar Kesselring
95d9b33c1a Clean up connector 2024-11-07 19:51:40 -05:00
Yuhong Sun
55919f596c PG Dev Max Connections (#3082) 2024-11-07 11:51:23 -08:00
pablodanswer
1d0fb6d012 Evaluate None to default (#3069)
* add sentinel value

* update typing

* clearer

* update comments

* ensure proper attribution
2024-11-07 18:41:42 +00:00
pablodanswer
2b1dbde829 minor improvements (#3081) 2024-11-07 18:35:49 +00:00
hagen-danswer
2758ffd9d5 Google Drive Improvements (#3057)
* Google Drive Improvements

* mypy

* should work!

* variable cleanup

* final fixes
2024-11-07 02:07:35 +00:00
pablodanswer
07a1b49b4f update persona defaults (#3042)
* evaluate None to default

* fix usage report pagination

* update persona defaults

* update user preferences

* k

* validate

* update typing

* nit

* formating nits

* fallback to all assistants

* update ux + spacing

* udpate refresh logic

* minor update to refresh

* nit

* touchup

* update starter message

* update default live assistant logic

---------

Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2024-11-07 00:03:14 +00:00
pablodanswer
43d8daa5bc update redirect 2024-11-06 14:55:32 -08:00
hagen-danswer
faeb9f09f0 Merge pull request #3008 from danswer-ai/horizontal_slack
Add Functional Horizontal scaling for Slack
2024-11-06 14:31:13 -08:00
pablodanswer
25f5c12750 remove print 2024-11-06 13:49:16 -08:00
pablodanswer
2d81710ccc minor udpate 2024-11-06 13:49:16 -08:00
pablodanswer
187a7d2da2 validated approach 2024-11-06 13:49:16 -08:00
pablodanswer
4b152aa3a7 update slack 2024-11-06 13:49:16 -08:00
pablodanswer
06f937cf93 no typing 2024-11-06 13:49:16 -08:00
pablodanswer
5a24ed2947 updated cleanup 2024-11-06 13:49:16 -08:00
pablodanswer
2372e6a5a5 update slack 2024-11-06 13:49:15 -08:00
pablodanswer
3eef4e3992 functioning 2024-11-06 13:47:47 -08:00
pablodanswer
467ce4e3f3 fix usage report pagination 2024-11-06 13:21:00 -08:00
Skylar Kesselring
ee4b334a0a Fix errors and cleanup 2024-11-06 14:01:51 -05:00
pablodanswer
4087292001 evaluate None to default 2024-11-06 09:36:43 -08:00
rkuo-danswer
da6ed5b2b3 Merge pull request #3066 from danswer-ai/bugfix/log-vespa-url
need to see vespa url for container debugging
2024-11-06 00:35:10 -08:00
Richard Kuo
864ac2ac5c need to see vespa url for container debugging 2024-11-06 00:26:55 -08:00
rkuo-danswer
12cb77c80e Merge pull request #3059 from danswer-ai/bugfix/sentry_indexing
add sentry to spawned indexing task
2024-11-05 16:51:23 -08:00
Richard Kuo (Danswer)
583cd14bf4 comment why we need sentry here 2024-11-05 16:46:50 -08:00
Richard Kuo (Danswer)
001fcb3359 fix stale indexing tasks being allowed to run after a restart 2024-11-05 16:39:54 -08:00
Skylar Kesselring
7ff18e0a93 Create connector 2024-11-05 19:28:57 -05:00
Richard Kuo (Danswer)
9ac256e925 Merge branch 'main' of https://github.com/danswer-ai/danswer into bugfix/sentry_indexing 2024-11-05 15:48:23 -08:00
hagen-danswer
08600db41d Merge pull request #3056 from danswer-ai/form_stretch
Improve form
2024-11-05 14:19:11 -08:00
rkuo-danswer
6bf06ac7f7 limit session scope of index attempt (use id's where appropriate as w… (#3049)
* limit session scope of index attempt (use id's where appropriate as well)

* fix session scope

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-05 20:51:43 +00:00
Richard Kuo (Danswer)
5b06b53a3e add sentry to spawned indexing task 2024-11-05 12:30:21 -08:00
pablodanswer
afce57b29f clarity 2024-11-05 10:44:12 -08:00
pablodanswer
257dbecd1d k 2024-11-05 10:24:48 -08:00
pablodanswer
bd6baf39c3 update 2024-11-05 10:23:52 -08:00
pablodanswer
b2c55ebd71 ensure props aligned (#3050)
* ensure props aligned

* k

* k
2024-11-05 16:49:04 +00:00
pablodanswer
dea7a8f697 Clean up tooltips (#3047)
* clean up tooltips

* nit: fix delay duration
2024-11-05 16:48:19 +00:00
pablodanswer
ddae2346ec form 2024-11-05 08:33:03 -08:00
Weves
9032fb4467 Improve background token refresh 2024-11-04 15:00:16 -08:00
rkuo-danswer
b6ecbbcf45 add to async get session as well (#3046) 2024-11-04 20:47:56 +00:00
pablodanswer
1d8e662b79 ensure we reset all (#3048) 2024-11-04 19:48:15 +00:00
pablodanswer
2cb33b1fb4 add default api keys for cloud users (#3044)
* add default api keys for cloud users

* add cohere as well

* naming
2024-11-04 19:11:12 +00:00
hagen-danswer
2cd1e6be00 gmail refactor + permission syncing (#3021)
* initial frontend changes and shared google refactoring

* gmail connector is reworked

* added permission syncing for gmail

* tested!

* Added tests for gmail connector

* fixed tests and mypy

* temp fix

* testing done!

* rename

* test fixes maybe?

* removed irrelevant tests

* anotha one

* refactoring changes

* refactor finished

* maybe these fixes work

* dumps

* final fixes
2024-11-04 18:06:23 +00:00
Weves
8e55566f66 Fix slack bot form + LLM provider form 2024-11-03 17:51:04 -08:00
pablodanswer
bafb95d920 Misc color clean up (#3026)
* misc color clean up

* additional nits

* nit

* nit

* additional minor nits

* ensure tailwind config evaluates properly + update textarea -> input

* ensure tool call renders

* formatting
2024-11-03 23:57:11 +00:00
pablodanswer
c6e8bf2d28 add multiple formats to tools (#3041) 2024-11-03 23:54:19 +00:00
Chris Weaver
c2d04f591d Add drive sections (#3040)
* ADd header support for drive

* Fix mypy

* Comment change

* Improve

* Cleanup

* Add comment
2024-11-03 22:10:45 +00:00
rkuo-danswer
56c3a5ff5b add POSTGRES_IDLE_SESSIONS_TIMEOUT (#3019)
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-03 21:58:12 +00:00
Yuhong Sun
fac2b100a1 Last Message Too Large Logging (#3039) 2024-11-03 11:24:04 -08:00
pablodanswer
51b79f688a Tool call per message (#3025)
* single tool call per message

* finalize migration

* minor image generation fix

* validate simplify

* k

* remove print

* validated
2024-11-03 10:51:51 -08:00
pablodanswer
a7002dfa1d add CSV display (#3028)
* add CSV display

* add downloading

* restructure

* create portal for modal

* update requirements

* nit
2024-11-03 10:43:05 -08:00
pablodanswer
93d0104d3c slight upgrade to image generation prompts (#3036)
* slight upgrade to prompts

* k

* nit
2024-11-03 10:42:52 -08:00
pablodanswer
46e5ffa3ae add validated + reformatted dynamic beat acquisition (#3006)
* add validated + reformatted dynamic beat acquisition

* validate

* reorg

* nit

* address comments

* update

* typing

* ensure versioned apps capture

* Remove locks (#3017)

* add validated + reformatted dynamic beat acquisition

* initial removal of locks!

* minor

* remove unecessary locks

* update

* nit

* k

* K8s jobs (#3033)

* add k8s configs

* k

* update config

* k

* improved timeouts + worker configs

* improve workers
2024-11-03 10:27:25 -08:00
pablodanswer
d4f38bba8b Revert temporary modifications (#3038)
* Revert temporary modifications

* nit
2024-11-03 10:27:06 -08:00
pablodanswer
19d6b63fd3 temporary update (#3037) 2024-11-03 10:05:33 -08:00
Chris Weaver
938d5788b6 Upgrade to latest NextJS + switch to turbopack (#3027)
* Upgrade to NextJS 15 + use turbopacK

* Remove unintended change

* Update nextjs version

* Remove override

* Upgrade react

* Fix charts

* Style

* Style

* Fix prettier

* slight modification

---------

Co-authored-by: pablodanswer <pablo@danswer.ai>
2024-11-03 02:56:23 +00:00
hagen-danswer
70f703cc0f Merge pull request #3035 from danswer-ai/freshdesk-nit
minor nit
2024-11-02 18:14:52 -07:00
hagen-danswer
8bcf80aa76 minor nit 2024-11-02 18:05:06 -07:00
rkuo-danswer
5f5cc9a724 Feature/redis connector refactor (#2992)
* refactor RedisConnectorDeletion into RedisConnector

* refactor redis stop and deletion

* port pruning

* nest pruning

* port deletion

* port indexing

* refactor into individual files

* refactor redis connector index  to take search settings at init

* move back to debug level log

* refactor doc set and user group (mostly)

* mypy fixes
2024-11-02 19:53:04 +00:00
pablodanswer
e4bb14d4e1 Super user (#2944)
* add super user

* nits
2024-11-02 17:29:23 +00:00
hagen-danswer
5d9b8364ab Merge pull request #3032 from danswer-ai/freshdesk-cleanup
Cleaned up connector
2024-11-02 09:31:22 -07:00
hagen-danswer
83c299ebc8 troll logger statement 2024-11-02 09:09:46 -07:00
hagen-danswer
6b4143cc30 ID fix 2024-11-02 09:08:26 -07:00
hagen-danswer
6e8c88ed71 made id more unique 2024-11-02 09:05:24 -07:00
hagen-danswer
d652cb3141 renamed variables 2024-11-02 09:03:42 -07:00
hagen-danswer
5e444d43f9 Cleaned up connector 2024-11-02 09:01:15 -07:00
hagen-danswer
2e49027beb Merge pull request #2884 from skylares/sky-dev
Add Freshdesk Connector
2024-11-02 08:27:35 -07:00
hagen-danswer
d7bcd32d9a out of scope 2024-11-02 08:21:33 -07:00
hagen-danswer
4a6b8db65f out of scope 2024-11-02 08:20:08 -07:00
hagen-danswer
6f440d126a more mypy fixes 2024-11-02 08:17:53 -07:00
hagen-danswer
013292a0e3 mypy fixes 2024-11-02 08:15:36 -07:00
Richard Kuo
a1ae22ef4a fix run key 2024-11-02 02:23:08 -07:00
Richard Kuo
40beda30a4 try pip-license-checker 2024-11-02 02:20:58 -07:00
Richard Kuo
d3062cacea manual only for now 2024-11-02 00:01:55 -07:00
Richard Kuo
678ed23853 codel permissions? 2024-11-01 22:34:41 -07:00
Richard Kuo
ea2da63cf2 try installing npm deps 2024-11-01 22:09:06 -07:00
Richard Kuo
4fc8a35220 try repo level scan 2024-11-01 21:59:23 -07:00
hagen-danswer
f981106111 Update connector.py 2024-11-01 19:27:03 -07:00
Richard Kuo (Danswer)
5439c33313 don't scan the os packages 2024-11-01 17:24:41 -07:00
Richard Kuo (Danswer)
5e050f8305 we didn't checkout the code, no trivy ignore 2024-11-01 17:16:28 -07:00
Richard Kuo (Danswer)
12c82de78f experimental github action to scan licenses 2024-11-01 17:10:59 -07:00
pablodanswer
645402c71a Tremor -> Shadcn (#2983)
* initialization

* button + input updates

* migrate dividers + buttons

* migrate badges

* minor updates

* migrate cards

* fix compiling

* begin date picker + badge transfer

* remove tremor

* fully swapped

* nits

* list item + configuration updates

* clean build

* update colors

* nits
2024-11-01 23:20:06 +00:00
pablodanswer
772313236f minor foreign key update (#3007) 2024-11-01 21:16:50 +00:00
Chris Weaver
ecf4923a3a Fix answer with specified doc ids (#2703)
* Fix

Fix

Refactor

more

more

fix

refactor

Fix circular imports

Refactor

Move tests around

* Add quote support

* Testing

* More testing

* Fix image generation slowness

* Remove unused exception

* Fix UT

* fix stop generating

* minor typo

* minor logging updates for clarity

---------

Co-authored-by: pablodanswer <pablo@danswer.ai>
2024-11-01 19:50:20 +00:00
pablodanswer
d66b81a902 Feat/certificate (#2998)
* first pass

* simplify

* remove now unneeded COPY command

* minor clean up

* k

* nit
2024-11-01 19:34:52 +00:00
pablodanswer
753293cefb Basic multi tenant api key (#3004)
* basic multi tenant api key

* organization

* nit

* clean
2024-11-01 19:34:51 +00:00
pablodanswer
6d543f3d4f Do not count API keys as users (#3022)
* don't count api keys as users

* typing
2024-11-01 19:34:30 +00:00
hagen-danswer
ccdc09e2d4 Merge pull request #3020 from danswer-ai/gdrive-interface
Add Gdrive Interface
2024-11-01 06:28:56 -07:00
hagen-danswer
4a23c8702d Quicky 2024-11-01 06:27:55 -07:00
rkuo-danswer
dc2dfeb5b8 Fix pywikibot droppings (#2924)
* make pywikibot store its working files in a system provided temp directory

* move the config setting around

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2024-11-01 05:59:12 +00:00
hagen-danswer
71d4fb98d3 Refactored Google Drive Connector + Permission Syncing (#2945)
* refactoring changes

* everything working for service account

* works with service account

* combined scopes

* copy change

* oauth prep

* Works for oauth and service account credentials

* mypy

* merge fixes

* Refactor Google Drive connector

* finished backend

* auth changes

* if its stupid but it works, its not stupid

* npm run dev fixes

* addressed change requests

* string fix

* minor fixes and cleanup

* spacing cleanup

* Update connector.py

* everything done

* testing!

* Delete backend/tests/daily/connectors/google_drive/file_generator.py

* cleaned up

---------

Co-authored-by: Chris Weaver <25087905+Weves@users.noreply.github.com>
2024-11-01 02:25:00 +00:00
Yuhong Sun
b34f5862d7 Remove License Issues (#3013)
* k

* k

* k

* k

* k
2024-11-01 00:31:19 +00:00
pablodanswer
0b08bf4e3f Proper tenant reset (#3015)
* add proper tenant reset

* clear comment

* minor formatting
2024-10-31 19:45:35 +00:00
pablodanswer
add87fa1b4 remove endpoint (#3014) 2024-10-31 19:43:15 +00:00
Samarth Mishra
787fdf2e38 Update README.md (#3011) 2024-10-31 10:44:36 -07:00
Weves
4499c630b3 Fix model test action name 2024-10-31 10:12:01 -07:00
hagen-danswer
e3be318781 Update connector.py 2024-10-31 09:50:48 -07:00
rkuo-danswer
231ab3fb5d Feature/indexing logs (#3002)
* improve logging around indexing tasks

* task_logger doesn't work inside the spawned task
2024-10-31 16:43:46 +00:00
Yuhong Sun
ff9d7141a9 Gmail Connector Robustify (#3000) 2024-10-30 20:21:54 -07:00
rkuo-danswer
dba2d67cdb only warmup on index swap (#3003)
* only warmup on index swap

* move conditional
2024-10-31 00:40:03 +00:00
Skylar Kesselring
73ee709801 Fix typing errors 2024-10-30 17:46:04 -04:00
Skylar Kesselring
53d2d333ab Refactor metadata 2024-10-30 17:23:20 -04:00
Skylar Kesselring
195e2c335d Fix per_page count 2024-10-28 12:35:40 -04:00
Skylar Kesselring
1dec69bb82 Fix document time parsing 2024-10-28 12:33:58 -04:00
Skylar Kesselring
075e4f18bc Clean up & comment fetch_tickets 2024-10-28 11:26:37 -04:00
Skylar Kesselring
e5494f9742 Refactor & cleanup code, process tickets in batches 2024-10-27 11:53:50 -04:00
Skylar Kesselring
e5d84cae1b Clean up code 2024-10-26 23:06:24 -04:00
Skylar Kesselring
8023cafb2b Fixed polling issue with timezone 2024-10-25 23:46:47 -04:00
Skylar Kesselring
a348caa9b1 Add pagination & Remove req.obj from connectors.tsx 2024-10-25 14:12:11 -04:00
Skylar Kesselring
245adc4d3d Remove 2 month time check & Add time range to fetch and process 2024-10-24 12:42:08 -04:00
Skylar Kesselring
4ad35d76b0 Make ticket fetching a seperate function from processing 2024-10-24 12:25:29 -04:00
Skylar Kesselring
cc1e1c178b Replace html processing library with danswer util 2024-10-24 11:49:11 -04:00
Skylar Kesselring
87b5975091 Remove unnecessary log & Add LoadConnector 2024-10-24 11:38:29 -04:00
Skylar Kesselring
85b56e39c9 Fix Freshdesk connector date parsing for UTC timestamps 2024-10-23 14:01:03 -04:00
Skylar Kesselring
a1680fac2f Implement freshdesk frontend 2024-10-23 12:58:15 -04:00
763 changed files with 41920 additions and 25792 deletions


@@ -3,61 +3,61 @@ name: Build and Push Backend Image on Tag
on:
push:
tags:
- '*'
- "*"
env:
REGISTRY_IMAGE: danswer/danswer-backend
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'danswer/danswer-backend-cloud' || 'danswer/danswer-backend' }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build-and-push:
# TODO: investigate a matrix build like the web container
# TODO: investigate a matrix build like the web container
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Install build-essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Backend Image Docker Build and Push
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
- name: Install build-essential
run: |
sudo apt-get update
sudo apt-get install -y build-essential
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
# To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'
trivyignores: ./backend/.trivyignore
- name: Backend Image Docker Build and Push
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
with:
# To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"
trivyignores: ./backend/.trivyignore


@@ -4,12 +4,12 @@ name: Build and Push Cloud Web Image on Tag
on:
push:
tags:
- '*'
- "*"
env:
REGISTRY_IMAGE: danswer/danswer-cloud-web-server
REGISTRY_IMAGE: danswer/danswer-web-server-cloud
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build:
runs-on:
@@ -28,11 +28,11 @@ jobs:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
@@ -41,16 +41,16 @@ jobs:
tags: |
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push by digest
id: build
uses: docker/build-push-action@v5
@@ -65,17 +65,18 @@ jobs:
NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
# needed due to weird interactions with the builds for different platforms
NEXT_PUBLIC_GTM_ENABLED=true
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
@@ -95,42 +96,42 @@ jobs:
path: /tmp/digests
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'
severity: "CRITICAL,HIGH"


@@ -3,53 +3,53 @@ name: Build and Push Model Server Image on Tag
on:
push:
tags:
- '*'
- "*"
env:
REGISTRY_IMAGE: danswer/danswer-model-server
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'danswer/danswer-model-server-cloud' || 'danswer/danswer-model-server' }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build-and-push:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Model Server Image Docker Build and Push
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
- name: Model Server Image Docker Build and Push
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
with:
image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
severity: "CRITICAL,HIGH"


@@ -0,0 +1,76 @@
# Scan for problematic software licenses
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
name: 'Nightly - Scan licenses'
on:
# schedule:
# - cron: '0 14 * * *' # Runs every day at 6 AM PST / 7 AM PDT / 2 PM UTC
workflow_dispatch: # Allows manual triggering
permissions:
actions: read
contents: read
security-events: write
jobs:
scan-licenses:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- name: Get explicit and transitive dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
pip freeze > requirements-all.txt
- name: Check python
id: license_check_report
uses: pilosus/action-pip-license-checker@v2
with:
requirements: 'requirements-all.txt'
fail: 'Copyleft'
exclude: '(?i)^(pylint|aio[-_]*).*'
- name: Print report
if: ${{ always() }}
run: echo "${{ steps.license_check_report.outputs.report }}"
- name: Install npm dependencies
working-directory: ./web
run: npm ci
- name: Run Trivy vulnerability scanner in repo mode
uses: aquasecurity/trivy-action@0.28.0
with:
scan-type: fs
scanners: license
format: table
# format: sarif
# output: trivy-results.sarif
severity: HIGH,CRITICAL
# - name: Upload Trivy scan results to GitHub Security tab
# uses: github/codeql-action/upload-sarif@v3
# with:
# sarif_file: trivy-results.sarif

.github/workflows/pr-chromatic-tests.yml (new file, 225 lines)

@@ -0,0 +1,225 @@
name: Run Chromatic Tests
concurrency:
group: Run-Chromatic-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on: push
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
jobs:
playwright-tests:
name: Playwright Tests
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 22
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Install playwright browsers
working-directory: ./web
run: npx playwright install --with-deps
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
# we use the runs-on cache for docker builds
# in conjunction with runs-on runners, it has better speed and unlimited caching
# https://runs-on.com/caching/s3-cache-for-github-actions/
# https://runs-on.com/caching/docker/
# https://github.com/moby/buildkit#s3-cache-experimental
# images are built and run locally for testing purposes. Not pushed.
- name: Build Web Docker image
uses: ./.github/actions/custom-build-and-push
with:
context: ./web
file: ./web/Dockerfile
platforms: linux/amd64
tags: danswer/danswer-web-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Backend Docker image
uses: ./.github/actions/custom-build-and-push
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64
tags: danswer/danswer-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64
tags: danswer/danswer-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f danswer-stack-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run pytest playwright test init
working-directory: ./backend
env:
PYTEST_IGNORE_SKIP: true
run: pytest -s tests/integration/tests/playwright/test_playwright.py
- name: Run Playwright tests
working-directory: ./web
run: npx playwright test
- uses: actions/upload-artifact@v4
if: always()
with:
# Chromatic automatically defaults to the test-results directory.
# Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
name: test-results
path: ./web/test-results
retention-days: 30
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@v4
with:
name: docker-logs
path: ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
chromatic-tests:
name: Chromatic Tests
needs: playwright-tests
runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 22
- name: Install node dependencies
working-directory: ./web
run: npm ci
- name: Download Playwright test results
uses: actions/download-artifact@v4
with:
name: test-results
path: ./web/test-results
- name: Run Chromatic
uses: chromaui/action@latest
with:
playwright: true
projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
workingDir: ./web
env:
CHROMATIC_ARCHIVE_LOCATION: ./test-results
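
The "Wait for service to be ready" step above polls the API server's /health endpoint every 5 seconds for up to 5 minutes, treating curl connection errors as retryable. A rough Python equivalent of the same polling logic; the URL and timeout mirror the workflow, while the requests-based approach is an illustration rather than anything in the repo:

import time

import requests


def wait_for_service(url: str = "http://localhost:8080/health", timeout_s: int = 300) -> None:
    """Poll `url` until it returns HTTP 200 or `timeout_s` elapses."""
    start = time.monotonic()
    while True:
        if time.monotonic() - start >= timeout_s:
            raise TimeoutError("Service did not become ready in time")
        try:
            if requests.get(url, timeout=5).status_code == 200:
                print("Service is ready!")
                return
        except requests.RequestException:
            # Mirrors the curl-error branch: connection refused/reset, keep retrying
            pass
        time.sleep(5)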

View File

@@ -0,0 +1,72 @@
name: Helm - Lint and Test Charts
on:
merge_group:
pull_request:
branches: [ main ]
workflow_dispatch: # Allows manual triggering
jobs:
helm-chart-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/setup-helm@v4.2.0
with:
version: v3.14.4
- name: Set up chart-testing
uses: helm/chart-testing-action@v2.6.1
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
- name: Run chart-testing (list-changed)
id: list-changed
run: |
echo "default_branch: ${{ github.event.repository.default_branch }}"
changed=$(ct list-changed --remote origin --target-branch ${{ github.event.repository.default_branch }} --chart-dirs deployment/helm/charts)
echo "list-changed output: $changed"
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# rkuo: I don't think we need python?
# - name: Set up Python
# uses: actions/setup-python@v5
# with:
# python-version: '3.11'
# cache: 'pip'
# cache-dependency-path: |
# backend/requirements/default.txt
# backend/requirements/dev.txt
# backend/requirements/model_server.txt
# - run: |
# python -m pip install --upgrade pip
# pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
# pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
# pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
run: ct lint --config ct.yaml --all
# the following would lint only changed charts, but linting isn't expensive
# run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@v1.10.0
- name: Run chart-testing (install)
if: steps.list-changed.outputs.changed == 'true'
run: ct install --all --helm-extra-set-args="--set=nginx.enabled=false" --debug --config ct.yaml
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}

View File

@@ -1,68 +0,0 @@
# This workflow is intentionally disabled while we're still working on it
# It's close to ready, but a race condition needs to be fixed with
# API server and Vespa startup, and it needs to have a way to build/test against
# local containers
name: Helm - Lint and Test Charts
on:
merge_group:
pull_request:
branches: [ main ]
jobs:
lint-test:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/setup-helm@v4.2.0
with:
version: v3.14.4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
- name: Set up chart-testing
uses: helm/chart-testing-action@v2.6.1
- name: Run chart-testing (list-changed)
id: list-changed
run: |
changed=$(ct list-changed --target-branch ${{ github.event.repository.default_branch }})
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
- name: Run chart-testing (lint)
# if: steps.list-changed.outputs.changed == 'true'
run: ct lint --all --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
- name: Create kind cluster
# if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@v1.10.0
- name: Run chart-testing (install)
# if: steps.list-changed.outputs.changed == 'true'
run: ct install --all --config ct.yaml
# run: ct install --target-branch ${{ github.event.repository.default_branch }}

View File

@@ -13,7 +13,10 @@ on:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
jobs:
integration-tests:
# See https://runs-on.com/runners/linux/
@@ -195,9 +198,13 @@ jobs:
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
danswer/danswer-integration:test \
/app/tests/integration/tests
/app/tests/integration/tests \
/app/tests/integration/connector_job_tests
continue-on-error: true
id: run_tests
@@ -210,17 +217,18 @@ jobs:
echo "All integration tests passed successfully."
fi
- name: Stop Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
- name: Upload logs
if: success() || failure()

View File

@@ -18,6 +18,12 @@ env:
# Jira
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
# Google
GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
jobs:
connectors-check:

View File

@@ -15,7 +15,7 @@ env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
jobs:
connectors-check:
model-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]

.gitignore
View File

@@ -7,3 +7,4 @@
.vscode/
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
/web/test-results/

View File

@@ -203,7 +203,7 @@
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert",
],
"presentation": {
"group": "2",
@@ -232,7 +232,7 @@
"--loglevel=INFO",
"--hostname=heavy@%n",
"-Q",
"connector_pruning",
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync",
],
"presentation": {
"group": "2",

View File

@@ -32,7 +32,7 @@ To contribute to this project, please follow the
When opening a pull request, mention related issues and feel free to tag relevant maintainers.
Before creating a pull request please make sure that the new changes conform to the formatting and linting requirements.
See the [Formatting and Linting](#-formatting-and-linting) section for how to run these checks locally.
See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.
### Getting Help 🙋

View File

@@ -1,4 +1,5 @@
<!-- DANSWER_METADATA={"link": "https://github.com/danswer-ai/danswer/blob/main/README.md"} -->
<a name="readme-top"></a>
<h2 align="center">
<a href="https://www.danswer.ai/"> <img width="50%" src="https://github.com/danswer-owners/danswer/blob/1fabd9372d66cd54238847197c33f091a724803b/DanswerWithName.png?raw=true)" /></a>
@@ -11,7 +12,7 @@
<a href="https://docs.danswer.dev/" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
</a>
<a href="https://join.slack.com/t/danswer/shared_invite/zt-2lcmqw703-071hBuZBfNEOGUsLa5PXvQ" target="_blank">
<a href="https://join.slack.com/t/danswer/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
</a>
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
@@ -127,3 +128,19 @@ To try the Danswer Enterprise Edition:
## 💡 Contributing
Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
## ⭐Star History
[![Star History Chart](https://api.star-history.com/svg?repos=danswer-ai/danswer&type=Date)](https://star-history.com/#danswer-ai/danswer&Date)
## ✨Contributors
<a href="https://github.com/danswer-ai/danswer/graphs/contributors">
<img alt="contributors" src="https://contrib.rocks/image?repo=danswer-ai/danswer"/>
</a>
<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
<a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
↑ Back to Top ↑
</a>
</p>

View File

@@ -12,7 +12,6 @@ ARG DANSWER_VERSION=0.8-dev
ENV DANSWER_VERSION=${DANSWER_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true"
ARG CA_CERT_CONTENT=""
RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
# Install system dependencies
@@ -39,15 +38,6 @@ RUN apt-get update && \
apt-get clean
# Conditionally write the CA certificate and update certificates
RUN if [ -n "$CA_CERT_CONTENT" ]; then \
echo "Adding custom CA certificate"; \
echo "$CA_CERT_CONTENT" > /usr/local/share/ca-certificates/my-ca.crt && \
chmod 644 /usr/local/share/ca-certificates/my-ca.crt && \
update-ca-certificates; \
else \
echo "No custom CA certificate provided"; \
fi
# Install Python dependencies
# Remove py which is pulled in by retry, py is not needed and is a CVE
@@ -87,7 +77,6 @@ RUN apt-get update && \
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \

View File

@@ -0,0 +1,59 @@
"""display custom llm models
Revision ID: 177de57c21c9
Revises: 4ee1287bd26a
Create Date: 2024-11-21 11:49:04.488677
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import and_
revision = "177de57c21c9"
down_revision = "4ee1287bd26a"
branch_labels = None
depends_on = None
def upgrade() -> None:
conn = op.get_bind()
llm_provider = sa.table(
"llm_provider",
sa.column("id", sa.Integer),
sa.column("provider", sa.String),
sa.column("model_names", postgresql.ARRAY(sa.String)),
sa.column("display_model_names", postgresql.ARRAY(sa.String)),
)
excluded_providers = ["openai", "bedrock", "anthropic", "azure"]
providers_to_update = sa.select(
llm_provider.c.id,
llm_provider.c.model_names,
llm_provider.c.display_model_names,
).where(
and_(
~llm_provider.c.provider.in_(excluded_providers),
llm_provider.c.model_names.isnot(None),
)
)
results = conn.execute(providers_to_update).fetchall()
for provider_id, model_names, display_model_names in results:
if display_model_names is None:
display_model_names = []
combined_model_names = list(set(display_model_names + model_names))
update_stmt = (
llm_provider.update()
.where(llm_provider.c.id == provider_id)
.values(display_model_names=combined_model_names)
)
conn.execute(update_stmt)
def downgrade() -> None:
pass
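
The upgrade above backfills display_model_names for every provider outside the excluded list by unioning it with model_names. The core combine step in isolation, with illustrative values:

# Illustrative values; the migration reads these from llm_provider rows
model_names = ["llama3", "mistral", "qwen"]
display_model_names = ["llama3"]

combined = list(set(display_model_names + model_names))
# set() deduplicates but does not preserve order; the migration accepts the same trade-off
print(sorted(combined))  # ['llama3', 'mistral', 'qwen']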

View File

@@ -0,0 +1,68 @@
"""default chosen assistants to none
Revision ID: 26b931506ecb
Revises: 2daa494a0851
Create Date: 2024-11-12 13:23:29.858995
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "26b931506ecb"
down_revision = "2daa494a0851"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user", sa.Column("chosen_assistants_new", postgresql.JSONB(), nullable=True)
)
op.execute(
"""
UPDATE "user"
SET chosen_assistants_new =
CASE
WHEN chosen_assistants = '[-2, -1, 0]' THEN NULL
ELSE chosen_assistants
END
"""
)
op.drop_column("user", "chosen_assistants")
op.alter_column(
"user", "chosen_assistants_new", new_column_name="chosen_assistants"
)
def downgrade() -> None:
op.add_column(
"user",
sa.Column(
"chosen_assistants_old",
postgresql.JSONB(),
nullable=False,
server_default="[-2, -1, 0]",
),
)
op.execute(
"""
UPDATE "user"
SET chosen_assistants_old =
CASE
WHEN chosen_assistants IS NULL THEN '[-2, -1, 0]'::jsonb
ELSE chosen_assistants
END
"""
)
op.drop_column("user", "chosen_assistants")
op.alter_column(
"user", "chosen_assistants_old", new_column_name="chosen_assistants"
)

View File

@@ -0,0 +1,30 @@
"""add-group-sync-time
Revision ID: 2daa494a0851
Revises: c0fd6e4da83a
Create Date: 2024-11-11 10:57:22.991157
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2daa494a0851"
down_revision = "c0fd6e4da83a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"connector_credential_pair",
sa.Column(
"last_time_external_group_sync",
sa.DateTime(timezone=True),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("connector_credential_pair", "last_time_external_group_sync")

View File

@@ -0,0 +1,50 @@
"""single tool call per message
Revision ID: 33cb72ea4d80
Revises: 5b29123cd710
Create Date: 2024-11-01 12:51:01.535003
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "33cb72ea4d80"
down_revision = "5b29123cd710"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Step 1: Delete extraneous ToolCall entries
# Keep only the ToolCall with the smallest 'id' for each 'message_id'
op.execute(
sa.text(
"""
DELETE FROM tool_call
WHERE id NOT IN (
SELECT MIN(id)
FROM tool_call
WHERE message_id IS NOT NULL
GROUP BY message_id
);
"""
)
)
# Step 2: Add a unique constraint on message_id
op.create_unique_constraint(
constraint_name="uq_tool_call_message_id",
table_name="tool_call",
columns=["message_id"],
)
def downgrade() -> None:
# Step 1: Drop the unique constraint on message_id
op.drop_constraint(
constraint_name="uq_tool_call_message_id",
table_name="tool_call",
type_="unique",
)
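
The DELETE in the upgrade above keeps only the lowest-id tool_call row for each message_id before the unique constraint is added. The same rule expressed over plain tuples, with illustrative data:

# (id, message_id) pairs; illustrative only
tool_calls = [(1, 10), (2, 10), (3, 11), (5, 11)]

# Keep the smallest id per message_id, as the DELETE above does
keep: dict[int, int] = {}
for tc_id, msg_id in tool_calls:
    if msg_id not in keep or tc_id < keep[msg_id]:
        keep[msg_id] = tc_id

surviving = [pair for pair in tool_calls if pair[0] == keep[pair[1]]]
print(surviving)  # [(1, 10), (3, 11)]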

View File

@@ -0,0 +1,45 @@
"""add persona categories
Revision ID: 47e5bef3a1d7
Revises: dfbe9e93d3c7
Create Date: 2024-11-05 18:55:02.221064
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "47e5bef3a1d7"
down_revision = "dfbe9e93d3c7"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create the persona_category table
op.create_table(
"persona_category",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column("description", sa.String(), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("name"),
)
# Add category_id to persona table
op.add_column("persona", sa.Column("category_id", sa.Integer(), nullable=True))
op.create_foreign_key(
"fk_persona_category",
"persona",
"persona_category",
["category_id"],
["id"],
ondelete="SET NULL",
)
def downgrade() -> None:
op.drop_constraint("fk_persona_category", "persona", type_="foreignkey")
op.drop_column("persona", "category_id")
op.drop_table("persona_category")

View File

@@ -0,0 +1,280 @@
"""add_multiple_slack_bot_support
Revision ID: 4ee1287bd26a
Revises: 47e5bef3a1d7
Create Date: 2024-11-06 13:15:53.302644
"""
import logging
from typing import cast
from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session
from danswer.key_value_store.factory import get_kv_store
from danswer.db.models import SlackBot
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "4ee1287bd26a"
down_revision = "47e5bef3a1d7"
branch_labels: None = None
depends_on: None = None
# Configure logging
logger = logging.getLogger("alembic.runtime.migration")
logger.setLevel(logging.INFO)
def upgrade() -> None:
logger.info(f"{revision}: create_table: slack_bot")
# Create new slack_bot table
op.create_table(
"slack_bot",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column("enabled", sa.Boolean(), nullable=False, server_default="true"),
sa.Column("bot_token", sa.LargeBinary(), nullable=False),
sa.Column("app_token", sa.LargeBinary(), nullable=False),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("bot_token"),
sa.UniqueConstraint("app_token"),
)
# Create new slack_channel_config table
op.create_table(
"slack_channel_config",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("slack_bot_id", sa.Integer(), nullable=True),
sa.Column("persona_id", sa.Integer(), nullable=True),
sa.Column("channel_config", postgresql.JSONB(), nullable=False),
sa.Column("response_type", sa.String(), nullable=False),
sa.Column(
"enable_auto_filters", sa.Boolean(), nullable=False, server_default="false"
),
sa.ForeignKeyConstraint(
["slack_bot_id"],
["slack_bot.id"],
),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.PrimaryKeyConstraint("id"),
)
# Handle existing Slack bot tokens first
logger.info(f"{revision}: Checking for existing Slack bot.")
bot_token = None
app_token = None
first_row_id = None
try:
tokens = cast(dict, get_kv_store().load("slack_bot_tokens_config_key"))
except Exception:
logger.warning("No existing Slack bot tokens found.")
tokens = {}
bot_token = tokens.get("bot_token")
app_token = tokens.get("app_token")
if bot_token and app_token:
logger.info(f"{revision}: Found bot and app tokens.")
session = Session(bind=op.get_bind())
new_slack_bot = SlackBot(
name="Slack Bot (Migrated)",
enabled=True,
bot_token=bot_token,
app_token=app_token,
)
session.add(new_slack_bot)
session.commit()
first_row_id = new_slack_bot.id
# Create a default bot if none exists
# This is in case there are no slack tokens but there are channels configured
op.execute(
sa.text(
"""
INSERT INTO slack_bot (name, enabled, bot_token, app_token)
SELECT 'Default Bot', true, '', ''
WHERE NOT EXISTS (SELECT 1 FROM slack_bot)
RETURNING id;
"""
)
)
# Get the bot ID to use (either from existing migration or newly created)
bot_id_query = sa.text(
"""
SELECT COALESCE(
:first_row_id,
(SELECT id FROM slack_bot ORDER BY id ASC LIMIT 1)
) as bot_id;
"""
)
result = op.get_bind().execute(bot_id_query, {"first_row_id": first_row_id})
bot_id = result.scalar()
# CTE (Common Table Expression) that transforms the old slack_bot_config table data
# This splits up the channel_names into their own rows
channel_names_cte = """
WITH channel_names AS (
SELECT
sbc.id as config_id,
sbc.persona_id,
sbc.response_type,
sbc.enable_auto_filters,
jsonb_array_elements_text(sbc.channel_config->'channel_names') as channel_name,
sbc.channel_config->>'respond_tag_only' as respond_tag_only,
sbc.channel_config->>'respond_to_bots' as respond_to_bots,
sbc.channel_config->'respond_member_group_list' as respond_member_group_list,
sbc.channel_config->'answer_filters' as answer_filters,
sbc.channel_config->'follow_up_tags' as follow_up_tags
FROM slack_bot_config sbc
)
"""
# Insert the channel names into the new slack_channel_config table
insert_statement = """
INSERT INTO slack_channel_config (
slack_bot_id,
persona_id,
channel_config,
response_type,
enable_auto_filters
)
SELECT
:bot_id,
channel_name.persona_id,
jsonb_build_object(
'channel_name', channel_name.channel_name,
'respond_tag_only',
COALESCE((channel_name.respond_tag_only)::boolean, false),
'respond_to_bots',
COALESCE((channel_name.respond_to_bots)::boolean, false),
'respond_member_group_list',
COALESCE(channel_name.respond_member_group_list, '[]'::jsonb),
'answer_filters',
COALESCE(channel_name.answer_filters, '[]'::jsonb),
'follow_up_tags',
COALESCE(channel_name.follow_up_tags, '[]'::jsonb)
),
channel_name.response_type,
channel_name.enable_auto_filters
FROM channel_names channel_name;
"""
op.execute(sa.text(channel_names_cte + insert_statement).bindparams(bot_id=bot_id))
# Clean up old tokens if they existed
try:
if bot_token and app_token:
logger.info(f"{revision}: Removing old bot and app tokens.")
get_kv_store().delete("slack_bot_tokens_config_key")
except Exception:
logger.warning("tried to delete tokens in dynamic config but failed")
# Rename the table
op.rename_table(
"slack_bot_config__standard_answer_category",
"slack_channel_config__standard_answer_category",
)
# Rename the column
op.alter_column(
"slack_channel_config__standard_answer_category",
"slack_bot_config_id",
new_column_name="slack_channel_config_id",
)
# Drop the table with CASCADE to handle dependent objects
op.execute("DROP TABLE slack_bot_config CASCADE")
logger.info(f"{revision}: Migration complete.")
def downgrade() -> None:
# Recreate the old slack_bot_config table
op.create_table(
"slack_bot_config",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("persona_id", sa.Integer(), nullable=True),
sa.Column("channel_config", postgresql.JSONB(), nullable=False),
sa.Column("response_type", sa.String(), nullable=False),
sa.Column("enable_auto_filters", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.PrimaryKeyConstraint("id"),
)
# Migrate data back to the old format
# Group by persona_id to combine channel names back into arrays
op.execute(
sa.text(
"""
INSERT INTO slack_bot_config (
persona_id,
channel_config,
response_type,
enable_auto_filters
)
SELECT DISTINCT ON (persona_id)
persona_id,
jsonb_build_object(
'channel_names', (
SELECT jsonb_agg(c.channel_config->>'channel_name')
FROM slack_channel_config c
WHERE c.persona_id = scc.persona_id
),
'respond_tag_only', (channel_config->>'respond_tag_only')::boolean,
'respond_to_bots', (channel_config->>'respond_to_bots')::boolean,
'respond_member_group_list', channel_config->'respond_member_group_list',
'answer_filters', channel_config->'answer_filters',
'follow_up_tags', channel_config->'follow_up_tags'
),
response_type,
enable_auto_filters
FROM slack_channel_config scc
WHERE persona_id IS NOT NULL;
"""
)
)
# Rename the table back
op.rename_table(
"slack_channel_config__standard_answer_category",
"slack_bot_config__standard_answer_category",
)
# Rename the column back
op.alter_column(
"slack_bot_config__standard_answer_category",
"slack_channel_config_id",
new_column_name="slack_bot_config_id",
)
# Try to save the first bot's tokens back to KV store
try:
first_bot = (
op.get_bind()
.execute(
sa.text(
"SELECT bot_token, app_token FROM slack_bot ORDER BY id LIMIT 1"
)
)
.first()
)
if first_bot and first_bot.bot_token and first_bot.app_token:
tokens = {
"bot_token": first_bot.bot_token,
"app_token": first_bot.app_token,
}
get_kv_store().store("slack_bot_tokens_config_key", tokens)
except Exception:
logger.warning("Failed to save tokens back to KV store")
# Drop the new tables in reverse order
op.drop_table("slack_channel_config")
op.drop_table("slack_bot")

View File

@@ -0,0 +1,70 @@
"""nullable search settings for historic index attempts
Revision ID: 5b29123cd710
Revises: 949b4a92a401
Create Date: 2024-10-30 19:37:59.630704
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5b29123cd710"
down_revision = "949b4a92a401"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Drop the existing foreign key constraint
op.drop_constraint(
"fk_index_attempt_search_settings", "index_attempt", type_="foreignkey"
)
# Modify the column to be nullable
op.alter_column(
"index_attempt", "search_settings_id", existing_type=sa.INTEGER(), nullable=True
)
# Add back the foreign key with ON DELETE SET NULL
op.create_foreign_key(
"fk_index_attempt_search_settings",
"index_attempt",
"search_settings",
["search_settings_id"],
["id"],
ondelete="SET NULL",
)
def downgrade() -> None:
# Warning: This will delete all index attempts that don't have search settings
op.execute(
"""
DELETE FROM index_attempt
WHERE search_settings_id IS NULL
"""
)
# Drop foreign key constraint
op.drop_constraint(
"fk_index_attempt_search_settings", "index_attempt", type_="foreignkey"
)
# Modify the column to be not nullable
op.alter_column(
"index_attempt",
"search_settings_id",
existing_type=sa.INTEGER(),
nullable=False,
)
# Add back the foreign key without ON DELETE SET NULL
op.create_foreign_key(
"fk_index_attempt_search_settings",
"index_attempt",
"search_settings",
["search_settings_id"],
["id"],
)

View File

@@ -0,0 +1,45 @@
"""remove default bot
Revision ID: 6d562f86c78b
Revises: 177de57c21c9
Create Date: 2024-11-22 11:51:29.331336
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "6d562f86c78b"
down_revision = "177de57c21c9"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.execute(
sa.text(
"""
DELETE FROM slack_bot
WHERE name = 'Default Bot'
AND bot_token = ''
AND app_token = ''
AND NOT EXISTS (
SELECT 1 FROM slack_channel_config
WHERE slack_channel_config.slack_bot_id = slack_bot.id
)
"""
)
)
def downgrade() -> None:
op.execute(
sa.text(
"""
INSERT INTO slack_bot (name, enabled, bot_token, app_token)
SELECT 'Default Bot', true, '', ''
WHERE NOT EXISTS (SELECT 1 FROM slack_bot)
RETURNING id;
"""
)
)

View File

@@ -9,8 +9,8 @@ from alembic import op
import sqlalchemy as sa
from danswer.db.models import IndexModelStatus
from danswer.search.enums import RecencyBiasSetting
from danswer.search.enums import SearchType
from danswer.context.search.enums import RecencyBiasSetting
from danswer.context.search.enums import SearchType
# revision identifiers, used by Alembic.
revision = "776b3bbe9092"

View File

@@ -0,0 +1,35 @@
"""add web ui option to slack config
Revision ID: 93560ba1b118
Revises: 6d562f86c78b
Create Date: 2024-11-24 06:36:17.490612
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "93560ba1b118"
down_revision = "6d562f86c78b"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add show_continue_in_web_ui with default False to all existing channel_configs
op.execute(
"""
UPDATE slack_channel_config
SET channel_config = channel_config || '{"show_continue_in_web_ui": false}'::jsonb
WHERE NOT channel_config ? 'show_continue_in_web_ui'
"""
)
def downgrade() -> None:
# Remove show_continue_in_web_ui from all channel_configs
op.execute(
"""
UPDATE slack_channel_config
SET channel_config = channel_config - 'show_continue_in_web_ui'
"""
)
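
The upgrade above uses the JSONB || operator to merge in a default key (guarded by the ? existence check) and the - operator to strip it again on downgrade. The dict-level effect for one illustrative channel_config:

channel_config = {"channel_name": "#support", "respond_tag_only": True}  # illustrative

# Upgrade: channel_config || '{"show_continue_in_web_ui": false}'::jsonb,
# applied only where the key is absent (the WHERE NOT channel_config ? ... guard)
if "show_continue_in_web_ui" not in channel_config:
    channel_config = {**channel_config, "show_continue_in_web_ui": False}

# Downgrade: channel_config - 'show_continue_in_web_ui'
downgraded = {k: v for k, v in channel_config.items() if k != "show_continue_in_web_ui"}

print(channel_config)  # includes "show_continue_in_web_ui": False
print(downgraded)      # key removed again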

View File

@@ -7,6 +7,7 @@ Create Date: 2024-10-26 13:06:06.937969
"""
from alembic import op
from sqlalchemy.orm import Session
from sqlalchemy import text
# Import your models and constants
from danswer.db.models import (
@@ -15,7 +16,6 @@ from danswer.db.models import (
Credential,
IndexAttempt,
)
from danswer.configs.constants import DocumentSource
# revision identifiers, used by Alembic.
@@ -30,13 +30,11 @@ def upgrade() -> None:
bind = op.get_bind()
session = Session(bind=bind)
connectors_to_delete = (
session.query(Connector)
.filter(Connector.source == DocumentSource.REQUESTTRACKER)
.all()
# Get connectors using raw SQL
result = bind.execute(
text("SELECT id FROM connector WHERE source = 'requesttracker'")
)
connector_ids = [connector.id for connector in connectors_to_delete]
connector_ids = [row[0] for row in result]
if connector_ids:
cc_pairs_to_delete = (

View File

@@ -0,0 +1,30 @@
"""add creator to cc pair
Revision ID: 9cf5c00f72fe
Revises: 26b931506ecb
Create Date: 2024-11-12 15:16:42.682902
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9cf5c00f72fe"
down_revision = "26b931506ecb"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"connector_credential_pair",
sa.Column(
"creator_id",
sa.UUID(as_uuid=True),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("connector_credential_pair", "creator_id")

View File

@@ -0,0 +1,27 @@
"""add auto scroll to user model
Revision ID: a8c2065484e6
Revises: abe7378b8217
Create Date: 2024-11-22 17:34:09.690295
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a8c2065484e6"
down_revision = "abe7378b8217"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column("auto_scroll", sa.Boolean(), nullable=True, server_default=None),
)
def downgrade() -> None:
op.drop_column("user", "auto_scroll")

View File

@@ -0,0 +1,30 @@
"""add indexing trigger to cc_pair
Revision ID: abe7378b8217
Revises: 93560ba1b118
Create Date: 2024-11-26 19:09:53.481171
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "abe7378b8217"
down_revision = "93560ba1b118"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"connector_credential_pair",
sa.Column(
"indexing_trigger",
sa.Enum("UPDATE", "REINDEX", name="indexingmode", native_enum=False),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("connector_credential_pair", "indexing_trigger")

View File

@@ -288,6 +288,15 @@ def upgrade() -> None:
def downgrade() -> None:
# NOTE: you will lose all chat history. This is to satisfy the non-nullable constraints
# below
op.execute("DELETE FROM chat_feedback")
op.execute("DELETE FROM chat_message__search_doc")
op.execute("DELETE FROM document_retrieval_feedback")
op.execute("DELETE FROM document_retrieval_feedback")
op.execute("DELETE FROM chat_message")
op.execute("DELETE FROM chat_session")
op.drop_constraint(
"chat_feedback__chat_message_fk", "chat_feedback", type_="foreignkey"
)

View File

@@ -0,0 +1,48 @@
"""remove description from starter messages
Revision ID: b72ed7a5db0e
Revises: 33cb72ea4d80
Create Date: 2024-11-03 15:55:28.944408
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "b72ed7a5db0e"
down_revision = "33cb72ea4d80"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.execute(
sa.text(
"""
UPDATE persona
SET starter_messages = (
SELECT jsonb_agg(elem - 'description')
FROM jsonb_array_elements(starter_messages) elem
)
WHERE starter_messages IS NOT NULL
AND jsonb_typeof(starter_messages) = 'array'
"""
)
)
def downgrade() -> None:
op.execute(
sa.text(
"""
UPDATE persona
SET starter_messages = (
SELECT jsonb_agg(elem || '{"description": ""}')
FROM jsonb_array_elements(starter_messages) elem
)
WHERE starter_messages IS NOT NULL
AND jsonb_typeof(starter_messages) = 'array'
"""
)
)

View File

@@ -0,0 +1,29 @@
"""add recent assistants
Revision ID: c0fd6e4da83a
Revises: b72ed7a5db0e
Create Date: 2024-11-03 17:28:54.916618
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "c0fd6e4da83a"
down_revision = "b72ed7a5db0e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column(
"recent_assistants", postgresql.JSONB(), server_default="[]", nullable=False
),
)
def downgrade() -> None:
op.drop_column("user", "recent_assistants")

View File

@@ -23,6 +23,56 @@ def upgrade() -> None:
def downgrade() -> None:
# Delete chat messages and feedback first since they reference chat sessions
# Get chat messages from sessions with null persona_id
chat_messages_query = """
SELECT id
FROM chat_message
WHERE chat_session_id IN (
SELECT id
FROM chat_session
WHERE persona_id IS NULL
)
"""
# Delete dependent records first
op.execute(
f"""
DELETE FROM document_retrieval_feedback
WHERE chat_message_id IN (
{chat_messages_query}
)
"""
)
op.execute(
f"""
DELETE FROM chat_message__search_doc
WHERE chat_message_id IN (
{chat_messages_query}
)
"""
)
# Delete chat messages
op.execute(
"""
DELETE FROM chat_message
WHERE chat_session_id IN (
SELECT id
FROM chat_session
WHERE persona_id IS NULL
)
"""
)
# Now we can safely delete the chat sessions
op.execute(
"""
DELETE FROM chat_session
WHERE persona_id IS NULL
"""
)
op.alter_column(
"chat_session",
"persona_id",

View File

@@ -0,0 +1,42 @@
"""extended_role_for_non_web
Revision ID: dfbe9e93d3c7
Revises: 9cf5c00f72fe
Create Date: 2024-11-16 07:54:18.727906
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "dfbe9e93d3c7"
down_revision = "9cf5c00f72fe"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.execute(
"""
UPDATE "user"
SET role = 'EXT_PERM_USER'
WHERE has_web_login = false
"""
)
op.drop_column("user", "has_web_login")
def downgrade() -> None:
op.add_column(
"user",
sa.Column("has_web_login", sa.Boolean(), nullable=False, server_default="true"),
)
op.execute(
"""
UPDATE "user"
SET has_web_login = false,
role = 'BASIC'
WHERE role IN ('SLACK_USER', 'EXT_PERM_USER')
"""
)

View File

@@ -16,6 +16,41 @@ class ExternalAccess:
is_public: bool
@dataclass(frozen=True)
class DocExternalAccess:
external_access: ExternalAccess
# The document ID
doc_id: str
def to_dict(self) -> dict:
return {
"external_access": {
"external_user_emails": list(self.external_access.external_user_emails),
"external_user_group_ids": list(
self.external_access.external_user_group_ids
),
"is_public": self.external_access.is_public,
},
"doc_id": self.doc_id,
}
@classmethod
def from_dict(cls, data: dict) -> "DocExternalAccess":
external_access = ExternalAccess(
external_user_emails=set(
data["external_access"].get("external_user_emails", [])
),
external_user_group_ids=set(
data["external_access"].get("external_user_group_ids", [])
),
is_public=data["external_access"]["is_public"],
)
return cls(
external_access=external_access,
doc_id=data["doc_id"],
)
@dataclass(frozen=True)
class DocumentAccess(ExternalAccess):
# User emails for Danswer users, None indicates admin
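
DocExternalAccess above gains to_dict/from_dict, presumably so permission payloads can be passed around as plain JSON-serializable dicts. A minimal round-trip sketch, assuming the two dataclasses exactly as defined in this file:

# Assumes ExternalAccess and DocExternalAccess as shown above
access = ExternalAccess(
    external_user_emails={"a@example.com"},
    external_user_group_ids={"eng"},
    is_public=False,
)
doc_access = DocExternalAccess(external_access=access, doc_id="doc-123")

payload = doc_access.to_dict()            # sets become lists, safe to json.dumps
restored = DocExternalAccess.from_dict(payload)

assert restored.doc_id == "doc-123"
assert restored.external_access.external_user_emails == {"a@example.com"}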

View File

@@ -1,12 +1,14 @@
import secrets
import uuid
from urllib.parse import quote
from urllib.parse import unquote
from fastapi import Request
from passlib.hash import sha256_crypt
from pydantic import BaseModel
from danswer.auth.schemas import UserRole
from ee.danswer.configs.app_configs import API_KEY_HASH_ROUNDS
from danswer.configs.app_configs import API_KEY_HASH_ROUNDS
_API_KEY_HEADER_NAME = "Authorization"
@@ -30,8 +32,35 @@ class ApiKeyDescriptor(BaseModel):
user_id: uuid.UUID
def generate_api_key() -> str:
return _API_KEY_PREFIX + secrets.token_urlsafe(_API_KEY_LEN)
def generate_api_key(tenant_id: str | None = None) -> str:
# For backwards compatibility, if no tenant_id, generate old style key
if not tenant_id:
return _API_KEY_PREFIX + secrets.token_urlsafe(_API_KEY_LEN)
encoded_tenant = quote(tenant_id) # URL encode the tenant ID
return f"{_API_KEY_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(_API_KEY_LEN)}"
def extract_tenant_from_api_key_header(request: Request) -> str | None:
"""Extract tenant ID from request. Returns None if auth is disabled or invalid format."""
raw_api_key_header = request.headers.get(
_API_KEY_HEADER_ALTERNATIVE_NAME
) or request.headers.get(_API_KEY_HEADER_NAME)
if not raw_api_key_header or not raw_api_key_header.startswith(_BEARER_PREFIX):
return None
api_key = raw_api_key_header[len(_BEARER_PREFIX) :].strip()
if not api_key.startswith(_API_KEY_PREFIX):
return None
parts = api_key[len(_API_KEY_PREFIX) :].split(".", 1)
if len(parts) != 2:
return None
tenant_id = parts[0]
return unquote(tenant_id) if tenant_id else None
def hash_api_key(api_key: str) -> str:
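
generate_api_key above now embeds a URL-encoded tenant ID between the key prefix and the random token, and extract_tenant_from_api_key_header reverses that split. The format in isolation; the prefix value and token length here are illustrative assumptions, not the module's real constants:

import secrets
from urllib.parse import quote, unquote

API_KEY_PREFIX = "dn_"  # illustrative stand-in for the module's real prefix constant


def make_key(tenant_id: str | None) -> str:
    token = secrets.token_urlsafe(32)  # length is illustrative
    if not tenant_id:
        return API_KEY_PREFIX + token  # old-style key, no tenant
    return f"{API_KEY_PREFIX}{quote(tenant_id)}.{token}"


def tenant_from_key(api_key: str) -> str | None:
    if not api_key.startswith(API_KEY_PREFIX):
        return None
    parts = api_key[len(API_KEY_PREFIX):].split(".", 1)
    return unquote(parts[0]) if len(parts) == 2 and parts[0] else None


assert tenant_from_key(make_key("tenant a")) == "tenant a"
assert tenant_from_key(make_key(None)) is None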

View File

@@ -2,8 +2,8 @@ from typing import cast
from danswer.configs.constants import KV_USER_STORE_KEY
from danswer.key_value_store.factory import get_kv_store
from danswer.key_value_store.interface import JSON_ro
from danswer.key_value_store.interface import KvKeyNotFoundError
from danswer.utils.special_types import JSON_ro
def get_invited_users() -> list[str]:

View File

@@ -23,7 +23,9 @@ def load_no_auth_user_preferences(store: KeyValueStore) -> UserPreferences:
)
return UserPreferences(**preferences_data)
except KvKeyNotFoundError:
return UserPreferences(chosen_assistants=None, default_model=None)
return UserPreferences(
chosen_assistants=None, default_model=None, auto_scroll=True
)
def fetch_no_auth_user(store: KeyValueStore) -> UserInfo:

View File

@@ -13,12 +13,24 @@ class UserRole(str, Enum):
groups they are curators of
- Global Curator can perform admin actions
for all groups they are a member of
- Limited can access a limited set of basic API endpoints
- Slack users are users who have used Danswer via Slack but don't have a web login
- External permissioned users are users picked up during the external permissions sync process but don't have a web login
"""
LIMITED = "limited"
BASIC = "basic"
ADMIN = "admin"
CURATOR = "curator"
GLOBAL_CURATOR = "global_curator"
SLACK_USER = "slack_user"
EXT_PERM_USER = "ext_perm_user"
def is_web_login(self) -> bool:
return self not in [
UserRole.SLACK_USER,
UserRole.EXT_PERM_USER,
]
class UserStatus(str, Enum):
@@ -33,10 +45,8 @@ class UserRead(schemas.BaseUser[uuid.UUID]):
class UserCreate(schemas.BaseUserCreate):
role: UserRole = UserRole.BASIC
has_web_login: bool | None = True
tenant_id: str | None = None
class UserUpdate(schemas.BaseUserUpdate):
role: UserRole
has_web_login: bool | None = True

View File

@@ -48,11 +48,10 @@ from httpx_oauth.integrations.fastapi import OAuth2AuthorizeCallback
from httpx_oauth.oauth2 import BaseOAuth2
from httpx_oauth.oauth2 import OAuth2Token
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy import text
from sqlalchemy.orm import attributes
from sqlalchemy.orm import Session
from sqlalchemy.ext.asyncio import AsyncSession
from danswer.auth.api_key import get_hashed_api_key_from_request
from danswer.auth.invited_users import get_invited_users
from danswer.auth.schemas import UserCreate
from danswer.auth.schemas import UserRole
@@ -75,32 +74,36 @@ from danswer.configs.constants import AuthType
from danswer.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
from danswer.configs.constants import DANSWER_API_KEY_PREFIX
from danswer.configs.constants import UNNAMED_KEY_PLACEHOLDER
from danswer.db.api_key import fetch_user_for_api_key
from danswer.db.auth import get_access_token_db
from danswer.db.auth import get_default_admin_user_emails
from danswer.db.auth import get_user_count
from danswer.db.auth import get_user_db
from danswer.db.auth import SQLAlchemyUserAdminDB
from danswer.db.engine import get_async_session
from danswer.db.engine import get_async_session_with_tenant
from danswer.db.engine import get_session
from danswer.db.engine import get_session_with_tenant
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import AccessToken
from danswer.db.models import OAuthAccount
from danswer.db.models import User
from danswer.db.models import UserTenantMapping
from danswer.db.users import get_user_by_email
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
from danswer.utils.variable_functionality import fetch_ee_implementation_or_noop
from danswer.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.configs import async_return_default_schema
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
class BasicAuthenticationError(HTTPException):
def __init__(self, detail: str):
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
def is_user_admin(user: User | None) -> bool:
if AUTH_TYPE == AuthType.DISABLED:
return True
@@ -190,20 +193,6 @@ def verify_email_domain(email: str) -> None:
)
def get_tenant_id_for_email(email: str) -> str:
if not MULTI_TENANT:
return POSTGRES_DEFAULT_SCHEMA
# Implement logic to get tenant_id from the mapping table
with Session(get_sqlalchemy_engine()) as db_session:
result = db_session.execute(
select(UserTenantMapping.tenant_id).where(UserTenantMapping.email == email)
)
tenant_id = result.scalar_one_or_none()
if tenant_id is None:
raise exceptions.UserNotExists()
return tenant_id
def send_user_verification_email(
user_email: str,
token: str,
@@ -232,25 +221,26 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
reset_password_token_secret = USER_AUTH_SECRET
verification_token_secret = USER_AUTH_SECRET
user_db: SQLAlchemyUserDatabase[User, uuid.UUID]
async def create(
self,
user_create: schemas.UC | UserCreate,
safe: bool = False,
request: Optional[Request] = None,
) -> User:
try:
tenant_id = (
get_tenant_id_for_email(user_create.email)
if MULTI_TENANT
else POSTGRES_DEFAULT_SCHEMA
)
except exceptions.UserNotExists:
raise HTTPException(status_code=401, detail="User not found")
referral_source = None
if request is not None:
referral_source = request.cookies.get("referral_source", None)
if not tenant_id:
raise HTTPException(
status_code=401, detail="User does not belong to an organization"
)
tenant_id = await fetch_ee_implementation_or_noop(
"danswer.server.tenants.provisioning",
"get_or_create_tenant_id",
async_return_default_schema,
)(
email=user_create.email,
referral_source=referral_source,
)
async with get_async_session_with_tenant(tenant_id) as db_session:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
@@ -258,7 +248,9 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
verify_email_is_invited(user_create.email)
verify_email_domain(user_create.email)
if MULTI_TENANT:
tenant_user_db = SQLAlchemyUserAdminDB(db_session, User, OAuthAccount)
tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](
db_session, User, OAuthAccount
)
self.user_db = tenant_user_db
self.database = tenant_user_db
@@ -271,20 +263,15 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
user_create.role = UserRole.ADMIN
else:
user_create.role = UserRole.BASIC
user = None
try:
user = await super().create(user_create, safe=safe, request=request) # type: ignore
except exceptions.UserAlreadyExists:
user = await self.get_by_email(user_create.email)
# Handle case where user has used product outside of web and is now creating an account through web
if (
not user.has_web_login
and hasattr(user_create, "has_web_login")
and user_create.has_web_login
):
if not user.role.is_web_login() and user_create.role.is_web_login():
user_update = UserUpdate(
password=user_create.password,
has_web_login=True,
role=user_create.role,
is_verified=user_create.is_verified,
)
@@ -292,11 +279,13 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
else:
raise exceptions.UserAlreadyExists()
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
return user
async def oauth_callback(
self: "BaseUserManager[models.UOAP, models.ID]",
self,
oauth_name: str,
access_token: str,
account_id: str,
@@ -307,20 +296,24 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
*,
associate_by_email: bool = False,
is_verified_by_default: bool = False,
) -> models.UOAP:
# Get tenant_id from mapping table
try:
tenant_id = (
get_tenant_id_for_email(account_email)
if MULTI_TENANT
else POSTGRES_DEFAULT_SCHEMA
)
except exceptions.UserNotExists:
raise HTTPException(status_code=401, detail="User not found")
) -> User:
referral_source = None
if request:
referral_source = getattr(request.state, "referral_source", None)
tenant_id = await fetch_ee_implementation_or_noop(
"danswer.server.tenants.provisioning",
"get_or_create_tenant_id",
async_return_default_schema,
)(
email=account_email,
referral_source=referral_source,
)
if not tenant_id:
raise HTTPException(status_code=401, detail="User not found")
# Proceed with the tenant context
token = None
async with get_async_session_with_tenant(tenant_id) as db_session:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
@@ -329,9 +322,11 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
verify_email_domain(account_email)
if MULTI_TENANT:
tenant_user_db = SQLAlchemyUserAdminDB(db_session, User, OAuthAccount)
tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](
db_session, User, OAuthAccount
)
self.user_db = tenant_user_db
self.database = tenant_user_db # type: ignore
self.database = tenant_user_db
oauth_account_dict = {
"oauth_name": oauth_name,
@@ -371,9 +366,9 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
# Explicitly set the Postgres schema for this session to ensure
# OAuth account creation happens in the correct tenant schema
await db_session.execute(text(f'SET search_path = "{tenant_id}"'))
user = await self.user_db.add_oauth_account(
user, oauth_account_dict
)
# Add OAuth account
await self.user_db.add_oauth_account(user, oauth_account_dict)
await self.on_after_register(user, request)
else:
@@ -383,7 +378,11 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
and existing_oauth_account.oauth_name == oauth_name
):
user = await self.user_db.update_oauth_account(
user, existing_oauth_account, oauth_account_dict
user,
# NOTE: OAuthAccount DOES implement the OAuthAccountProtocol
# but the type checker doesn't know that :(
existing_oauth_account, # type: ignore
oauth_account_dict,
)
# NOTE: Most IdPs have very short expiry times, and we don't want to force the user to
@@ -396,16 +395,15 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
)
# Handle case where user has used product outside of web and is now creating an account through web
if not user.has_web_login: # type: ignore
if not user.role.is_web_login():
await self.user_db.update(
user,
{
"is_verified": is_verified_by_default,
"has_web_login": True,
"role": UserRole.BASIC,
},
)
user.is_verified = is_verified_by_default
user.has_web_login = True # type: ignore
# this is needed if an organization goes from `TRACK_EXTERNAL_IDP_EXPIRY=true` to `false`
# otherwise, the oidc expiry will always be old, and the user will never be able to login
@@ -453,7 +451,13 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
email = credentials.username
# Get tenant_id from mapping table
tenant_id = get_tenant_id_for_email(email)
tenant_id = await fetch_ee_implementation_or_noop(
"danswer.server.tenants.provisioning",
"get_or_create_tenant_id",
async_return_default_schema,
)(
email=email,
)
if not tenant_id:
# User not found in mapping
self.password_helper.hash(credentials.password)
@@ -474,11 +478,8 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
self.password_helper.hash(credentials.password)
return None
has_web_login = attributes.get_attribute(user, "has_web_login")
if not has_web_login:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
if not user.role.is_web_login():
raise BasicAuthenticationError(
detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD",
)
@@ -510,19 +511,30 @@ cookie_transport = CookieTransport(
# This strategy is used to add tenant_id to the JWT token
class TenantAwareJWTStrategy(JWTStrategy):
async def write_token(self, user: User) -> str:
tenant_id = get_tenant_id_for_email(user.email)
async def _create_token_data(self, user: User, impersonate: bool = False) -> dict:
tenant_id = await fetch_ee_implementation_or_noop(
"danswer.server.tenants.provisioning",
"get_or_create_tenant_id",
async_return_default_schema,
)(
email=user.email,
)
data = {
"sub": str(user.id),
"aud": self.token_audience,
"tenant_id": tenant_id,
}
return data
async def write_token(self, user: User) -> str:
data = await self._create_token_data(user)
return generate_jwt(
data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm
)
def get_jwt_strategy() -> JWTStrategy:
def get_jwt_strategy() -> TenantAwareJWTStrategy:
return TenantAwareJWTStrategy(
secret=USER_AUTH_SECRET,
lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS,
@@ -597,7 +609,7 @@ optional_fastapi_current_user = fastapi_users.current_user(active=True, optional
async def optional_user_(
request: Request,
user: User | None,
db_session: Session,
async_db_session: AsyncSession,
) -> User | None:
"""NOTE: `request` and `db_session` are not used here, but are included
for the EE version of this function."""
@@ -606,13 +618,21 @@ async def optional_user_(
async def optional_user(
request: Request,
db_session: Session = Depends(get_session),
async_db_session: AsyncSession = Depends(get_async_session),
user: User | None = Depends(optional_fastapi_current_user),
) -> User | None:
versioned_fetch_user = fetch_versioned_implementation(
"danswer.auth.users", "optional_user_"
)
return await versioned_fetch_user(request, user, db_session)
user = await versioned_fetch_user(request, user, async_db_session)
# check if an API key is present
if user is None:
hashed_api_key = get_hashed_api_key_from_request(request)
if hashed_api_key:
user = await fetch_user_for_api_key(hashed_api_key, async_db_session)
return user
async def double_check_user(
@@ -624,14 +644,12 @@ async def double_check_user(
return None
if user is None:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
raise BasicAuthenticationError(
detail="Access denied. User is not authenticated.",
)
if user_needs_to_be_verified() and not user.is_verified:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
raise BasicAuthenticationError(
detail="Access denied. User is not verified.",
)
@@ -640,8 +658,7 @@ async def double_check_user(
and user.oidc_expiry < datetime.now(timezone.utc)
and not include_expired
):
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
raise BasicAuthenticationError(
detail="Access denied. User's OIDC token has expired.",
)
@@ -654,12 +671,26 @@ async def current_user_with_expired_token(
return await double_check_user(user, include_expired=True)
async def current_user(
async def current_limited_user(
user: User | None = Depends(optional_user),
) -> User | None:
return await double_check_user(user)
async def current_user(
user: User | None = Depends(optional_user),
) -> User | None:
user = await double_check_user(user)
if not user:
return None
if user.role == UserRole.LIMITED:
raise BasicAuthenticationError(
detail="Access denied. User role is LIMITED. BASIC or higher permissions are required.",
)
return user
async def current_curator_or_admin_user(
user: User | None = Depends(current_user),
) -> User | None:
@@ -667,15 +698,13 @@ async def current_curator_or_admin_user(
return None
if not user or not hasattr(user, "role"):
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
raise BasicAuthenticationError(
detail="Access denied. User is not authenticated or lacks role information.",
)
allowed_roles = {UserRole.GLOBAL_CURATOR, UserRole.CURATOR, UserRole.ADMIN}
if user.role not in allowed_roles:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
raise BasicAuthenticationError(
detail="Access denied. User is not a curator or admin.",
)
@@ -687,8 +716,7 @@ async def current_admin_user(user: User | None = Depends(current_user)) -> User
return None
if not user or not hasattr(user, "role") or user.role != UserRole.ADMIN:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
raise BasicAuthenticationError(
detail="Access denied. User must be an admin to perform this action.",
)
@@ -716,8 +744,6 @@ def generate_state_token(
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
def create_danswer_oauth_router(
oauth_client: BaseOAuth2,
backend: AuthenticationBackend,
@@ -767,15 +793,22 @@ def get_oauth_router(
response_model=OAuth2AuthorizeResponse,
)
async def authorize(
request: Request, scopes: List[str] = Query(None)
request: Request,
scopes: List[str] = Query(None),
) -> OAuth2AuthorizeResponse:
referral_source = request.cookies.get("referral_source", None)
if redirect_url is not None:
authorize_redirect_url = redirect_url
else:
authorize_redirect_url = str(request.url_for(callback_route_name))
next_url = request.query_params.get("next", "/")
state_data: Dict[str, str] = {"next_url": next_url}
state_data: Dict[str, str] = {
"next_url": next_url,
"referral_source": referral_source or "default_referral",
}
state = generate_state_token(state_data, state_secret)
authorization_url = await oauth_client.get_authorization_url(
authorize_redirect_url,
@@ -834,8 +867,11 @@ def get_oauth_router(
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST)
next_url = state_data.get("next_url", "/")
referral_source = state_data.get("referral_source", None)
# Authenticate user
request.state.referral_source = referral_source
# Proceed to authenticate or create the user
try:
user = await user_manager.oauth_callback(
oauth_client.name,
@@ -877,7 +913,25 @@ def get_oauth_router(
redirect_response.status_code = response.status_code
if hasattr(response, "media_type"):
redirect_response.media_type = response.media_type
return redirect_response
return router
async def api_key_dep(
request: Request, async_db_session: AsyncSession = Depends(get_async_session)
) -> User | None:
if AUTH_TYPE == AuthType.DISABLED:
return None
hashed_api_key = get_hashed_api_key_from_request(request)
if not hashed_api_key:
raise HTTPException(status_code=401, detail="Missing API key")
if hashed_api_key:
user = await fetch_user_for_api_key(hashed_api_key, async_db_session)
if user is None:
raise HTTPException(status_code=401, detail="Invalid API key")
return user
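A brief usage sketch of api_key_dep as a route dependency; the router and path are illustrative, not taken from this diff.
from fastapi import APIRouter, Depends

router = APIRouter()

@router.post("/danswer-api/ingestion")
async def ingest(user: User | None = Depends(api_key_dep)) -> dict[str, str]:
    # api_key_dep returns None only when AUTH_TYPE is DISABLED; otherwise it
    # resolves a User from the hashed API key or raises 401 before this handler runs.
    return {"status": "accepted"}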

View File

@@ -3,6 +3,7 @@ import multiprocessing
import time
from typing import Any
import requests
import sentry_sdk
from celery import Task
from celery.app import trace
@@ -11,18 +12,24 @@ from celery.states import READY_STATES
from celery.utils.log import get_task_logger
from celery.worker import strategy # type: ignore
from sentry_sdk.integrations.celery import CeleryIntegration
from sqlalchemy import text
from sqlalchemy.orm import Session
from danswer.background.celery.apps.task_formatters import CeleryTaskColoredFormatter
from danswer.background.celery.apps.task_formatters import CeleryTaskPlainFormatter
from danswer.background.celery.celery_redis import RedisConnectorCredentialPair
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.celery.celery_redis import RedisConnectorPruning
from danswer.background.celery.celery_redis import RedisDocumentSet
from danswer.background.celery.celery_redis import RedisUserGroup
from danswer.background.celery.celery_utils import celery_is_worker_primary
from danswer.configs.constants import DanswerRedisLocks
from danswer.db.engine import get_all_tenant_ids
from danswer.db.engine import get_sqlalchemy_engine
from danswer.document_index.vespa_constants import VESPA_CONFIG_SERVER_URL
from danswer.redis.redis_connector import RedisConnector
from danswer.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
from danswer.redis.redis_connector_delete import RedisConnectorDelete
from danswer.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from danswer.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync
from danswer.redis.redis_connector_prune import RedisConnectorPrune
from danswer.redis.redis_document_set import RedisDocumentSet
from danswer.redis.redis_pool import get_redis_client
from danswer.redis.redis_usergroup import RedisUserGroup
from danswer.utils.logger import ColoredFormatter
from danswer.utils.logger import PlainFormatter
from danswer.utils.logger import setup_logger
@@ -108,29 +115,43 @@ def on_task_postrun(
if task_id.startswith(RedisDocumentSet.PREFIX):
document_set_id = RedisDocumentSet.get_id_from_task_id(task_id)
if document_set_id is not None:
rds = RedisDocumentSet(int(document_set_id))
rds = RedisDocumentSet(tenant_id, int(document_set_id))
r.srem(rds.taskset_key, task_id)
return
if task_id.startswith(RedisUserGroup.PREFIX):
usergroup_id = RedisUserGroup.get_id_from_task_id(task_id)
if usergroup_id is not None:
rug = RedisUserGroup(int(usergroup_id))
rug = RedisUserGroup(tenant_id, int(usergroup_id))
r.srem(rug.taskset_key, task_id)
return
if task_id.startswith(RedisConnectorDeletion.PREFIX):
cc_pair_id = RedisConnectorDeletion.get_id_from_task_id(task_id)
if task_id.startswith(RedisConnectorDelete.PREFIX):
cc_pair_id = RedisConnector.get_id_from_task_id(task_id)
if cc_pair_id is not None:
rcd = RedisConnectorDeletion(int(cc_pair_id))
r.srem(rcd.taskset_key, task_id)
RedisConnectorDelete.remove_from_taskset(int(cc_pair_id), task_id, r)
return
if task_id.startswith(RedisConnectorPruning.SUBTASK_PREFIX):
cc_pair_id = RedisConnectorPruning.get_id_from_task_id(task_id)
if task_id.startswith(RedisConnectorPrune.SUBTASK_PREFIX):
cc_pair_id = RedisConnector.get_id_from_task_id(task_id)
if cc_pair_id is not None:
rcp = RedisConnectorPruning(int(cc_pair_id))
r.srem(rcp.taskset_key, task_id)
RedisConnectorPrune.remove_from_taskset(int(cc_pair_id), task_id, r)
return
if task_id.startswith(RedisConnectorPermissionSync.SUBTASK_PREFIX):
cc_pair_id = RedisConnector.get_id_from_task_id(task_id)
if cc_pair_id is not None:
RedisConnectorPermissionSync.remove_from_taskset(
int(cc_pair_id), task_id, r
)
return
if task_id.startswith(RedisConnectorExternalGroupSync.SUBTASK_PREFIX):
cc_pair_id = RedisConnector.get_id_from_task_id(task_id)
if cc_pair_id is not None:
RedisConnectorExternalGroupSync.remove_from_taskset(
int(cc_pair_id), task_id, r
)
return
@@ -140,77 +161,154 @@ def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None
def wait_for_redis(sender: Any, **kwargs: Any) -> None:
"""Waits for redis to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout is reached."""
r = get_redis_client(tenant_id=None)
WAIT_INTERVAL = 5
WAIT_LIMIT = 60
ready = False
time_start = time.monotonic()
logger.info("Redis: Readiness check starting.")
logger.info("Redis: Readiness probe starting.")
while True:
try:
if r.ping():
ready = True
break
except Exception:
pass
time_elapsed = time.monotonic() - time_start
logger.info(
f"Redis: Ping failed. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
)
if time_elapsed > WAIT_LIMIT:
msg = (
f"Redis: Readiness check did not succeed within the timeout "
f"({WAIT_LIMIT} seconds). Exiting..."
)
logger.error(msg)
raise WorkerShutdown(msg)
break
logger.info(
f"Redis: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
)
time.sleep(WAIT_INTERVAL)
logger.info("Redis: Readiness check succeeded. Continuing...")
if not ready:
msg = (
f"Redis: Readiness probe did not succeed within the timeout "
f"({WAIT_LIMIT} seconds). Exiting..."
)
logger.error(msg)
raise WorkerShutdown(msg)
logger.info("Redis: Readiness probe succeeded. Continuing...")
return
def wait_for_db(sender: Any, **kwargs: Any) -> None:
"""Waits for the db to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout is reached."""
WAIT_INTERVAL = 5
WAIT_LIMIT = 60
ready = False
time_start = time.monotonic()
logger.info("Database: Readiness probe starting.")
while True:
try:
with Session(get_sqlalchemy_engine()) as db_session:
result = db_session.execute(text("SELECT NOW()")).scalar()
if result:
ready = True
break
except Exception:
pass
time_elapsed = time.monotonic() - time_start
if time_elapsed > WAIT_LIMIT:
break
logger.info(
f"Database: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
)
time.sleep(WAIT_INTERVAL)
if not ready:
msg = (
f"Database: Readiness probe did not succeed within the timeout "
f"({WAIT_LIMIT} seconds). Exiting..."
)
logger.error(msg)
raise WorkerShutdown(msg)
logger.info("Database: Readiness probe succeeded. Continuing...")
return
def wait_for_vespa(sender: Any, **kwargs: Any) -> None:
"""Waits for Vespa to become ready subject to a hardcoded timeout.
Will raise WorkerShutdown to kill the celery worker if the timeout is reached."""
WAIT_INTERVAL = 5
WAIT_LIMIT = 60
ready = False
time_start = time.monotonic()
logger.info("Vespa: Readiness probe starting.")
while True:
try:
response = requests.get(f"{VESPA_CONFIG_SERVER_URL}/state/v1/health")
response.raise_for_status()
response_dict = response.json()
if response_dict["status"]["code"] == "up":
ready = True
break
except Exception:
pass
time_elapsed = time.monotonic() - time_start
if time_elapsed > WAIT_LIMIT:
break
logger.info(
f"Vespa: Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
)
time.sleep(WAIT_INTERVAL)
if not ready:
msg = (
f"Vespa: Readiness probe did not succeed within the timeout "
f"({WAIT_LIMIT} seconds). Exiting..."
)
logger.error(msg)
raise WorkerShutdown(msg)
logger.info("Vespa: Readiness probe succeeded. Continuing...")
return
def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
logger.info("Running as a secondary celery worker.")
# Set up variables for waiting on primary worker
WAIT_INTERVAL = 5
WAIT_LIMIT = 60
logger.info("Running as a secondary celery worker.")
logger.info("Waiting for all tenant primary workers to be ready...")
r = get_redis_client(tenant_id=None)
time_start = time.monotonic()
logger.info("Waiting for primary worker to be ready...")
while True:
tenant_ids = get_all_tenant_ids()
# Check if we have a primary worker lock for each tenant
all_tenants_ready = all(
get_redis_client(tenant_id=tenant_id).exists(
DanswerRedisLocks.PRIMARY_WORKER
)
for tenant_id in tenant_ids
)
if all_tenants_ready:
if r.exists(DanswerRedisLocks.PRIMARY_WORKER):
break
time_elapsed = time.monotonic() - time_start
ready_tenants = sum(
1
for tenant_id in tenant_ids
if get_redis_client(tenant_id=tenant_id).exists(
DanswerRedisLocks.PRIMARY_WORKER
)
)
logger.info(
f"Not all tenant primary workers are ready yet. "
f"Ready tenants: {ready_tenants}/{len(tenant_ids)} "
f"elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
f"Primary worker is not ready yet. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}"
)
if time_elapsed > WAIT_LIMIT:
msg = (
f"Not all tenant primary workers were ready within the timeout "
f"Primary worker was not ready within the timeout. "
f"({WAIT_LIMIT} seconds). Exiting..."
)
logger.error(msg)
@@ -218,7 +316,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
time.sleep(WAIT_INTERVAL)
logger.info("All tenant primary workers are ready. Continuing...")
logger.info("Wait for primary worker completed successfully. Continuing...")
return
@@ -230,26 +328,20 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
if not celery_is_worker_primary(sender):
return
if not hasattr(sender, "primary_worker_locks"):
if not sender.primary_worker_lock:
return
for tenant_id, lock in sender.primary_worker_locks.items():
try:
if lock and lock.owned():
logger.debug(f"Attempting to release lock for tenant {tenant_id}")
try:
lock.release()
logger.debug(f"Successfully released lock for tenant {tenant_id}")
except Exception as e:
logger.error(
f"Failed to release lock for tenant {tenant_id}. Error: {str(e)}"
)
finally:
sender.primary_worker_locks[tenant_id] = None
except Exception as e:
logger.error(
f"Error checking lock status for tenant {tenant_id}. Error: {str(e)}"
)
logger.info("Releasing primary worker lock.")
lock = sender.primary_worker_lock
try:
if lock.owned():
try:
lock.release()
sender.primary_worker_lock = None
except Exception as e:
logger.error(f"Failed to release primary worker lock: {e}")
except Exception as e:
logger.error(f"Failed to check if primary worker lock is owned: {e}")
def on_setup_logging(

View File

@@ -3,28 +3,162 @@ from typing import Any
from celery import Celery
from celery import signals
from celery.beat import PersistentScheduler # type: ignore
from celery.signals import beat_init
import danswer.background.celery.apps.app_base as app_base
from danswer.configs.constants import DanswerCeleryPriority
from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME
from danswer.db.engine import get_all_tenant_ids
from danswer.db.engine import SqlEngine
from danswer.utils.logger import setup_logger
from danswer.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
logger = setup_logger(__name__)
celery_app = Celery(__name__)
celery_app.config_from_object("danswer.background.celery.configs.beat")
class DynamicTenantScheduler(PersistentScheduler):
def __init__(self, *args: Any, **kwargs: Any) -> None:
logger.info("Initializing DynamicTenantScheduler")
super().__init__(*args, **kwargs)
self._reload_interval = timedelta(minutes=2)
self._last_reload = self.app.now() - self._reload_interval
# Let the parent class handle store initialization
self.setup_schedule()
self._update_tenant_tasks()
logger.info(f"Set reload interval to {self._reload_interval}")
def setup_schedule(self) -> None:
logger.info("Setting up initial schedule")
super().setup_schedule()
logger.info("Initial schedule setup complete")
def tick(self) -> float:
retval = super().tick()
now = self.app.now()
if (
self._last_reload is None
or (now - self._last_reload) > self._reload_interval
):
logger.info("Reload interval reached, initiating tenant task update")
self._update_tenant_tasks()
self._last_reload = now
logger.info("Tenant task update completed, reset reload timer")
return retval
def _update_tenant_tasks(self) -> None:
logger.info("Starting tenant task update process")
try:
logger.info("Fetching all tenant IDs")
tenant_ids = get_all_tenant_ids()
logger.info(f"Found {len(tenant_ids)} tenants")
logger.info("Fetching tasks to schedule")
tasks_to_schedule = fetch_versioned_implementation(
"danswer.background.celery.tasks.beat_schedule", "get_tasks_to_schedule"
)
new_beat_schedule: dict[str, dict[str, Any]] = {}
current_schedule = self.schedule.items()
existing_tenants = set()
for task_name, _ in current_schedule:
if "-" in task_name:
existing_tenants.add(task_name.split("-")[-1])
logger.info(f"Found {len(existing_tenants)} existing tenants in schedule")
for tenant_id in tenant_ids:
if (
IGNORED_SYNCING_TENANT_LIST
and tenant_id in IGNORED_SYNCING_TENANT_LIST
):
logger.info(
f"Skipping tenant {tenant_id} as it is in the ignored syncing list"
)
continue
if tenant_id not in existing_tenants:
logger.info(f"Processing new tenant: {tenant_id}")
for task in tasks_to_schedule():
task_name = f"{task['name']}-{tenant_id}"
logger.debug(f"Creating task configuration for {task_name}")
new_task = {
"task": task["task"],
"schedule": task["schedule"],
"kwargs": {"tenant_id": tenant_id},
}
if options := task.get("options"):
logger.debug(f"Adding options to task {task_name}: {options}")
new_task["options"] = options
new_beat_schedule[task_name] = new_task
if self._should_update_schedule(current_schedule, new_beat_schedule):
logger.info(
"Schedule update required",
extra={
"new_tasks": len(new_beat_schedule),
"current_tasks": len(current_schedule),
},
)
# Create schedule entries
entries = {}
for name, entry in new_beat_schedule.items():
entries[name] = self.Entry(
name=name,
app=self.app,
task=entry["task"],
schedule=entry["schedule"],
options=entry.get("options", {}),
kwargs=entry.get("kwargs", {}),
)
# Update the schedule using the scheduler's methods
self.schedule.clear()
self.schedule.update(entries)
# Ensure changes are persisted
self.sync()
logger.info("Schedule update completed successfully")
else:
logger.info("Schedule is up to date, no changes needed")
except (AttributeError, KeyError):
logger.exception("Failed to process task configuration")
except Exception:
logger.exception("Unexpected error updating tenant tasks")
def _should_update_schedule(
self, current_schedule: dict, new_schedule: dict
) -> bool:
"""Compare schedules to determine if an update is needed."""
logger.debug("Comparing current and new schedules")
current_tasks = set(name for name, _ in current_schedule)
new_tasks = set(new_schedule.keys())
needs_update = current_tasks != new_tasks
logger.debug(f"Schedule update needed: {needs_update}")
return needs_update
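For context, a sketch of how the custom scheduler is attached to the app. The in-module assignment mirrors the line at the end of this file's hunk; the CLI form is the standard Celery alternative, with the module path assumed from this file's location.
# In-module wiring (same assignment as at the bottom of this file):
celery_app.conf.beat_scheduler = DynamicTenantScheduler

# Equivalent CLI form (module path assumed):
#   celery -A danswer.background.celery.apps.beat beat \
#       --scheduler danswer.background.celery.apps.beat.DynamicTenantScheduler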
@beat_init.connect
def on_beat_init(sender: Any, **kwargs: Any) -> None:
logger.info("beat_init signal received.")
# celery beat shouldn't touch the db at all. But just setting a low minimum here.
# Celery beat shouldn't touch the db at all. But just setting a low minimum here.
SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME)
SqlEngine.init_engine(pool_size=2, max_overflow=0)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
@@ -35,68 +169,4 @@ def on_setup_logging(
app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)
#####
# Celery Beat (Periodic Tasks) Settings
#####
tenant_ids = get_all_tenant_ids()
tasks_to_schedule = [
{
"name": "check-for-vespa-sync",
"task": "check_for_vespa_sync_task",
"schedule": timedelta(seconds=5),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "check-for-connector-deletion",
"task": "check_for_connector_deletion_task",
"schedule": timedelta(seconds=60),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "check-for-indexing",
"task": "check_for_indexing",
"schedule": timedelta(seconds=10),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "check-for-prune",
"task": "check_for_pruning",
"schedule": timedelta(seconds=10),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "kombu-message-cleanup",
"task": "kombu_message_cleanup_task",
"schedule": timedelta(seconds=3600),
"options": {"priority": DanswerCeleryPriority.LOWEST},
},
{
"name": "monitor-vespa-sync",
"task": "monitor_vespa_sync",
"schedule": timedelta(seconds=5),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
]
# Build the celery beat schedule dynamically
beat_schedule = {}
for tenant_id in tenant_ids:
for task in tasks_to_schedule:
task_name = f"{task['name']}-{tenant_id}" # Unique name for each scheduled task
beat_schedule[task_name] = {
"task": task["task"],
"schedule": task["schedule"],
"options": task["options"],
"kwargs": {"tenant_id": tenant_id}, # Must pass tenant_id as an argument
}
# Include any existing beat schedules
existing_beat_schedule = celery_app.conf.beat_schedule or {}
beat_schedule.update(existing_beat_schedule)
# Update the Celery app configuration once
celery_app.conf.beat_schedule = beat_schedule
celery_app.conf.beat_scheduler = DynamicTenantScheduler

View File

@@ -13,6 +13,7 @@ import danswer.background.celery.apps.app_base as app_base
from danswer.configs.constants import POSTGRES_CELERY_WORKER_HEAVY_APP_NAME
from danswer.db.engine import SqlEngine
from danswer.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
@@ -60,7 +61,13 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME)
SqlEngine.init_engine(pool_size=4, max_overflow=12)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa(sender, **kwargs)
app_base.on_secondary_worker_init(sender, **kwargs)
@@ -84,5 +91,7 @@ def on_setup_logging(
celery_app.autodiscover_tasks(
[
"danswer.background.celery.tasks.pruning",
"danswer.background.celery.tasks.doc_permission_syncing",
"danswer.background.celery.tasks.external_group_syncing",
]
)

View File

@@ -6,6 +6,7 @@ from celery import signals
from celery import Task
from celery.signals import celeryd_init
from celery.signals import worker_init
from celery.signals import worker_process_init
from celery.signals import worker_ready
from celery.signals import worker_shutdown
@@ -13,6 +14,7 @@ import danswer.background.celery.apps.app_base as app_base
from danswer.configs.constants import POSTGRES_CELERY_WORKER_INDEXING_APP_NAME
from danswer.db.engine import SqlEngine
from danswer.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
@@ -58,9 +60,15 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}")
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_APP_NAME)
SqlEngine.init_engine(pool_size=8, max_overflow=0)
SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=sender.concurrency)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa(sender, **kwargs)
app_base.on_secondary_worker_init(sender, **kwargs)
@@ -74,6 +82,11 @@ def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
app_base.on_worker_shutdown(sender, **kwargs)
@worker_process_init.connect
def init_worker(**kwargs: Any) -> None:
SqlEngine.reset_engine()
@signals.setup_logging.connect
def on_setup_logging(
loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any

View File

@@ -13,6 +13,7 @@ import danswer.background.celery.apps.app_base as app_base
from danswer.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME
from danswer.db.engine import SqlEngine
from danswer.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
@@ -59,8 +60,13 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME)
SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=8)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa(sender, **kwargs)
app_base.on_secondary_worker_init(sender, **kwargs)
@@ -85,5 +91,7 @@ celery_app.autodiscover_tasks(
[
"danswer.background.celery.tasks.shared",
"danswer.background.celery.tasks.vespa",
"danswer.background.celery.tasks.connector_deletion",
"danswer.background.celery.tasks.doc_permission_syncing",
]
)

View File

@@ -1,5 +1,6 @@
import multiprocessing
from typing import Any
from typing import cast
from celery import bootsteps # type: ignore
from celery import Celery
@@ -13,21 +14,29 @@ from celery.signals import worker_shutdown
import danswer.background.celery.apps.app_base as app_base
from danswer.background.celery.apps.app_base import task_logger
from danswer.background.celery.celery_redis import RedisConnectorCredentialPair
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.celery.celery_redis import RedisConnectorIndexing
from danswer.background.celery.celery_redis import RedisConnectorPruning
from danswer.background.celery.celery_redis import RedisConnectorStop
from danswer.background.celery.celery_redis import RedisDocumentSet
from danswer.background.celery.celery_redis import RedisUserGroup
from danswer.background.celery.celery_utils import celery_is_worker_primary
from danswer.background.celery.tasks.indexing.tasks import (
get_unfenced_index_attempt_ids,
)
from danswer.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT
from danswer.configs.constants import DanswerRedisLocks
from danswer.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME
from danswer.db.engine import get_all_tenant_ids
from danswer.db.engine import get_session_with_default_tenant
from danswer.db.engine import SqlEngine
from danswer.db.index_attempt import get_index_attempt
from danswer.db.index_attempt import mark_attempt_canceled
from danswer.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
from danswer.redis.redis_connector_delete import RedisConnectorDelete
from danswer.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from danswer.redis.redis_connector_ext_group_sync import RedisConnectorExternalGroupSync
from danswer.redis.redis_connector_index import RedisConnectorIndex
from danswer.redis.redis_connector_prune import RedisConnectorPrune
from danswer.redis.redis_connector_stop import RedisConnectorStop
from danswer.redis.redis_document_set import RedisDocumentSet
from danswer.redis.redis_pool import get_redis_client
from danswer.redis.redis_usergroup import RedisUserGroup
from danswer.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
@@ -75,95 +84,94 @@ def on_worker_init(sender: Any, **kwargs: Any) -> None:
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME)
SqlEngine.init_engine(pool_size=8, max_overflow=0)
# Startup checks are not needed in multi-tenant case
if MULTI_TENANT:
return
app_base.wait_for_redis(sender, **kwargs)
app_base.wait_for_db(sender, **kwargs)
app_base.wait_for_vespa(sender, **kwargs)
logger.info("Running as the primary celery worker.")
sender.primary_worker_locks = {}
# This is singleton work that should be done on startup exactly once
# by the primary worker
tenant_ids = get_all_tenant_ids()
for tenant_id in tenant_ids:
r = get_redis_client(tenant_id=tenant_id)
# by the primary worker. This is unnecessary in the multi tenant scenario
r = get_redis_client(tenant_id=None)
# For the moment, we're assuming that we are the only primary worker
# that should be running.
# TODO: maybe check for or clean up another zombie primary worker if we detect it
r.delete(DanswerRedisLocks.PRIMARY_WORKER)
# Log the role and slave count - being connected to a slave or slave count > 0 could be problematic
info: dict[str, Any] = cast(dict, r.info("replication"))
role: str = cast(str, info.get("role"))
connected_slaves: int = info.get("connected_slaves", 0)
# this process wide lock is taken to help other workers start up in order.
# it is planned to use this lock to enforce singleton behavior on the primary
# worker, since the primary worker does redis cleanup on startup, but this isn't
# implemented yet.
lock = r.lock(
DanswerRedisLocks.PRIMARY_WORKER,
timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,
)
logger.info(
f"Redis INFO REPLICATION: role={role} connected_slaves={connected_slaves}"
)
logger.info("Primary worker lock: Acquire starting.")
acquired = lock.acquire(blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2)
if acquired:
logger.info("Primary worker lock: Acquire succeeded.")
else:
logger.error("Primary worker lock: Acquire failed!")
raise WorkerShutdown("Primary worker lock could not be acquired!")
# For the moment, we're assuming that we are the only primary worker
# that should be running.
# TODO: maybe check for or clean up another zombie primary worker if we detect it
r.delete(DanswerRedisLocks.PRIMARY_WORKER)
# tacking on our own user data to the sender
sender.primary_worker_locks[tenant_id] = lock
# this process wide lock is taken to help other workers start up in order.
# it is planned to use this lock to enforce singleton behavior on the primary
# worker, since the primary worker does redis cleanup on startup, but this isn't
# implemented yet.
lock = r.lock(
DanswerRedisLocks.PRIMARY_WORKER,
timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,
)
# As currently designed, when this worker starts as "primary", we reinitialize redis
# to a clean state (for our purposes, anyway)
r.delete(DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK)
r.delete(DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
logger.info("Primary worker lock: Acquire starting.")
acquired = lock.acquire(blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2)
if acquired:
logger.info("Primary worker lock: Acquire succeeded.")
else:
logger.error("Primary worker lock: Acquire failed!")
raise WorkerShutdown("Primary worker lock could not be acquired!")
r.delete(RedisConnectorCredentialPair.get_taskset_key())
r.delete(RedisConnectorCredentialPair.get_fence_key())
# tacking on our own user data to the sender
sender.primary_worker_lock = lock
for key in r.scan_iter(RedisDocumentSet.TASKSET_PREFIX + "*"):
r.delete(key)
# As currently designed, when this worker starts as "primary", we reinitialize redis
# to a clean state (for our purposes, anyway)
r.delete(DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK)
r.delete(DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
for key in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"):
r.delete(key)
r.delete(RedisConnectorCredentialPair.get_taskset_key())
r.delete(RedisConnectorCredentialPair.get_fence_key())
for key in r.scan_iter(RedisUserGroup.TASKSET_PREFIX + "*"):
r.delete(key)
RedisDocumentSet.reset_all(r)
for key in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"):
r.delete(key)
RedisUserGroup.reset_all(r)
for key in r.scan_iter(RedisConnectorDeletion.TASKSET_PREFIX + "*"):
r.delete(key)
RedisConnectorDelete.reset_all(r)
for key in r.scan_iter(RedisConnectorDeletion.FENCE_PREFIX + "*"):
r.delete(key)
RedisConnectorPrune.reset_all(r)
for key in r.scan_iter(RedisConnectorPruning.TASKSET_PREFIX + "*"):
r.delete(key)
RedisConnectorIndex.reset_all(r)
for key in r.scan_iter(RedisConnectorPruning.GENERATOR_COMPLETE_PREFIX + "*"):
r.delete(key)
RedisConnectorStop.reset_all(r)
for key in r.scan_iter(RedisConnectorPruning.GENERATOR_PROGRESS_PREFIX + "*"):
r.delete(key)
RedisConnectorPermissionSync.reset_all(r)
for key in r.scan_iter(RedisConnectorPruning.FENCE_PREFIX + "*"):
r.delete(key)
RedisConnectorExternalGroupSync.reset_all(r)
for key in r.scan_iter(RedisConnectorIndexing.TASKSET_PREFIX + "*"):
r.delete(key)
# mark orphaned index attempts as failed
with get_session_with_default_tenant() as db_session:
unfenced_attempt_ids = get_unfenced_index_attempt_ids(db_session, r)
for attempt_id in unfenced_attempt_ids:
attempt = get_index_attempt(db_session, attempt_id)
if not attempt:
continue
for key in r.scan_iter(RedisConnectorIndexing.GENERATOR_COMPLETE_PREFIX + "*"):
r.delete(key)
for key in r.scan_iter(RedisConnectorIndexing.GENERATOR_PROGRESS_PREFIX + "*"):
r.delete(key)
for key in r.scan_iter(RedisConnectorIndexing.FENCE_PREFIX + "*"):
r.delete(key)
for key in r.scan_iter(RedisConnectorStop.FENCE_PREFIX + "*"):
r.delete(key)
failure_reason = (
f"Canceling leftover index attempt found on startup: "
f"index_attempt={attempt.id} "
f"cc_pair={attempt.connector_credential_pair_id} "
f"search_settings={attempt.search_settings_id}"
)
logger.warning(failure_reason)
mark_attempt_canceled(attempt.id, db_session, failure_reason)
@worker_ready.connect
@@ -216,52 +224,36 @@ class HubPeriodicTask(bootsteps.StartStopStep):
if not celery_is_worker_primary(worker):
return
if not hasattr(worker, "primary_worker_locks"):
if not hasattr(worker, "primary_worker_lock"):
return
# Retrieve all tenant IDs
tenant_ids = get_all_tenant_ids()
lock = worker.primary_worker_lock
for tenant_id in tenant_ids:
lock = worker.primary_worker_locks.get(tenant_id)
if not lock:
continue # Skip if no lock for this tenant
r = get_redis_client(tenant_id=None)
r = get_redis_client(tenant_id=tenant_id)
if lock.owned():
task_logger.debug("Reacquiring primary worker lock.")
lock.reacquire()
else:
task_logger.warning(
"Full acquisition of primary worker lock. "
"Reasons could be worker restart or lock expiration."
)
lock = r.lock(
DanswerRedisLocks.PRIMARY_WORKER,
timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,
)
if lock.owned():
task_logger.debug(
f"Reacquiring primary worker lock for tenant {tenant_id}."
)
lock.reacquire()
task_logger.info("Primary worker lock: Acquire starting.")
acquired = lock.acquire(
blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2
)
if acquired:
task_logger.info("Primary worker lock: Acquire succeeded.")
worker.primary_worker_lock = lock
else:
task_logger.warning(
f"Full acquisition of primary worker lock for tenant {tenant_id}. "
"Reasons could be worker restart or lock expiration."
)
lock = r.lock(
DanswerRedisLocks.PRIMARY_WORKER,
timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT,
)
task_logger.info(
f"Primary worker lock for tenant {tenant_id}: Acquire starting."
)
acquired = lock.acquire(
blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2
)
if acquired:
task_logger.info(
f"Primary worker lock for tenant {tenant_id}: Acquire succeeded."
)
worker.primary_worker_locks[tenant_id] = lock
else:
task_logger.error(
f"Primary worker lock for tenant {tenant_id}: Acquire failed!"
)
raise TimeoutError(
f"Primary worker lock for tenant {tenant_id} could not be acquired!"
)
task_logger.error("Primary worker lock: Acquire failed!")
raise TimeoutError("Primary worker lock could not be acquired!")
except Exception:
task_logger.exception("Periodic task failed.")
@@ -280,6 +272,8 @@ celery_app.autodiscover_tasks(
"danswer.background.celery.tasks.connector_deletion",
"danswer.background.celery.tasks.indexing",
"danswer.background.celery.tasks.periodic",
"danswer.background.celery.tasks.doc_permission_syncing",
"danswer.background.celery.tasks.external_group_syncing",
"danswer.background.celery.tasks.pruning",
"danswer.background.celery.tasks.shared",
"danswer.background.celery.tasks.vespa",

View File

@@ -1,568 +1,10 @@
# These are helper objects for tracking the keys we need to write in redis
import time
from abc import ABC
from abc import abstractmethod
from typing import cast
from uuid import uuid4
import redis
from celery import Celery
from redis import Redis
from sqlalchemy.orm import Session
from danswer.background.celery.configs.base import CELERY_SEPARATOR
from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from danswer.configs.constants import DanswerCeleryPriority
from danswer.configs.constants import DanswerCeleryQueues
from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
from danswer.db.document import construct_document_select_for_connector_credential_pair
from danswer.db.document import (
construct_document_select_for_connector_credential_pair_by_needs_sync,
)
from danswer.db.document_set import construct_document_select_by_docset
from danswer.utils.variable_functionality import fetch_versioned_implementation
from danswer.utils.variable_functionality import global_version
class RedisObjectHelper(ABC):
PREFIX = "base"
FENCE_PREFIX = PREFIX + "_fence"
TASKSET_PREFIX = PREFIX + "_taskset"
def __init__(self, id: str):
self._id: str = id
@property
def task_id_prefix(self) -> str:
return f"{self.PREFIX}_{self._id}"
@property
def fence_key(self) -> str:
# example: documentset_fence_1
return f"{self.FENCE_PREFIX}_{self._id}"
@property
def taskset_key(self) -> str:
# example: documentset_taskset_1
return f"{self.TASKSET_PREFIX}_{self._id}"
@staticmethod
def get_id_from_fence_key(key: str) -> str | None:
"""
Extracts the object ID from a fence key in the format `PREFIX_fence_X`.
Args:
key (str): The fence key string.
Returns:
str | None: The extracted ID if the key is in the correct format, otherwise None.
"""
parts = key.split("_")
if len(parts) != 3:
return None
object_id = parts[2]
return object_id
@staticmethod
def get_id_from_task_id(task_id: str) -> str | None:
"""
Extracts the object ID from a task ID string.
This method assumes the task ID is formatted as `prefix_objectid_suffix`, where:
- `prefix` is an arbitrary string (e.g., the name of the task or entity),
- `objectid` is the ID you want to extract,
- `suffix` is another arbitrary string (e.g., a UUID).
Example:
If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`,
this method will return the string `"1"`.
Args:
task_id (str): The task ID string from which to extract the object ID.
Returns:
str | None: The extracted object ID if the task ID is in the correct format, otherwise None.
"""
# example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc
parts = task_id.split("_")
if len(parts) != 3:
return None
object_id = parts[1]
return object_id
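A few illustrative checks of the parsing rules described in the two docstrings above, derived mechanically from the documented key format (assumes RedisObjectHelper from this module is in scope).
assert RedisObjectHelper.get_id_from_fence_key("documentset_fence_1") == "1"
assert (
    RedisObjectHelper.get_id_from_task_id(
        "documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc"
    )
    == "1"
)
# Strings that do not split into exactly three underscore-separated parts yield None.
assert RedisObjectHelper.get_id_from_task_id("malformed-task-id") is None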
@abstractmethod
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
pass
class RedisDocumentSet(RedisObjectHelper):
PREFIX = "documentset"
FENCE_PREFIX = PREFIX + "_fence"
TASKSET_PREFIX = PREFIX + "_taskset"
def __init__(self, id: int) -> None:
super().__init__(str(id))
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
async_results = []
stmt = construct_document_select_by_docset(int(self._id), current_only=False)
for doc in db_session.scalars(stmt).yield_per(1):
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
):
lock.reacquire()
last_lock_time = current_time
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
# the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
# we prefix the task id so it's easier to keep track of who created the task
# aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
custom_task_id = f"{self.task_id_prefix}_{uuid4()}"
# add to the set BEFORE creating the task.
redis_client.sadd(self.taskset_key, custom_task_id)
result = celery_app.send_task(
"vespa_metadata_sync_task",
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
queue=DanswerCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=DanswerCeleryPriority.LOW,
)
async_results.append(result)
return len(async_results)
class RedisUserGroup(RedisObjectHelper):
PREFIX = "usergroup"
FENCE_PREFIX = PREFIX + "_fence"
TASKSET_PREFIX = PREFIX + "_taskset"
def __init__(self, id: int) -> None:
super().__init__(str(id))
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
async_results = []
if not global_version.is_ee_version():
return 0
try:
construct_document_select_by_usergroup = fetch_versioned_implementation(
"danswer.db.user_group",
"construct_document_select_by_usergroup",
)
except ModuleNotFoundError:
return 0
stmt = construct_document_select_by_usergroup(int(self._id))
for doc in db_session.scalars(stmt).yield_per(1):
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
):
lock.reacquire()
last_lock_time = current_time
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
# the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
# we prefix the task id so it's easier to keep track of who created the task
# aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
custom_task_id = f"{self.task_id_prefix}_{uuid4()}"
# add to the set BEFORE creating the task.
redis_client.sadd(self.taskset_key, custom_task_id)
result = celery_app.send_task(
"vespa_metadata_sync_task",
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
queue=DanswerCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=DanswerCeleryPriority.LOW,
)
async_results.append(result)
return len(async_results)
class RedisConnectorCredentialPair(RedisObjectHelper):
"""This class is used to scan documents by cc_pair in the db and collect them into
a unified set for syncing.
It differs from the other redis helpers in that the taskset used spans
all connectors and is not per connector."""
PREFIX = "connectorsync"
FENCE_PREFIX = PREFIX + "_fence"
TASKSET_PREFIX = PREFIX + "_taskset"
def __init__(self, id: int) -> None:
super().__init__(str(id))
@classmethod
def get_fence_key(cls) -> str:
return RedisConnectorCredentialPair.FENCE_PREFIX
@classmethod
def get_taskset_key(cls) -> str:
return RedisConnectorCredentialPair.TASKSET_PREFIX
@property
def taskset_key(self) -> str:
"""Notice that this is intentionally reusing the same taskset for all
connector syncs"""
# example: connector_taskset
return f"{self.TASKSET_PREFIX}"
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
async_results = []
cc_pair = get_connector_credential_pair_from_id(int(self._id), db_session)
if not cc_pair:
return None
stmt = construct_document_select_for_connector_credential_pair_by_needs_sync(
cc_pair.connector_id, cc_pair.credential_id
)
for doc in db_session.scalars(stmt).yield_per(1):
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
):
lock.reacquire()
last_lock_time = current_time
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
# the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
# we prefix the task id so it's easier to keep track of who created the task
# aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
custom_task_id = f"{self.task_id_prefix}_{uuid4()}"
# add to the tracking taskset in redis BEFORE creating the celery task.
# note that for the moment we are using a single taskset key, not differentiated by cc_pair id
redis_client.sadd(
RedisConnectorCredentialPair.get_taskset_key(), custom_task_id
)
# Priority on syncs triggered by new indexing should be medium
result = celery_app.send_task(
"vespa_metadata_sync_task",
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
queue=DanswerCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=DanswerCeleryPriority.MEDIUM,
)
async_results.append(result)
return len(async_results)
class RedisConnectorDeletion(RedisObjectHelper):
PREFIX = "connectordeletion"
FENCE_PREFIX = PREFIX + "_fence"
TASKSET_PREFIX = PREFIX + "_taskset"
def __init__(self, id: int) -> None:
super().__init__(str(id))
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
"""Returns None if the cc_pair doesn't exist.
Otherwise, returns an int with the number of generated tasks."""
last_lock_time = time.monotonic()
async_results = []
cc_pair = get_connector_credential_pair_from_id(int(self._id), db_session)
if not cc_pair:
return None
stmt = construct_document_select_for_connector_credential_pair(
cc_pair.connector_id, cc_pair.credential_id
)
for doc in db_session.scalars(stmt).yield_per(1):
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
):
lock.reacquire()
last_lock_time = current_time
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
# the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
# we prefix the task id so it's easier to keep track of who created the task
# aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
custom_task_id = f"{self.task_id_prefix}_{uuid4()}"
# add to the tracking taskset in redis BEFORE creating the celery task.
# note that for the moment we are using a single taskset key, not differentiated by cc_pair id
redis_client.sadd(self.taskset_key, custom_task_id)
# Priority on syncs triggered by new indexing should be medium
result = celery_app.send_task(
"document_by_cc_pair_cleanup_task",
kwargs=dict(
document_id=doc.id,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
tenant_id=tenant_id,
),
queue=DanswerCeleryQueues.CONNECTOR_DELETION,
task_id=custom_task_id,
priority=DanswerCeleryPriority.MEDIUM,
)
async_results.append(result)
return len(async_results)
class RedisConnectorPruning(RedisObjectHelper):
"""Celery will kick off a long running generator task to crawl the connector and
find any missing docs, which will each then get a new cleanup task. The progress of
those tasks will then be monitored to completion.
Example rough happy path order:
Check connectorpruning_fence_1
Send generator task with id connectorpruning+generator_1_{uuid}
generator runs connector with callbacks that increment connectorpruning_generator_progress_1
generator creates many subtasks with id connectorpruning+sub_1_{uuid}
in taskset connectorpruning_taskset_1
on completion, generator sets connectorpruning_generator_complete_1
celery postrun removes subtasks from taskset
monitor beat task cleans up when taskset reaches 0 items
"""
PREFIX = "connectorpruning"
FENCE_PREFIX = PREFIX + "_fence" # a fence for the entire pruning process
GENERATOR_TASK_PREFIX = PREFIX + "+generator"
TASKSET_PREFIX = PREFIX + "_taskset" # stores a list of prune task ids
SUBTASK_PREFIX = PREFIX + "+sub"
GENERATOR_PROGRESS_PREFIX = (
PREFIX + "_generator_progress"
) # a signal that contains generator progress
GENERATOR_COMPLETE_PREFIX = (
PREFIX + "_generator_complete"
) # a signal that the generator has finished
def __init__(self, id: int) -> None:
super().__init__(str(id))
self.documents_to_prune: set[str] = set()
@property
def generator_task_id_prefix(self) -> str:
return f"{self.GENERATOR_TASK_PREFIX}_{self._id}"
@property
def generator_progress_key(self) -> str:
# example: connectorpruning_generator_progress_1
return f"{self.GENERATOR_PROGRESS_PREFIX}_{self._id}"
@property
def generator_complete_key(self) -> str:
# example: connectorpruning_generator_complete_1
return f"{self.GENERATOR_COMPLETE_PREFIX}_{self._id}"
@property
def subtask_id_prefix(self) -> str:
return f"{self.SUBTASK_PREFIX}_{self._id}"
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock | None,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
async_results = []
cc_pair = get_connector_credential_pair_from_id(int(self._id), db_session)
if not cc_pair:
return None
for doc_id in self.documents_to_prune:
current_time = time.monotonic()
if lock and current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
):
lock.reacquire()
last_lock_time = current_time
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
# the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
# we prefix the task id so it's easier to keep track of who created the task
# aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
custom_task_id = f"{self.subtask_id_prefix}_{uuid4()}"
# add to the tracking taskset in redis BEFORE creating the celery task.
# note that for the moment we are using a single taskset key, not differentiated by cc_pair id
redis_client.sadd(self.taskset_key, custom_task_id)
# Priority on syncs triggered by new indexing should be medium
result = celery_app.send_task(
"document_by_cc_pair_cleanup_task",
kwargs=dict(
document_id=doc_id,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
tenant_id=tenant_id,
),
queue=DanswerCeleryQueues.CONNECTOR_DELETION,
task_id=custom_task_id,
priority=DanswerCeleryPriority.MEDIUM,
)
async_results.append(result)
return len(async_results)
def is_pruning(self, redis_client: Redis) -> bool:
"""A single example of a helper method being refactored into the redis helper"""
if redis_client.exists(self.fence_key):
return True
return False
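For reference, the key names produced by the prefixes above for cc_pair id 1, matching the happy-path description in the class docstring (derived directly from the class constants; shown only for orientation).
rcp = RedisConnectorPruning(1)
assert rcp.fence_key == "connectorpruning_fence_1"
assert rcp.taskset_key == "connectorpruning_taskset_1"
assert rcp.generator_progress_key == "connectorpruning_generator_progress_1"
assert rcp.generator_complete_key == "connectorpruning_generator_complete_1"
assert rcp.subtask_id_prefix == "connectorpruning+sub_1"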
class RedisConnectorIndexing(RedisObjectHelper):
"""Celery will kick off a long running indexing task to crawl the connector and
find any new or updated docs, which will each then get a new sync task or be
indexed inline.
ID should be a concatenation of cc_pair_id and search_setting_id, delimited by "/".
e.g. "2/5"
"""
PREFIX = "connectorindexing"
FENCE_PREFIX = PREFIX + "_fence" # a fence for the entire indexing process
GENERATOR_TASK_PREFIX = PREFIX + "+generator"
TASKSET_PREFIX = PREFIX + "_taskset" # stores a list of task ids
SUBTASK_PREFIX = PREFIX + "+sub"
GENERATOR_LOCK_PREFIX = "da_lock:indexing"
GENERATOR_PROGRESS_PREFIX = (
PREFIX + "_generator_progress"
) # a signal that contains generator progress
GENERATOR_COMPLETE_PREFIX = (
PREFIX + "_generator_complete"
) # a signal that the generator has finished
def __init__(self, cc_pair_id: int, search_settings_id: int) -> None:
super().__init__(f"{cc_pair_id}/{search_settings_id}")
@property
def generator_lock_key(self) -> str:
return f"{self.GENERATOR_LOCK_PREFIX}_{self._id}"
@property
def generator_task_id_prefix(self) -> str:
return f"{self.GENERATOR_TASK_PREFIX}_{self._id}"
@property
def generator_progress_key(self) -> str:
# example: connectorindexing_generator_progress_2/5
return f"{self.GENERATOR_PROGRESS_PREFIX}_{self._id}"
@property
def generator_complete_key(self) -> str:
# example: connectorindexing_generator_complete_2/5
return f"{self.GENERATOR_COMPLETE_PREFIX}_{self._id}"
@property
def subtask_id_prefix(self) -> str:
return f"{self.SUBTASK_PREFIX}_{self._id}"
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock | None,
tenant_id: str | None,
) -> int | None:
return None
def is_indexing(self, redis_client: Redis) -> bool:
"""A single example of a helper method being refactored into the redis helper"""
if redis_client.exists(self.fence_key):
return True
return False
class RedisConnectorStop(RedisObjectHelper):
"""Used to signal any running tasks for a connector to stop. We should refactor
connector related redis helpers into a single class.
"""
PREFIX = "connectorstop"
FENCE_PREFIX = PREFIX + "_fence" # a fence for the stop signal
TASKSET_PREFIX = PREFIX + "_taskset" # stores a list of task ids
def __init__(self, id: int) -> None:
super().__init__(str(id))
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock | None,
tenant_id: str | None,
) -> int | None:
return None
def celery_get_queue_length(queue: str, r: Redis) -> int:

View File

@@ -4,8 +4,6 @@ from typing import Any
from sqlalchemy.orm import Session
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.indexing.run_indexing import RunIndexingCallbackInterface
from danswer.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
from danswer.connectors.cross_connector_utils.rate_limit_wrapper import (
rate_limit_builder,
@@ -18,7 +16,8 @@ from danswer.connectors.models import Document
from danswer.db.connector_credential_pair import get_connector_credential_pair
from danswer.db.enums import TaskStatus
from danswer.db.models import TaskQueueState
from danswer.redis.redis_pool import get_redis_client
from danswer.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from danswer.redis.redis_connector import RedisConnector
from danswer.server.documents.models import DeletionAttemptSnapshot
from danswer.utils.logger import setup_logger
@@ -41,14 +40,14 @@ def _get_deletion_status(
if not cc_pair:
return None
rcd = RedisConnectorDeletion(cc_pair.id)
r = get_redis_client(tenant_id=tenant_id)
if not r.exists(rcd.fence_key):
redis_connector = RedisConnector(tenant_id, cc_pair.id)
if not redis_connector.delete.fenced:
return None
return TaskQueueState(
task_id="", task_name=rcd.fence_key, status=TaskStatus.STARTED
task_id="",
task_name=redis_connector.delete.fence_key,
status=TaskStatus.STARTED,
)
@@ -79,10 +78,10 @@ def document_batch_to_ids(
def extract_ids_from_runnable_connector(
runnable_connector: BaseConnector,
callback: RunIndexingCallbackInterface | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> set[str]:
"""
If the PruneConnector hasn't been implemented for the given connector, just pull
If the SlimConnector hasn't been implemented for the given connector, just pull
all docs using the load_from_state and grab out the IDs.
Optionally, a callback can be passed to handle the length of each document batch.
@@ -112,10 +111,15 @@ def extract_ids_from_runnable_connector(
for doc_batch in doc_batch_generator:
if callback:
if callback.should_stop():
raise RuntimeError("Stop signal received")
callback.progress(len(doc_batch))
raise RuntimeError(
"extract_ids_from_runnable_connector: Stop signal detected"
)
all_connector_doc_ids.update(doc_batch_processing_func(doc_batch))
if callback:
callback.progress("extract_ids_from_runnable_connector", len(doc_batch))
return all_connector_doc_ids

View File

@@ -0,0 +1,60 @@
from datetime import timedelta
from typing import Any
from danswer.configs.constants import DanswerCeleryPriority
tasks_to_schedule = [
{
"name": "check-for-vespa-sync",
"task": "check_for_vespa_sync_task",
"schedule": timedelta(seconds=20),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "check-for-connector-deletion",
"task": "check_for_connector_deletion_task",
"schedule": timedelta(seconds=20),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "check-for-indexing",
"task": "check_for_indexing",
"schedule": timedelta(seconds=15),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "check-for-prune",
"task": "check_for_pruning",
"schedule": timedelta(seconds=15),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "kombu-message-cleanup",
"task": "kombu_message_cleanup_task",
"schedule": timedelta(seconds=3600),
"options": {"priority": DanswerCeleryPriority.LOWEST},
},
{
"name": "monitor-vespa-sync",
"task": "monitor_vespa_sync",
"schedule": timedelta(seconds=5),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "check-for-doc-permissions-sync",
"task": "check_for_doc_permissions_sync",
"schedule": timedelta(seconds=30),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
{
"name": "check-for-external-group-sync",
"task": "check_for_external_group_sync",
"schedule": timedelta(seconds=20),
"options": {"priority": DanswerCeleryPriority.HIGH},
},
]
def get_tasks_to_schedule() -> list[dict[str, Any]]:
return tasks_to_schedule
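As a sketch of how these entries become per-tenant beat tasks in DynamicTenantScheduler, mirroring the loop in the beat hunk above; the tenant id here is a made-up placeholder.
tenant_id = "tenant_abc"  # hypothetical; real ids come from get_all_tenant_ids()
per_tenant_schedule: dict[str, dict] = {}
for task in get_tasks_to_schedule():
    task_name = f"{task['name']}-{tenant_id}"  # e.g. "check-for-indexing-tenant_abc"
    entry = {
        "task": task["task"],
        "schedule": task["schedule"],
        "kwargs": {"tenant_id": tenant_id},
    }
    if options := task.get("options"):
        entry["options"] = options
    per_tenant_schedule[task_name] = entry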

View File

@@ -1,22 +1,14 @@
from datetime import datetime
from datetime import timezone
import redis
from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from danswer.background.celery.apps.app_base import task_logger
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.celery.celery_redis import RedisConnectorIndexing
from danswer.background.celery.celery_redis import RedisConnectorPruning
from danswer.background.celery.celery_redis import RedisConnectorStop
from danswer.background.celery.tasks.shared.RedisConnectorDeletionFenceData import (
RedisConnectorDeletionFenceData,
)
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from danswer.configs.constants import DanswerRedisLocks
@@ -25,6 +17,8 @@ from danswer.db.connector_credential_pair import get_connector_credential_pairs
from danswer.db.engine import get_session_with_tenant
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.search_settings import get_all_search_settings
from danswer.redis.redis_connector import RedisConnector
from danswer.redis.redis_connector_delete import RedisConnectorDeletePayload
from danswer.redis.redis_pool import get_redis_client
@@ -42,7 +36,7 @@ class TaskDependencyError(RuntimeError):
def check_for_connector_deletion_task(self: Task, *, tenant_id: str | None) -> None:
r = get_redis_client(tenant_id=tenant_id)
lock_beat = r.lock(
lock_beat: RedisLock = r.lock(
DanswerRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
@@ -62,19 +56,19 @@ def check_for_connector_deletion_task(self: Task, *, tenant_id: str | None) -> N
# try running cleanup on the cc_pair_ids
for cc_pair_id in cc_pair_ids:
with get_session_with_tenant(tenant_id) as db_session:
rcs = RedisConnectorStop(cc_pair_id)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
try:
try_generate_document_cc_pair_cleanup_tasks(
self.app, cc_pair_id, db_session, r, lock_beat, tenant_id
self.app, cc_pair_id, db_session, lock_beat, tenant_id
)
except TaskDependencyError as e:
# this means we wanted to start deleting but dependent tasks were running
# Leave a stop signal to clear indexing and pruning tasks more quickly
task_logger.info(str(e))
r.set(rcs.fence_key, cc_pair_id)
redis_connector.stop.set_fence(True)
else:
# clear the stop signal if it exists ... no longer needed
r.delete(rcs.fence_key)
redis_connector.stop.set_fence(False)
except SoftTimeLimitExceeded:
task_logger.info(
@@ -91,8 +85,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
app: Celery,
cc_pair_id: int,
db_session: Session,
r: Redis,
lock_beat: redis.lock.Lock,
lock_beat: RedisLock,
tenant_id: str | None,
) -> int | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
@@ -106,10 +99,10 @@ def try_generate_document_cc_pair_cleanup_tasks(
lock_beat.reacquire()
rcd = RedisConnectorDeletion(cc_pair_id)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
# don't generate sync tasks if tasks are still pending
if r.exists(rcd.fence_key):
if redis_connector.delete.fenced:
return None
# we need to load the state of the object inside the fence
@@ -123,47 +116,55 @@ def try_generate_document_cc_pair_cleanup_tasks(
return None
# set a basic fence to start
fence_value = RedisConnectorDeletionFenceData(
fence_payload = RedisConnectorDeletePayload(
num_tasks=None,
submitted=datetime.now(timezone.utc),
)
r.set(rcd.fence_key, fence_value.model_dump_json())
redis_connector.delete.set_fence(fence_payload)
try:
# do not proceed if connector indexing or connector pruning are running
search_settings_list = get_all_search_settings(db_session)
for search_settings in search_settings_list:
rci = RedisConnectorIndexing(cc_pair_id, search_settings.id)
if r.get(rci.fence_key):
redis_connector_index = redis_connector.new_index(search_settings.id)
if redis_connector_index.fenced:
raise TaskDependencyError(
f"Connector deletion - Delayed (indexing in progress): "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings.id}"
)
rcp = RedisConnectorPruning(cc_pair_id)
if r.get(rcp.fence_key):
if redis_connector.prune.fenced:
raise TaskDependencyError(
f"Connector deletion - Delayed (pruning in progress): "
f"cc_pair={cc_pair_id}"
)
if redis_connector.permissions.fenced:
raise TaskDependencyError(
f"Connector deletion - Delayed (permissions in progress): "
f"cc_pair={cc_pair_id}"
)
# add tasks to celery and build up the task set to monitor in redis
r.delete(rcd.taskset_key)
redis_connector.delete.taskset_clear()
# Add all documents that need to be updated into the queue
task_logger.info(
f"RedisConnectorDeletion.generate_tasks starting. cc_pair={cc_pair_id}"
)
tasks_generated = rcd.generate_tasks(app, db_session, r, lock_beat, tenant_id)
tasks_generated = redis_connector.delete.generate_tasks(
app, db_session, lock_beat
)
if tasks_generated is None:
raise ValueError("RedisConnectorDeletion.generate_tasks returned None")
except TaskDependencyError:
r.delete(rcd.fence_key)
redis_connector.delete.set_fence(None)
raise
except Exception:
task_logger.exception("Unexpected exception")
r.delete(rcd.fence_key)
redis_connector.delete.set_fence(None)
return None
else:
# Currently we are allowing the sync to proceed with 0 tasks.
@@ -178,7 +179,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
)
# set this only after all tasks have been added
fence_value.num_tasks = tasks_generated
r.set(rcd.fence_key, fence_value.model_dump_json())
fence_payload.num_tasks = tasks_generated
redis_connector.delete.set_fence(fence_payload)
return tasks_generated
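
The substance of this diff is moving raw fence-key manipulation (r.set / r.get / r.delete on rcd.fence_key) behind a per-connector helper: redis_connector.delete.fenced, set_fence(payload), taskset_clear(), and a RedisConnectorDeletePayload model. A rough sketch of that fence idea using plain redis-py and pydantic; the class and key names below are assumptions for illustration, not the project's actual RedisConnectorDelete API:

from datetime import datetime, timezone

import redis
from pydantic import BaseModel


class DeletePayload(BaseModel):
    num_tasks: int | None
    submitted: datetime


class DeleteFence:
    def __init__(self, r: redis.Redis, cc_pair_id: int) -> None:
        self.r = r
        self.fence_key = f"connectordeletion_fence_{cc_pair_id}"  # illustrative key

    @property
    def fenced(self) -> bool:
        # "fenced" simply means the fence key exists
        return bool(self.r.exists(self.fence_key))

    def set_fence(self, payload: DeletePayload | None) -> None:
        if payload is None:
            self.r.delete(self.fence_key)  # clearing the fence
        else:
            self.r.set(self.fence_key, payload.model_dump_json())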

View File

@@ -0,0 +1,326 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from uuid import uuid4
from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from redis.lock import Lock as RedisLock
from danswer.access.models import DocExternalAccess
from danswer.background.celery.apps.app_base import task_logger
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT
from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from danswer.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from danswer.configs.constants import DanswerCeleryPriority
from danswer.configs.constants import DanswerCeleryQueues
from danswer.configs.constants import DanswerRedisLocks
from danswer.configs.constants import DocumentSource
from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
from danswer.db.engine import get_session_with_tenant
from danswer.db.enums import AccessType
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.models import ConnectorCredentialPair
from danswer.db.users import batch_add_ext_perm_user_if_not_exists
from danswer.redis.redis_connector import RedisConnector
from danswer.redis.redis_connector_doc_perm_sync import (
RedisConnectorPermissionSyncPayload,
)
from danswer.redis.redis_pool import get_redis_client
from danswer.utils.logger import doc_permission_sync_ctx
from danswer.utils.logger import setup_logger
from ee.danswer.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.danswer.db.document import upsert_document_external_perms
from ee.danswer.external_permissions.sync_params import DOC_PERMISSION_SYNC_PERIODS
from ee.danswer.external_permissions.sync_params import DOC_PERMISSIONS_FUNC_MAP
logger = setup_logger()
DOCUMENT_PERMISSIONS_UPDATE_MAX_RETRIES = 3
# 5 seconds more than RetryDocumentIndex STOP_AFTER+MAX_WAIT
LIGHT_SOFT_TIME_LIMIT = 105
LIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15
def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
"""Returns boolean indicating if external doc permissions sync is due."""
if cc_pair.access_type != AccessType.SYNC:
return False
# skip doc permissions sync if not active
if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
return False
if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
return False
# If the last sync is None, it has never been run so we run the sync
last_perm_sync = cc_pair.last_time_perm_sync
if last_perm_sync is None:
return True
source_sync_period = DOC_PERMISSION_SYNC_PERIODS.get(cc_pair.connector.source)
# If DOC_PERMISSION_SYNC_PERIODS[source] is None, we always run the sync.
if not source_sync_period:
return True
# If the last sync is greater than the full fetch period, we run the sync
next_sync = last_perm_sync + timedelta(seconds=source_sync_period)
if datetime.now(timezone.utc) >= next_sync:
return True
return False
@shared_task(
name="check_for_doc_permissions_sync",
soft_time_limit=JOB_TIMEOUT,
bind=True,
)
def check_for_doc_permissions_sync(self: Task, *, tenant_id: str | None) -> None:
r = get_redis_client(tenant_id=tenant_id)
lock_beat = r.lock(
DanswerRedisLocks.CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
try:
# these tasks should never overlap
if not lock_beat.acquire(blocking=False):
return
# get all cc pairs that need to be synced
cc_pair_ids_to_sync: list[int] = []
with get_session_with_tenant(tenant_id) as db_session:
cc_pairs = get_all_auto_sync_cc_pairs(db_session)
for cc_pair in cc_pairs:
if _is_external_doc_permissions_sync_due(cc_pair):
cc_pair_ids_to_sync.append(cc_pair.id)
for cc_pair_id in cc_pair_ids_to_sync:
tasks_created = try_creating_permissions_sync_task(
self.app, cc_pair_id, r, tenant_id
)
if not tasks_created:
continue
task_logger.info(f"Doc permissions sync queued: cc_pair={cc_pair_id}")
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
finally:
if lock_beat.owned():
lock_beat.release()
def try_creating_permissions_sync_task(
app: Celery,
cc_pair_id: int,
r: Redis,
tenant_id: str | None,
) -> int | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
Returns None if no syncing is required."""
redis_connector = RedisConnector(tenant_id, cc_pair_id)
LOCK_TIMEOUT = 30
lock: RedisLock = r.lock(
DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_generate_permissions_sync_tasks",
timeout=LOCK_TIMEOUT,
)
acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)
if not acquired:
return None
try:
if redis_connector.permissions.fenced:
return None
if redis_connector.delete.fenced:
return None
if redis_connector.prune.fenced:
return None
redis_connector.permissions.generator_clear()
redis_connector.permissions.taskset_clear()
custom_task_id = f"{redis_connector.permissions.generator_task_key}_{uuid4()}"
result = app.send_task(
"connector_permission_sync_generator_task",
kwargs=dict(
cc_pair_id=cc_pair_id,
tenant_id=tenant_id,
),
queue=DanswerCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC,
task_id=custom_task_id,
priority=DanswerCeleryPriority.HIGH,
)
# set a basic fence to start
payload = RedisConnectorPermissionSyncPayload(
started=None, celery_task_id=result.id
)
redis_connector.permissions.set_fence(payload)
except Exception:
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair_id}")
return None
finally:
if lock.owned():
lock.release()
return 1
@shared_task(
name="connector_permission_sync_generator_task",
acks_late=False,
soft_time_limit=JOB_TIMEOUT,
track_started=True,
trail=False,
bind=True,
)
def connector_permission_sync_generator_task(
self: Task,
cc_pair_id: int,
tenant_id: str | None,
) -> None:
"""
Permission sync task that handles document permission syncing for a given connector credential pair
This task assumes that the task has already been properly fenced
"""
doc_permission_sync_ctx_dict = doc_permission_sync_ctx.get()
doc_permission_sync_ctx_dict["cc_pair_id"] = cc_pair_id
doc_permission_sync_ctx_dict["request_id"] = self.request.id
doc_permission_sync_ctx.set(doc_permission_sync_ctx_dict)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
r = get_redis_client(tenant_id=tenant_id)
lock = r.lock(
DanswerRedisLocks.CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX
+ f"_{redis_connector.id}",
timeout=CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT,
)
acquired = lock.acquire(blocking=False)
if not acquired:
task_logger.warning(
f"Permission sync task already running, exiting...: cc_pair={cc_pair_id}"
)
return None
try:
with get_session_with_tenant(tenant_id) as db_session:
cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session)
if cc_pair is None:
raise ValueError(
f"No connector credential pair found for id: {cc_pair_id}"
)
source_type = cc_pair.connector.source
doc_sync_func = DOC_PERMISSIONS_FUNC_MAP.get(source_type)
if doc_sync_func is None:
raise ValueError(
f"No doc sync func found for {source_type} with cc_pair={cc_pair_id}"
)
logger.info(f"Syncing docs for {source_type} with cc_pair={cc_pair_id}")
payload = redis_connector.permissions.payload
if not payload:
raise ValueError(f"No fence payload found: cc_pair={cc_pair_id}")
payload.started = datetime.now(timezone.utc)
redis_connector.permissions.set_fence(payload)
document_external_accesses: list[DocExternalAccess] = doc_sync_func(cc_pair)
task_logger.info(
f"RedisConnector.permissions.generate_tasks starting. cc_pair={cc_pair_id}"
)
tasks_generated = redis_connector.permissions.generate_tasks(
self.app, lock, document_external_accesses, source_type
)
if tasks_generated is None:
return None
task_logger.info(
f"RedisConnector.permissions.generate_tasks finished. "
f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
)
redis_connector.permissions.generator_complete = tasks_generated
except Exception as e:
task_logger.exception(f"Failed to run permission sync: cc_pair={cc_pair_id}")
redis_connector.permissions.generator_clear()
redis_connector.permissions.taskset_clear()
redis_connector.permissions.set_fence(None)
raise e
finally:
if lock.owned():
lock.release()
@shared_task(
name="update_external_document_permissions_task",
soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
time_limit=LIGHT_TIME_LIMIT,
max_retries=DOCUMENT_PERMISSIONS_UPDATE_MAX_RETRIES,
bind=True,
)
def update_external_document_permissions_task(
self: Task,
tenant_id: str | None,
serialized_doc_external_access: dict,
source_string: str,
) -> bool:
document_external_access = DocExternalAccess.from_dict(
serialized_doc_external_access
)
doc_id = document_external_access.doc_id
external_access = document_external_access.external_access
try:
with get_session_with_tenant(tenant_id) as db_session:
# Then we build the update requests to update vespa
batch_add_ext_perm_user_if_not_exists(
db_session=db_session,
emails=list(external_access.external_user_emails),
)
upsert_document_external_perms(
db_session=db_session,
doc_id=doc_id,
external_access=external_access,
source_type=DocumentSource(source_string),
)
logger.debug(
f"Successfully synced postgres document permissions for {doc_id}"
)
return True
except Exception:
logger.exception("Error Syncing Document Permissions")
return False
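
The scheduling helper _is_external_doc_permissions_sync_due boils down to: sync if the connector is active and has never synced, otherwise sync once the per-source period has elapsed. A standalone restatement of that check (the period table and names are simplified assumptions):

from datetime import datetime, timedelta, timezone

SYNC_PERIODS: dict[str, int] = {"google_drive": 86400}  # seconds, illustrative


def is_sync_due(source: str, last_sync: datetime | None) -> bool:
    if last_sync is None:
        return True  # never synced before, so a sync is due now
    period = SYNC_PERIODS.get(source)
    if not period:
        return True  # no per-source period configured: always due
    return datetime.now(timezone.utc) >= last_sync + timedelta(seconds=period)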

View File

@@ -0,0 +1,277 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from uuid import uuid4
from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from redis.lock import Lock as RedisLock
from danswer.background.celery.apps.app_base import task_logger
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT
from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from danswer.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from danswer.configs.constants import DanswerCeleryPriority
from danswer.configs.constants import DanswerCeleryQueues
from danswer.configs.constants import DanswerRedisLocks
from danswer.db.connector import mark_cc_pair_as_external_group_synced
from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
from danswer.db.engine import get_session_with_tenant
from danswer.db.enums import AccessType
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.models import ConnectorCredentialPair
from danswer.redis.redis_connector import RedisConnector
from danswer.redis.redis_connector_ext_group_sync import (
RedisConnectorExternalGroupSyncPayload,
)
from danswer.redis.redis_pool import get_redis_client
from danswer.utils.logger import setup_logger
from ee.danswer.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.danswer.db.external_perm import ExternalUserGroup
from ee.danswer.db.external_perm import replace_user__ext_group_for_cc_pair
from ee.danswer.external_permissions.sync_params import EXTERNAL_GROUP_SYNC_PERIODS
from ee.danswer.external_permissions.sync_params import GROUP_PERMISSIONS_FUNC_MAP
logger = setup_logger()
EXTERNAL_GROUPS_UPDATE_MAX_RETRIES = 3
# 5 seconds more than RetryDocumentIndex STOP_AFTER+MAX_WAIT
LIGHT_SOFT_TIME_LIMIT = 105
LIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15
def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
"""Returns boolean indicating if external group sync is due."""
if cc_pair.access_type != AccessType.SYNC:
return False
# skip external group sync if not active
if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
return False
if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
return False
# If there is no group sync function for the connector, we don't run the sync
# This is fine because not all sources have a concept of groups
if not GROUP_PERMISSIONS_FUNC_MAP.get(cc_pair.connector.source):
return False
# If the last sync is None, it has never been run so we run the sync
last_ext_group_sync = cc_pair.last_time_external_group_sync
if last_ext_group_sync is None:
return True
source_sync_period = EXTERNAL_GROUP_SYNC_PERIODS.get(cc_pair.connector.source)
# If EXTERNAL_GROUP_SYNC_PERIODS[source] is None, we always run the sync.
if not source_sync_period:
return True
# If the last sync is greater than the full fetch period, we run the sync
next_sync = last_ext_group_sync + timedelta(seconds=source_sync_period)
if datetime.now(timezone.utc) >= next_sync:
return True
return False
@shared_task(
name="check_for_external_group_sync",
soft_time_limit=JOB_TIMEOUT,
bind=True,
)
def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> None:
r = get_redis_client(tenant_id=tenant_id)
lock_beat = r.lock(
DanswerRedisLocks.CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
try:
# these tasks should never overlap
if not lock_beat.acquire(blocking=False):
return
cc_pair_ids_to_sync: list[int] = []
with get_session_with_tenant(tenant_id) as db_session:
cc_pairs = get_all_auto_sync_cc_pairs(db_session)
for cc_pair in cc_pairs:
if _is_external_group_sync_due(cc_pair):
cc_pair_ids_to_sync.append(cc_pair.id)
for cc_pair_id in cc_pair_ids_to_sync:
tasks_created = try_creating_external_group_sync_task(
self.app, cc_pair_id, r, tenant_id
)
if not tasks_created:
continue
task_logger.info(f"External group sync queued: cc_pair={cc_pair_id}")
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
finally:
if lock_beat.owned():
lock_beat.release()
def try_creating_external_group_sync_task(
app: Celery,
cc_pair_id: int,
r: Redis,
tenant_id: str | None,
) -> int | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
Returns None if no syncing is required."""
redis_connector = RedisConnector(tenant_id, cc_pair_id)
LOCK_TIMEOUT = 30
lock = r.lock(
DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_generate_external_group_sync_tasks",
timeout=LOCK_TIMEOUT,
)
acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)
if not acquired:
return None
try:
# Don't kick off a new sync if the previous one is still running
if redis_connector.external_group_sync.fenced:
return None
redis_connector.external_group_sync.generator_clear()
redis_connector.external_group_sync.taskset_clear()
custom_task_id = f"{redis_connector.external_group_sync.taskset_key}_{uuid4()}"
result = app.send_task(
"connector_external_group_sync_generator_task",
kwargs=dict(
cc_pair_id=cc_pair_id,
tenant_id=tenant_id,
),
queue=DanswerCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC,
task_id=custom_task_id,
priority=DanswerCeleryPriority.HIGH,
)
payload = RedisConnectorExternalGroupSyncPayload(
started=datetime.now(timezone.utc),
celery_task_id=result.id,
)
redis_connector.external_group_sync.set_fence(payload)
except Exception:
task_logger.exception(
f"Unexpected exception while trying to create external group sync task: cc_pair={cc_pair_id}"
)
return None
finally:
if lock.owned():
lock.release()
return 1
@shared_task(
name="connector_external_group_sync_generator_task",
acks_late=False,
soft_time_limit=JOB_TIMEOUT,
track_started=True,
trail=False,
bind=True,
)
def connector_external_group_sync_generator_task(
self: Task,
cc_pair_id: int,
tenant_id: str | None,
) -> None:
"""
Task that handles external group syncing for a given connector credential pair
This task assumes that the task has already been properly fenced
"""
redis_connector = RedisConnector(tenant_id, cc_pair_id)
r = get_redis_client(tenant_id=tenant_id)
lock: RedisLock = r.lock(
DanswerRedisLocks.CONNECTOR_EXTERNAL_GROUP_SYNC_LOCK_PREFIX
+ f"_{redis_connector.id}",
timeout=CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT,
)
try:
acquired = lock.acquire(blocking=False)
if not acquired:
task_logger.warning(
f"External group sync task already running, exiting...: cc_pair={cc_pair_id}"
)
return None
with get_session_with_tenant(tenant_id) as db_session:
cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session)
if cc_pair is None:
raise ValueError(
f"No connector credential pair found for id: {cc_pair_id}"
)
source_type = cc_pair.connector.source
ext_group_sync_func = GROUP_PERMISSIONS_FUNC_MAP.get(source_type)
if ext_group_sync_func is None:
raise ValueError(
f"No external group sync func found for {source_type} for cc_pair: {cc_pair_id}"
)
logger.info(
f"Syncing external groups for {source_type} for cc_pair: {cc_pair_id}"
)
external_user_groups: list[ExternalUserGroup] = ext_group_sync_func(cc_pair)
logger.info(
f"Syncing {len(external_user_groups)} external user groups for {source_type}"
)
replace_user__ext_group_for_cc_pair(
db_session=db_session,
cc_pair_id=cc_pair.id,
group_defs=external_user_groups,
source=cc_pair.connector.source,
)
logger.info(
f"Synced {len(external_user_groups)} external user groups for {source_type}"
)
mark_cc_pair_as_external_group_synced(db_session, cc_pair.id)
except Exception as e:
task_logger.exception(
f"Failed to run external group sync: cc_pair={cc_pair_id}"
)
redis_connector.external_group_sync.generator_clear()
redis_connector.external_group_sync.taskset_clear()
raise e
finally:
# we always want to clear the fence after the task is done or failed so it doesn't get stuck
redis_connector.external_group_sync.set_fence(None)
if lock.owned():
lock.release()
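
Two details in the generator task above carry most of the weight: a per-connector Redis lock keeps concurrent runs out, and the fence is cleared in the finally block so a failed run can never leave the connector stuck in an "in progress" state. A generic sketch of that shape (key names, timeout, and the fence value are assumptions):

import redis

r = redis.Redis()


def run_exclusive_sync(cc_pair_id: int) -> None:
    lock = r.lock(f"ext_group_sync_lock_{cc_pair_id}", timeout=300)
    fence_key = f"ext_group_sync_fence_{cc_pair_id}"
    if not lock.acquire(blocking=False):
        return  # another worker is already syncing this connector
    try:
        r.set(fence_key, "running")  # the fence marks the sync as in progress
        pass  # the actual group sync work would run here
    finally:
        # always clear the fence, even on failure, so a crashed run cannot
        # leave the connector permanently marked as syncing
        r.delete(fence_key)
        if lock.owned():
            lock.release()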

View File

@@ -2,27 +2,21 @@ from datetime import datetime
from datetime import timezone
from http import HTTPStatus
from time import sleep
from typing import cast
from uuid import uuid4
import redis
import sentry_sdk
from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from redis.exceptions import LockError
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from danswer.background.celery.apps.app_base import task_logger
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.celery.celery_redis import RedisConnectorIndexing
from danswer.background.celery.celery_redis import RedisConnectorStop
from danswer.background.celery.tasks.shared.RedisConnectorIndexingFenceData import (
RedisConnectorIndexingFenceData,
)
from danswer.background.indexing.job_client import SimpleJobClient
from danswer.background.indexing.run_indexing import run_indexing_entrypoint
from danswer.background.indexing.run_indexing import RunIndexingCallbackInterface
from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from danswer.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
@@ -31,59 +25,136 @@ from danswer.configs.constants import DanswerCeleryPriority
from danswer.configs.constants import DanswerCeleryQueues
from danswer.configs.constants import DanswerRedisLocks
from danswer.configs.constants import DocumentSource
from danswer.db.connector import mark_ccpair_with_indexing_trigger
from danswer.db.connector_credential_pair import fetch_connector_credential_pairs
from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
from danswer.db.engine import get_db_current_time
from danswer.db.engine import get_session_with_tenant
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.enums import IndexingMode
from danswer.db.enums import IndexingStatus
from danswer.db.enums import IndexModelStatus
from danswer.db.index_attempt import create_index_attempt
from danswer.db.index_attempt import delete_index_attempt
from danswer.db.index_attempt import get_all_index_attempts_by_status
from danswer.db.index_attempt import get_index_attempt
from danswer.db.index_attempt import get_last_attempt_for_cc_pair
from danswer.db.index_attempt import mark_attempt_canceled
from danswer.db.index_attempt import mark_attempt_failed
from danswer.db.models import ConnectorCredentialPair
from danswer.db.models import IndexAttempt
from danswer.db.models import SearchSettings
from danswer.db.search_settings import get_active_search_settings
from danswer.db.search_settings import get_current_search_settings
from danswer.db.search_settings import get_secondary_search_settings
from danswer.db.swap_index import check_index_swap
from danswer.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from danswer.natural_language_processing.search_nlp_models import EmbeddingModel
from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from danswer.redis.redis_connector import RedisConnector
from danswer.redis.redis_connector_index import RedisConnectorIndex
from danswer.redis.redis_connector_index import RedisConnectorIndexPayload
from danswer.redis.redis_pool import get_redis_client
from danswer.utils.logger import setup_logger
from danswer.utils.variable_functionality import global_version
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import INDEXING_MODEL_SERVER_PORT
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import SENTRY_DSN
logger = setup_logger()
class RunIndexingCallback(RunIndexingCallbackInterface):
class IndexingCallback(IndexingHeartbeatInterface):
def __init__(
self,
stop_key: str,
generator_progress_key: str,
redis_lock: redis.lock.Lock,
redis_lock: RedisLock,
redis_client: Redis,
):
super().__init__()
self.redis_lock: redis.lock.Lock = redis_lock
self.redis_lock: RedisLock = redis_lock
self.stop_key: str = stop_key
self.generator_progress_key: str = generator_progress_key
self.redis_client = redis_client
self.started: datetime = datetime.now(timezone.utc)
self.redis_lock.reacquire()
self.last_tag: str = "IndexingCallback.__init__"
self.last_lock_reacquire: datetime = datetime.now(timezone.utc)
def should_stop(self) -> bool:
if self.redis_client.exists(self.stop_key):
return True
return False
def progress(self, amount: int) -> None:
self.redis_lock.reacquire()
def progress(self, tag: str, amount: int) -> None:
try:
self.redis_lock.reacquire()
self.last_tag = tag
self.last_lock_reacquire = datetime.now(timezone.utc)
except LockError:
logger.exception(
f"IndexingCallback - lock.reacquire exceptioned. "
f"lock_timeout={self.redis_lock.timeout} "
f"start={self.started} "
f"last_tag={self.last_tag} "
f"last_reacquired={self.last_lock_reacquire} "
f"now={datetime.now(timezone.utc)}"
)
raise
self.redis_client.incrby(self.generator_progress_key, amount)
def get_unfenced_index_attempt_ids(db_session: Session, r: redis.Redis) -> list[int]:
"""Gets a list of unfenced index attempts. Should not be possible, so we'd typically
want to clean them up.
Unfenced = attempt not in terminal state and fence does not exist.
"""
unfenced_attempts: list[int] = []
# inner/outer/inner double check pattern to avoid race conditions when checking for
# bad state
# inner = index_attempt in non terminal state
# outer = r.fence_key down
# check the db for index attempts in a non terminal state
attempts: list[IndexAttempt] = []
attempts.extend(
get_all_index_attempts_by_status(IndexingStatus.NOT_STARTED, db_session)
)
attempts.extend(
get_all_index_attempts_by_status(IndexingStatus.IN_PROGRESS, db_session)
)
for attempt in attempts:
fence_key = RedisConnectorIndex.fence_key_with_ids(
attempt.connector_credential_pair_id, attempt.search_settings_id
)
# if the fence is down / doesn't exist, possible error but not confirmed
if r.exists(fence_key):
continue
# Between the time the attempts are first looked up and the time we see the fence down,
# the attempt may have completed and taken down the fence normally.
# We need to double check that the index attempt is still in a non terminal state
# and matches the original state, which confirms we are really in a bad state.
attempt_2 = get_index_attempt(db_session, attempt.id)
if not attempt_2:
continue
if attempt.status != attempt_2.status:
continue
unfenced_attempts.append(attempt.id)
return unfenced_attempts
@shared_task(
name="check_for_indexing",
soft_time_limit=300,
@@ -91,10 +162,10 @@ class RunIndexingCallback(RunIndexingCallbackInterface):
)
def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
tasks_created = 0
locked = False
r = get_redis_client(tenant_id=tenant_id)
lock_beat = r.lock(
lock_beat: RedisLock = r.lock(
DanswerRedisLocks.CHECK_INDEXING_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
@@ -104,44 +175,49 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
if not lock_beat.acquire(blocking=False):
return None
locked = True
# check for search settings swap
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
check_index_swap(db_session=db_session)
old_search_settings = check_index_swap(db_session=db_session)
current_search_settings = get_current_search_settings(db_session)
# So that the first time users aren't surprised by really slow speed of first
# batch of documents indexed
if current_search_settings.provider_type is None and not MULTI_TENANT:
embedding_model = EmbeddingModel.from_db_model(
search_settings=current_search_settings,
server_host=INDEXING_MODEL_SERVER_HOST,
server_port=INDEXING_MODEL_SERVER_PORT,
)
warm_up_bi_encoder(
embedding_model=embedding_model,
)
if old_search_settings:
embedding_model = EmbeddingModel.from_db_model(
search_settings=current_search_settings,
server_host=INDEXING_MODEL_SERVER_HOST,
server_port=INDEXING_MODEL_SERVER_PORT,
)
# only warm up if search settings were changed
warm_up_bi_encoder(
embedding_model=embedding_model,
)
# gather cc_pair_ids
cc_pair_ids: list[int] = []
with get_session_with_tenant(tenant_id) as db_session:
lock_beat.reacquire()
cc_pairs = fetch_connector_credential_pairs(db_session)
for cc_pair_entry in cc_pairs:
cc_pair_ids.append(cc_pair_entry.id)
# kick off index attempts
for cc_pair_id in cc_pair_ids:
lock_beat.reacquire()
redis_connector = RedisConnector(tenant_id, cc_pair_id)
with get_session_with_tenant(tenant_id) as db_session:
# Get the primary search settings
primary_search_settings = get_current_search_settings(db_session)
search_settings = [primary_search_settings]
# Check for secondary search settings
secondary_search_settings = get_secondary_search_settings(db_session)
if secondary_search_settings is not None:
# If secondary settings exist, add them to the list
search_settings.append(secondary_search_settings)
for search_settings_instance in search_settings:
rci = RedisConnectorIndexing(
cc_pair_id, search_settings_instance.id
search_settings_list: list[SearchSettings] = get_active_search_settings(
db_session
)
for search_settings_instance in search_settings_list:
redis_connector_index = redis_connector.new_index(
search_settings_instance.id
)
if r.exists(rci.fence_key):
if redis_connector_index.fenced:
continue
cc_pair = get_connector_credential_pair_from_id(
@@ -153,31 +229,80 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
last_attempt = get_last_attempt_for_cc_pair(
cc_pair.id, search_settings_instance.id, db_session
)
search_settings_primary = False
if search_settings_instance.id == search_settings_list[0].id:
search_settings_primary = True
if not _should_index(
cc_pair=cc_pair,
last_index=last_attempt,
search_settings_instance=search_settings_instance,
secondary_index_building=len(search_settings) > 1,
search_settings_primary=search_settings_primary,
secondary_index_building=len(search_settings_list) > 1,
db_session=db_session,
):
continue
reindex = False
if search_settings_instance.id == search_settings_list[0].id:
# the indexing trigger is only checked and cleared with the primary search settings
if cc_pair.indexing_trigger is not None:
if cc_pair.indexing_trigger == IndexingMode.REINDEX:
reindex = True
task_logger.info(
f"Connector indexing manual trigger detected: "
f"cc_pair={cc_pair.id} "
f"search_settings={search_settings_instance.id} "
f"indexing_mode={cc_pair.indexing_trigger}"
)
mark_ccpair_with_indexing_trigger(
cc_pair.id, None, db_session
)
# using a task queue and only allowing one task per cc_pair/search_setting
# prevents us from starving out certain attempts
attempt_id = try_creating_indexing_task(
self.app,
cc_pair,
search_settings_instance,
False,
reindex,
db_session,
r,
tenant_id,
)
if attempt_id:
task_logger.info(
f"Indexing queued: cc_pair={cc_pair.id} index_attempt={attempt_id}"
f"Connector indexing queued: "
f"index_attempt={attempt_id} "
f"cc_pair={cc_pair.id} "
f"search_settings={search_settings_instance.id}"
)
tasks_created += 1
# Fail any index attempts in the DB that don't have fences
# This shouldn't ever happen!
with get_session_with_tenant(tenant_id) as db_session:
unfenced_attempt_ids = get_unfenced_index_attempt_ids(db_session, r)
for attempt_id in unfenced_attempt_ids:
lock_beat.reacquire()
attempt = get_index_attempt(db_session, attempt_id)
if not attempt:
continue
failure_reason = (
f"Unfenced index attempt found in DB: "
f"index_attempt={attempt.id} "
f"cc_pair={attempt.connector_credential_pair_id} "
f"search_settings={attempt.search_settings_id}"
)
task_logger.error(failure_reason)
mark_attempt_failed(
attempt.id, db_session, failure_reason=failure_reason
)
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
@@ -185,8 +310,14 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
finally:
if lock_beat.owned():
lock_beat.release()
if locked:
if lock_beat.owned():
lock_beat.release()
else:
task_logger.error(
"check_for_indexing - Lock not owned on completion: "
f"tenant={tenant_id}"
)
return tasks_created
@@ -195,6 +326,7 @@ def _should_index(
cc_pair: ConnectorCredentialPair,
last_index: IndexAttempt | None,
search_settings_instance: SearchSettings,
search_settings_primary: bool,
secondary_index_building: bool,
db_session: Session,
) -> bool:
@@ -259,6 +391,11 @@ def _should_index(
):
return False
if search_settings_primary:
if cc_pair.indexing_trigger is not None:
# if a manual indexing trigger is on the cc pair, honor it for primary search settings
return True
# if no attempt has ever occurred, we should index regardless of refresh_freq
if not last_index:
return True
@@ -291,10 +428,11 @@ def try_creating_indexing_task(
"""
LOCK_TIMEOUT = 30
index_attempt_id: int | None = None
# we need to serialize any attempt to trigger indexing since it can be triggered
# either via celery beat or manually (API call)
lock = r.lock(
lock: RedisLock = r.lock(
DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_creating_indexing_task",
timeout=LOCK_TIMEOUT,
)
@@ -304,15 +442,15 @@ def try_creating_indexing_task(
return None
try:
rci = RedisConnectorIndexing(cc_pair.id, search_settings.id)
redis_connector = RedisConnector(tenant_id, cc_pair.id)
redis_connector_index = redis_connector.new_index(search_settings.id)
# skip if already indexing
if r.exists(rci.fence_key):
if redis_connector_index.fenced:
return None
# skip indexing if the cc_pair is deleting
rcd = RedisConnectorDeletion(cc_pair.id)
if r.exists(rcd.fence_key):
if redis_connector.delete.fenced:
return None
db_session.refresh(cc_pair)
@@ -320,19 +458,17 @@ def try_creating_indexing_task(
return None
# add a long running generator task to the queue
r.delete(rci.generator_complete_key)
r.delete(rci.taskset_key)
custom_task_id = f"{rci.generator_task_id_prefix}_{uuid4()}"
redis_connector_index.generator_clear()
# set a basic fence to start
fence_value = RedisConnectorIndexingFenceData(
payload = RedisConnectorIndexPayload(
index_attempt_id=None,
started=None,
submitted=datetime.now(timezone.utc),
celery_task_id=None,
)
r.set(rci.fence_key, fence_value.model_dump_json())
redis_connector_index.set_fence(payload)
# create the index attempt for tracking purposes
# code elsewhere checks for index attempts without an associated redis key
@@ -345,6 +481,10 @@ def try_creating_indexing_task(
db_session=db_session,
)
custom_task_id = redis_connector_index.generate_generator_task_id()
# when the task is sent, we have yet to finish setting up the fence
# therefore, the task must contain code that blocks until the fence is ready
result = celery_app.send_task(
"connector_indexing_proxy_task",
kwargs=dict(
@@ -361,17 +501,20 @@ def try_creating_indexing_task(
raise RuntimeError("send_task for connector_indexing_proxy_task failed.")
# now fill out the fence with the rest of the data
fence_value.index_attempt_id = index_attempt_id
fence_value.celery_task_id = result.id
r.set(rci.fence_key, fence_value.model_dump_json())
payload.index_attempt_id = index_attempt_id
payload.celery_task_id = result.id
redis_connector_index.set_fence(payload)
except Exception:
r.delete(rci.fence_key)
task_logger.exception(
f"Unexpected exception: "
f"try_creating_indexing_task - Unexpected exception: "
f"tenant={tenant_id} "
f"cc_pair={cc_pair.id} "
f"search_settings={search_settings.id}"
)
if index_attempt_id is not None:
delete_index_attempt(db_session, index_attempt_id)
redis_connector_index.set_fence(None)
return None
finally:
if lock.owned():
@@ -380,19 +523,31 @@ def try_creating_indexing_task(
return index_attempt_id
@shared_task(name="connector_indexing_proxy_task", acks_late=False, track_started=True)
@shared_task(
name="connector_indexing_proxy_task", bind=True, acks_late=False, track_started=True
)
def connector_indexing_proxy_task(
self: Task,
index_attempt_id: int,
cc_pair_id: int,
search_settings_id: int,
tenant_id: str | None,
) -> None:
"""celery tasks are forked, but forking is unstable. This proxies work to a spawned task."""
task_logger.info(
f"Indexing watchdog - starting: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
if not self.request.id:
task_logger.error("self.request.id is None!")
client = SimpleJobClient()
job = client.submit(
connector_indexing_task,
connector_indexing_task_wrapper,
index_attempt_id,
cc_pair_id,
search_settings_id,
@@ -402,32 +557,113 @@ def connector_indexing_proxy_task(
)
if not job:
task_logger.info(
f"Indexing watchdog - spawn failed: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
return
task_logger.info(
f"Indexing watchdog - spawn succeeded: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
redis_connector_index = redis_connector.new_index(search_settings_id)
while True:
sleep(10)
with get_session_with_tenant(tenant_id) as db_session:
index_attempt = get_index_attempt(
db_session=db_session, index_attempt_id=index_attempt_id
sleep(5)
if self.request.id and redis_connector_index.terminating(self.request.id):
task_logger.warning(
"Indexing proxy - termination signal detected: "
f"attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
# do nothing for ongoing jobs that haven't been stopped
if not job.done():
with get_session_with_tenant(tenant_id) as db_session:
mark_attempt_canceled(
index_attempt_id,
db_session,
"Connector termination signal detected",
)
job.cancel()
break
# do nothing for ongoing jobs that haven't been stopped
if not job.done():
with get_session_with_tenant(tenant_id) as db_session:
index_attempt = get_index_attempt(
db_session=db_session, index_attempt_id=index_attempt_id
)
if not index_attempt:
continue
if not index_attempt.is_finished():
continue
if job.status == "error":
logger.error(job.exception())
if job.status == "error":
task_logger.error(
f"Indexing watchdog - spawned task exceptioned: "
f"attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"error={job.exception()}"
)
job.release()
break
job.release()
break
task_logger.info(
f"Indexing watchdog - finished: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
return
def connector_indexing_task_wrapper(
index_attempt_id: int,
cc_pair_id: int,
search_settings_id: int,
tenant_id: str | None,
is_ee: bool,
) -> int | None:
"""Just wraps connector_indexing_task so we can log any exceptions before
re-raising it."""
result: int | None = None
try:
result = connector_indexing_task(
index_attempt_id,
cc_pair_id,
search_settings_id,
tenant_id,
is_ee,
)
except:
logger.exception(
f"connector_indexing_task exceptioned: "
f"tenant={tenant_id} "
f"index_attempt={index_attempt_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
raise
return result
def connector_indexing_task(
index_attempt_id: int,
cc_pair_id: int,
@@ -446,78 +682,99 @@ def connector_indexing_task(
Returns None if the task did not run (possibly due to a conflict).
Otherwise, returns an int >= 0 representing the number of indexed docs.
NOTE: if an exception is raised out of this task, the primary worker will detect
that the task transitioned to a "READY" state but the generator_complete_key doesn't exist.
This will cause the primary worker to abort the indexing attempt and clean up.
"""
attempt = None
n_final_progress = 0
# Since connector_indexing_proxy_task spawns a new process using this function as
# the entrypoint, we init Sentry here.
if SENTRY_DSN:
sentry_sdk.init(
dsn=SENTRY_DSN,
traces_sample_rate=0.1,
)
logger.info("Sentry initialized")
else:
logger.debug("Sentry DSN not provided, skipping Sentry initialization")
logger.info(
f"Indexing spawned task starting: "
f"attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
attempt_found = False
n_final_progress: int | None = None
redis_connector = RedisConnector(tenant_id, cc_pair_id)
redis_connector_index = redis_connector.new_index(search_settings_id)
r = get_redis_client(tenant_id=tenant_id)
rcd = RedisConnectorDeletion(cc_pair_id)
if r.exists(rcd.fence_key):
if redis_connector.delete.fenced:
raise RuntimeError(
f"Indexing will not start because connector deletion is in progress: "
f"attempt={index_attempt_id} "
f"cc_pair={cc_pair_id} "
f"fence={rcd.fence_key}"
f"fence={redis_connector.delete.fence_key}"
)
rcs = RedisConnectorStop(cc_pair_id)
if r.exists(rcs.fence_key):
if redis_connector.stop.fenced:
raise RuntimeError(
f"Indexing will not start because a connector stop signal was detected: "
f"attempt={index_attempt_id} "
f"cc_pair={cc_pair_id} "
f"fence={rcs.fence_key}"
f"fence={redis_connector.stop.fence_key}"
)
rci = RedisConnectorIndexing(cc_pair_id, search_settings_id)
while True:
# read related data and evaluate/print task progress
fence_value = cast(bytes, r.get(rci.fence_key))
if fence_value is None:
if not redis_connector_index.fenced: # The fence must exist
raise ValueError(
f"connector_indexing_task: fence_value not found: fence={rci.fence_key}"
f"connector_indexing_task - fence not found: fence={redis_connector_index.fence_key}"
)
try:
fence_json = fence_value.decode("utf-8")
fence_data = RedisConnectorIndexingFenceData.model_validate_json(
cast(str, fence_json)
)
except ValueError:
task_logger.exception(
f"connector_indexing_task: fence_data not decodeable: fence={rci.fence_key}"
)
raise
payload = redis_connector_index.payload # The payload must exist
if not payload:
raise ValueError("connector_indexing_task: payload invalid or not found")
if fence_data.index_attempt_id is None or fence_data.celery_task_id is None:
task_logger.info(
f"connector_indexing_task - Waiting for fence: fence={rci.fence_key}"
if payload.index_attempt_id is None or payload.celery_task_id is None:
logger.info(
f"connector_indexing_task - Waiting for fence: fence={redis_connector_index.fence_key}"
)
sleep(1)
continue
task_logger.info(
f"connector_indexing_task - Fence found, continuing...: fence={rci.fence_key}"
if payload.index_attempt_id != index_attempt_id:
raise ValueError(
f"connector_indexing_task - id mismatch. Task may be left over from previous run.: "
f"task_index_attempt={index_attempt_id} "
f"payload_index_attempt={payload.index_attempt_id}"
)
logger.info(
f"connector_indexing_task - Fence found, continuing...: fence={redis_connector_index.fence_key}"
)
break
lock = r.lock(
rci.generator_lock_key,
lock: RedisLock = r.lock(
redis_connector_index.generator_lock_key,
timeout=CELERY_INDEXING_LOCK_TIMEOUT,
)
acquired = lock.acquire(blocking=False)
if not acquired:
task_logger.warning(
logger.warning(
f"Indexing task already running, exiting...: "
f"cc_pair={cc_pair_id} search_settings={search_settings_id}"
f"index_attempt={index_attempt_id} cc_pair={cc_pair_id} search_settings={search_settings_id}"
)
# r.set(rci.generator_complete_key, HTTPStatus.CONFLICT.value)
return None
fence_data.started = datetime.now(timezone.utc)
r.set(rci.fence_key, fence_data.model_dump_json())
payload.started = datetime.now(timezone.utc)
redis_connector_index.set_fence(payload)
try:
with get_session_with_tenant(tenant_id) as db_session:
@@ -526,6 +783,7 @@ def connector_indexing_task(
raise ValueError(
f"Index attempt not found: index_attempt={index_attempt_id}"
)
attempt_found = True
cc_pair = get_connector_credential_pair_from_id(
cc_pair_id=cc_pair_id,
@@ -545,43 +803,52 @@ def connector_indexing_task(
f"Credential not found: cc_pair={cc_pair_id} credential={cc_pair.credential_id}"
)
rci = RedisConnectorIndexing(cc_pair_id, search_settings_id)
# define a callback class
callback = IndexingCallback(
redis_connector.stop.fence_key,
redis_connector_index.generator_progress_key,
lock,
r,
)
# define a callback class
callback = RunIndexingCallback(
rcs.fence_key, rci.generator_progress_key, lock, r
)
logger.info(
f"Indexing spawned task running entrypoint: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
run_indexing_entrypoint(
index_attempt_id,
tenant_id,
cc_pair_id,
is_ee,
callback=callback,
)
run_indexing_entrypoint(
index_attempt_id,
tenant_id,
cc_pair_id,
is_ee,
callback=callback,
)
# get back the total number of indexed docs and return it
generator_progress_value = r.get(rci.generator_progress_key)
if generator_progress_value is not None:
try:
n_final_progress = int(cast(int, generator_progress_value))
except ValueError:
pass
r.set(rci.generator_complete_key, HTTPStatus.OK.value)
# get back the total number of indexed docs and return it
n_final_progress = redis_connector_index.get_progress()
redis_connector_index.set_generator_complete(HTTPStatus.OK.value)
except Exception as e:
task_logger.exception(f"Indexing failed: cc_pair={cc_pair_id}")
if attempt:
logger.exception(
f"Indexing spawned task failed: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
if attempt_found:
with get_session_with_tenant(tenant_id) as db_session:
mark_attempt_failed(attempt, db_session, failure_reason=str(e))
mark_attempt_failed(index_attempt_id, db_session, failure_reason=str(e))
r.delete(rci.generator_lock_key)
r.delete(rci.generator_progress_key)
r.delete(rci.taskset_key)
r.delete(rci.fence_key)
raise e
finally:
if lock.owned():
lock.release()
logger.info(
f"Indexing spawned task finished: attempt={index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
return n_final_progress
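
The indexing flow above hinges on a two-phase fence handshake: try_creating_indexing_task writes a partial fence, sends the proxy task, then fills in index_attempt_id and celery_task_id, while the spawned connector_indexing_task polls until the fence is fully populated before doing any work. A simplified sketch of that handshake using plain JSON in Redis (the key name and the attempt id 42 are made up for illustration):

import json
import time

import redis

r = redis.Redis()
FENCE_KEY = "connectorindexing_fence_example"  # illustrative key name


def producer_side(task_id: str) -> None:
    # phase 1: write a partial fence before the task is sent
    r.set(FENCE_KEY, json.dumps({"index_attempt_id": None, "celery_task_id": None}))
    # ... app.send_task("connector_indexing_proxy_task", ...) would happen here ...
    # phase 2: complete the fence once the attempt id and task id are known
    r.set(FENCE_KEY, json.dumps({"index_attempt_id": 42, "celery_task_id": task_id}))


def consumer_side() -> dict:
    # the spawned task blocks until the fence is fully populated
    while True:
        raw = r.get(FENCE_KEY)
        if raw is None:
            raise ValueError("fence not found")
        payload = json.loads(raw)
        if payload["index_attempt_id"] is None or payload["celery_task_id"] is None:
            time.sleep(1)  # fence exists but is still being set up
            continue
        return payload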

View File

@@ -11,11 +11,8 @@ from redis import Redis
from sqlalchemy.orm import Session
from danswer.background.celery.apps.app_base import task_logger
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.celery.celery_redis import RedisConnectorPruning
from danswer.background.celery.celery_redis import RedisConnectorStop
from danswer.background.celery.celery_utils import extract_ids_from_runnable_connector
from danswer.background.celery.tasks.indexing.tasks import RunIndexingCallback
from danswer.background.celery.tasks.indexing.tasks import IndexingCallback
from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
@@ -33,6 +30,7 @@ from danswer.db.document import get_documents_for_connector_credential_pair
from danswer.db.engine import get_session_with_tenant
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.models import ConnectorCredentialPair
from danswer.redis.redis_connector import RedisConnector
from danswer.redis.redis_pool import get_redis_client
from danswer.utils.logger import pruning_ctx
from danswer.utils.logger import setup_logger
@@ -40,6 +38,42 @@ from danswer.utils.logger import setup_logger
logger = setup_logger()
def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
"""Returns boolean indicating if pruning is due.
Next pruning time is calculated as a delta from the last successful prune, or the
last successful indexing if pruning has never succeeded.
TODO(rkuo): consider whether we should allow pruning to be immediately rescheduled
if pruning fails (which is what it does now). A backoff could be reasonable.
"""
# skip pruning if no prune frequency is set
# pruning can still be forced via the API which will run a pruning task directly
if not cc_pair.connector.prune_freq:
return False
# skip pruning if not active
if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
return False
# skip pruning if the next scheduled prune time hasn't been reached yet
last_pruned = cc_pair.last_pruned
if not last_pruned:
if not cc_pair.last_successful_index_time:
# if we've never indexed, we can't prune
return False
# if never pruned, use the last time the connector indexed successfully
last_pruned = cc_pair.last_successful_index_time
next_prune = last_pruned + timedelta(seconds=cc_pair.connector.prune_freq)
if datetime.now(timezone.utc) < next_prune:
return False
return True
@shared_task(
name="check_for_pruning",
soft_time_limit=JOB_TIMEOUT,
@@ -71,7 +105,7 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> None:
if not cc_pair:
continue
if not is_pruning_due(cc_pair, db_session, r):
if not _is_pruning_due(cc_pair):
continue
tasks_created = try_creating_prune_generator_task(
@@ -92,47 +126,6 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> None:
lock_beat.release()
def is_pruning_due(
cc_pair: ConnectorCredentialPair,
db_session: Session,
r: Redis,
) -> bool:
"""Returns an int if pruning is triggered.
The int represents the number of prune tasks generated (in this case, only one
because the task is a long running generator task.)
Returns None if no pruning is triggered (due to not being needed or
other reasons such as simultaneous pruning restrictions.
Checks for scheduling related conditions, then delegates the rest of the checks to
try_creating_prune_generator_task.
"""
# skip pruning if no prune frequency is set
# pruning can still be forced via the API which will run a pruning task directly
if not cc_pair.connector.prune_freq:
return False
# skip pruning if not active
if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
return False
# skip pruning if the next scheduled prune time hasn't been reached yet
last_pruned = cc_pair.last_pruned
if not last_pruned:
if not cc_pair.last_successful_index_time:
# if we've never indexed, we can't prune
return False
# if never pruned, use the last time the connector indexed successfully
last_pruned = cc_pair.last_successful_index_time
next_prune = last_pruned + timedelta(seconds=cc_pair.connector.prune_freq)
if datetime.now(timezone.utc) < next_prune:
return False
return True
def try_creating_prune_generator_task(
celery_app: Celery,
cc_pair: ConnectorCredentialPair,
@@ -147,8 +140,11 @@ def try_creating_prune_generator_task(
is used to trigger prunes immediately, e.g. via the web ui.
"""
redis_connector = RedisConnector(tenant_id, cc_pair.id)
if not ALLOW_SIMULTANEOUS_PRUNING:
for key in r.scan_iter(RedisConnectorPruning.FENCE_PREFIX + "*"):
count = redis_connector.prune.get_active_task_count()
if count > 0:
return None
LOCK_TIMEOUT = 30
@@ -165,15 +161,16 @@ def try_creating_prune_generator_task(
return None
try:
rcp = RedisConnectorPruning(cc_pair.id)
# skip pruning if already pruning
if r.exists(rcp.fence_key):
if redis_connector.prune.fenced:
return None
# skip pruning if the cc_pair is deleting
rcd = RedisConnectorDeletion(cc_pair.id)
if r.exists(rcd.fence_key):
if redis_connector.delete.fenced:
return None
# skip pruning if doc permissions sync is running
if redis_connector.permissions.fenced:
return None
db_session.refresh(cc_pair)
@@ -181,10 +178,10 @@ def try_creating_prune_generator_task(
return None
# add a long running generator task to the queue
r.delete(rcp.generator_complete_key)
r.delete(rcp.taskset_key)
redis_connector.prune.generator_clear()
redis_connector.prune.taskset_clear()
custom_task_id = f"{rcp.generator_task_id_prefix}_{uuid4()}"
custom_task_id = f"{redis_connector.prune.generator_task_key}_{uuid4()}"
celery_app.send_task(
"connector_pruning_generator_task",
@@ -200,7 +197,7 @@ def try_creating_prune_generator_task(
)
# set this only after all tasks have been added
r.set(rcp.fence_key, 1)
redis_connector.prune.set_fence(True)
except Exception:
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair.id}")
return None
@@ -235,12 +232,14 @@ def connector_pruning_generator_task(
pruning_ctx_dict["request_id"] = self.request.id
pruning_ctx.set(pruning_ctx_dict)
rcp = RedisConnectorPruning(cc_pair_id)
task_logger.info(f"Pruning generator starting: cc_pair={cc_pair_id}")
redis_connector = RedisConnector(tenant_id, cc_pair_id)
r = get_redis_client(tenant_id=tenant_id)
lock = r.lock(
DanswerRedisLocks.PRUNING_LOCK_PREFIX + f"_{rcp._id}",
DanswerRedisLocks.PRUNING_LOCK_PREFIX + f"_{redis_connector.id}",
timeout=CELERY_PRUNING_LOCK_TIMEOUT,
)
@@ -265,6 +264,11 @@ def connector_pruning_generator_task(
)
return
task_logger.info(
f"Pruning generator running connector: "
f"cc_pair={cc_pair_id} "
f"connector_source={cc_pair.connector.source}"
)
runnable_connector = instantiate_connector(
db_session,
cc_pair.connector.source,
@@ -273,11 +277,13 @@ def connector_pruning_generator_task(
cc_pair.credential,
)
rcs = RedisConnectorStop(cc_pair_id)
callback = RunIndexingCallback(
rcs.fence_key, rcp.generator_progress_key, lock, r
callback = IndexingCallback(
redis_connector.stop.fence_key,
redis_connector.prune.generator_progress_key,
lock,
r,
)
# a list of docs in the source
all_connector_doc_ids: set[str] = extract_ids_from_runnable_connector(
runnable_connector, callback
@@ -299,36 +305,34 @@ def connector_pruning_generator_task(
task_logger.info(
f"Pruning set collected: "
f"cc_pair={cc_pair_id} "
f"docs_to_remove={len(doc_ids_to_remove)} "
f"doc_source={cc_pair.connector.source}"
f"connector_source={cc_pair.connector.source} "
f"docs_to_remove={len(doc_ids_to_remove)}"
)
rcp.documents_to_prune = set(doc_ids_to_remove)
task_logger.info(
f"RedisConnectorPruning.generate_tasks starting. cc_pair={cc_pair.id}"
f"RedisConnector.prune.generate_tasks starting. cc_pair={cc_pair_id}"
)
tasks_generated = rcp.generate_tasks(
self.app, db_session, r, None, tenant_id
tasks_generated = redis_connector.prune.generate_tasks(
set(doc_ids_to_remove), self.app, db_session, None
)
if tasks_generated is None:
return None
task_logger.info(
f"RedisConnectorPruning.generate_tasks finished. "
f"cc_pair={cc_pair.id} tasks_generated={tasks_generated}"
f"RedisConnector.prune.generate_tasks finished. "
f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}"
)
r.set(rcp.generator_complete_key, tasks_generated)
redis_connector.prune.generator_complete = tasks_generated
except Exception as e:
task_logger.exception(
f"Failed to run pruning: cc_pair={cc_pair_id} connector={connector_id}"
)
r.delete(rcp.generator_progress_key)
r.delete(rcp.taskset_key)
r.delete(rcp.fence_key)
redis_connector.prune.reset()
raise e
finally:
if lock.owned():
lock.release()
task_logger.info(f"Pruning generator finished: cc_pair={cc_pair_id}")

View File

@@ -1,8 +0,0 @@
from datetime import datetime
from pydantic import BaseModel
class RedisConnectorDeletionFenceData(BaseModel):
num_tasks: int | None
submitted: datetime

View File

@@ -1,10 +0,0 @@
from datetime import datetime
from pydantic import BaseModel
class RedisConnectorIndexingFenceData(BaseModel):
index_attempt_id: int | None
started: datetime | None
submitted: datetime
celery_task_id: str | None
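
The two deleted modules above were standalone pydantic payloads serialized into Redis fence keys; the diff replaces them with RedisConnectorDeletePayload and RedisConnectorIndexPayload living next to their Redis helpers. A minimal round-trip sketch of that serialize/restore pattern (generic key and class names, not the project's modules):

from datetime import datetime, timezone

import redis
from pydantic import BaseModel


class IndexFencePayload(BaseModel):
    index_attempt_id: int | None
    started: datetime | None
    submitted: datetime
    celery_task_id: str | None


r = redis.Redis()
key = "example_index_fence"  # illustrative

payload = IndexFencePayload(
    index_attempt_id=None,
    started=None,
    submitted=datetime.now(timezone.utc),
    celery_task_id=None,
)
r.set(key, payload.model_dump_json())  # write the fence as JSON
restored = IndexFencePayload.model_validate_json(r.get(key))  # read it back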

View File

@@ -59,7 +59,7 @@ def document_by_cc_pair_cleanup_task(
connector / credential pair from the access list
(6) delete all relevant entries from postgres
"""
task_logger.info(f"tenant={tenant_id} doc={document_id}")
task_logger.debug(f"Task start: tenant={tenant_id} doc={document_id}")
try:
with get_session_with_tenant(tenant_id) as db_session:
@@ -141,7 +141,9 @@ def document_by_cc_pair_cleanup_task(
return False
except Exception as ex:
if isinstance(ex, RetryError):
task_logger.info(f"Retry failed: {ex.last_attempt.attempt_number}")
task_logger.warning(
f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
)
# only set the inner exception if it is of type Exception
e_temp = ex.last_attempt.exception()
@@ -171,11 +173,21 @@ def document_by_cc_pair_cleanup_task(
else:
# This is the last attempt! mark the document as dirty in the db so that it
# eventually gets fixed out of band via stale document reconciliation
task_logger.info(
f"Max retries reached. Marking doc as dirty for reconciliation: "
task_logger.warning(
f"Max celery task retries reached. Marking doc as dirty for reconciliation: "
f"tenant={tenant_id} doc={document_id}"
)
with get_session_with_tenant(tenant_id):
with get_session_with_tenant(tenant_id) as db_session:
# delete the cc pair relationship now and let reconciliation clean it up
# in vespa
delete_document_by_connector_credential_pair__no_commit(
db_session=db_session,
document_id=document_id,
connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
mark_document_as_modified(document_id, db_session)
return False
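For context, the last-attempt branch above now does two things instead of one: it deletes the connector/credential-pair relationship row immediately and only defers the Vespa cleanup to stale-document reconciliation. A condensed sketch of that branch; the import paths marked below are assumptions (they are not visible in this hunk), and commit handling is left to the surrounding task as in the original:

from danswer.db.document import (  # assumed import path
    delete_document_by_connector_credential_pair__no_commit,
    mark_document_as_modified,
)
from danswer.db.engine import get_session_with_tenant
from danswer.db.models import ConnectorCredentialPairIdentifier  # assumed import path

def handle_final_cleanup_attempt(
    tenant_id: str | None, document_id: str, connector_id: int, credential_id: int
) -> bool:
    with get_session_with_tenant(tenant_id) as db_session:
        # drop the cc_pair <-> document relationship now; Vespa gets reconciled out of band
        delete_document_by_connector_credential_pair__no_commit(
            db_session=db_session,
            document_id=document_id,
            connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
                connector_id=connector_id,
                credential_id=credential_id,
            ),
        )
        # flag the doc as dirty so stale-document reconciliation fixes the index later
        mark_document_as_modified(document_id, db_session)
    return False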

View File

@@ -5,7 +5,6 @@ from http import HTTPStatus
from typing import cast
import httpx
import redis
from celery import Celery
from celery import shared_task
from celery import Task
@@ -13,24 +12,13 @@ from celery.exceptions import SoftTimeLimitExceeded
from celery.result import AsyncResult
from celery.states import READY_STATES
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from tenacity import RetryError
from danswer.access.access import get_access_for_document
from danswer.background.celery.apps.app_base import task_logger
from danswer.background.celery.celery_redis import celery_get_queue_length
from danswer.background.celery.celery_redis import RedisConnectorCredentialPair
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.celery.celery_redis import RedisConnectorIndexing
from danswer.background.celery.celery_redis import RedisConnectorPruning
from danswer.background.celery.celery_redis import RedisDocumentSet
from danswer.background.celery.celery_redis import RedisUserGroup
from danswer.background.celery.tasks.shared.RedisConnectorDeletionFenceData import (
RedisConnectorDeletionFenceData,
)
from danswer.background.celery.tasks.shared.RedisConnectorIndexingFenceData import (
RedisConnectorIndexingFenceData,
)
from danswer.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from danswer.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
from danswer.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
@@ -39,6 +27,7 @@ from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from danswer.configs.constants import DanswerCeleryQueues
from danswer.configs.constants import DanswerRedisLocks
from danswer.db.connector import fetch_connector_by_id
from danswer.db.connector import mark_cc_pair_as_permissions_synced
from danswer.db.connector import mark_ccpair_as_pruned
from danswer.db.connector_credential_pair import add_deletion_failure_message
from danswer.db.connector_credential_pair import (
@@ -59,15 +48,24 @@ from danswer.db.document_set import mark_document_set_as_synced
from danswer.db.engine import get_session_with_tenant
from danswer.db.enums import IndexingStatus
from danswer.db.index_attempt import delete_index_attempts
from danswer.db.index_attempt import get_all_index_attempts_by_status
from danswer.db.index_attempt import get_index_attempt
from danswer.db.index_attempt import mark_attempt_failed
from danswer.db.models import DocumentSet
from danswer.db.models import IndexAttempt
from danswer.document_index.document_index_utils import get_both_index_names
from danswer.document_index.factory import get_default_document_index
from danswer.document_index.interfaces import VespaDocumentFields
from danswer.redis.redis_connector import RedisConnector
from danswer.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
from danswer.redis.redis_connector_delete import RedisConnectorDelete
from danswer.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from danswer.redis.redis_connector_doc_perm_sync import (
RedisConnectorPermissionSyncPayload,
)
from danswer.redis.redis_connector_index import RedisConnectorIndex
from danswer.redis.redis_connector_prune import RedisConnectorPrune
from danswer.redis.redis_document_set import RedisDocumentSet
from danswer.redis.redis_pool import get_redis_client
from danswer.redis.redis_usergroup import RedisUserGroup
from danswer.utils.logger import setup_logger
from danswer.utils.variable_functionality import fetch_versioned_implementation
from danswer.utils.variable_functionality import (
@@ -167,7 +165,7 @@ def try_generate_stale_document_sync_tasks(
celery_app: Celery,
db_session: Session,
r: Redis,
lock_beat: redis.lock.Lock,
lock_beat: RedisLock,
tenant_id: str | None,
) -> int | None:
# the fence is up, do nothing
@@ -185,30 +183,34 @@ def try_generate_stale_document_sync_tasks(
f"Stale documents found (at least {stale_doc_count}). Generating sync tasks by cc pair."
)
task_logger.info("RedisConnector.generate_tasks starting by cc_pair.")
task_logger.info(
"RedisConnector.generate_tasks starting by cc_pair. "
"Documents spanning multiple cc_pairs will only be synced once."
)
docs_to_skip: set[str] = set()
# rkuo: we could technically sync all stale docs in one big pass.
# but I feel it's more understandable to group the docs by cc_pair
total_tasks_generated = 0
cc_pairs = get_connector_credential_pairs(db_session)
for cc_pair in cc_pairs:
rc = RedisConnectorCredentialPair(cc_pair.id)
tasks_generated = rc.generate_tasks(
celery_app, db_session, r, lock_beat, tenant_id
)
rc = RedisConnectorCredentialPair(tenant_id, cc_pair.id)
rc.set_skip_docs(docs_to_skip)
result = rc.generate_tasks(celery_app, db_session, r, lock_beat, tenant_id)
if tasks_generated is None:
if result is None:
continue
if tasks_generated == 0:
if result[1] == 0:
continue
task_logger.info(
f"RedisConnector.generate_tasks finished for single cc_pair. "
f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}"
f"cc_pair={cc_pair.id} tasks_generated={result[0]} tasks_possible={result[1]}"
)
total_tasks_generated += tasks_generated
total_tasks_generated += result[0]
task_logger.info(
f"RedisConnector.generate_tasks finished for all cc_pairs. total_tasks_generated={total_tasks_generated}"
@@ -223,15 +225,15 @@ def try_generate_document_set_sync_tasks(
document_set_id: int,
db_session: Session,
r: Redis,
lock_beat: redis.lock.Lock,
lock_beat: RedisLock,
tenant_id: str | None,
) -> int | None:
lock_beat.reacquire()
rds = RedisDocumentSet(document_set_id)
rds = RedisDocumentSet(tenant_id, document_set_id)
# don't generate document set sync tasks if tasks are still pending
if r.exists(rds.fence_key):
if rds.fenced:
return None
# don't generate sync tasks if we're up to date
@@ -251,12 +253,11 @@ def try_generate_document_set_sync_tasks(
)
# Add all documents that need to be updated into the queue
tasks_generated = rds.generate_tasks(
celery_app, db_session, r, lock_beat, tenant_id
)
if tasks_generated is None:
result = rds.generate_tasks(celery_app, db_session, r, lock_beat, tenant_id)
if result is None:
return None
tasks_generated = result[0]
# Currently we are allowing the sync to proceed with 0 tasks.
# It's possible for sets/groups to be generated initially with no entries
# and they still need to be marked as up to date.
@@ -265,11 +266,11 @@ def try_generate_document_set_sync_tasks(
task_logger.info(
f"RedisDocumentSet.generate_tasks finished. "
f"document_set_id={document_set.id} tasks_generated={tasks_generated}"
f"document_set={document_set.id} tasks_generated={tasks_generated}"
)
# set this only after all tasks have been added
r.set(rds.fence_key, tasks_generated)
rds.set_fence(tasks_generated)
return tasks_generated
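Document-set syncing now uses the same fence abstraction as the connector helpers: rds.fenced replaces r.exists(rds.fence_key) and rds.set_fence(n) replaces r.set(rds.fence_key, n). A compact sketch of that lifecycle with the tenant-aware constructor from this diff; the up-to-date check and logging are elided:

from danswer.redis.redis_document_set import RedisDocumentSet

def try_sync_document_set(celery_app, db_session, r, lock_beat, tenant_id, document_set_id) -> int | None:
    rds = RedisDocumentSet(tenant_id, document_set_id)
    if rds.fenced:
        return None  # a previous sync is still in flight
    result = rds.generate_tasks(celery_app, db_session, r, lock_beat, tenant_id)
    if result is None:
        return None
    tasks_generated = result[0]
    # raise the fence only after all tasks have been queued; 0 tasks is allowed so
    # empty sets still get marked as up to date by the monitor
    rds.set_fence(tasks_generated)
    return tasks_generated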
@@ -278,15 +279,14 @@ def try_generate_user_group_sync_tasks(
usergroup_id: int,
db_session: Session,
r: Redis,
lock_beat: redis.lock.Lock,
lock_beat: RedisLock,
tenant_id: str | None,
) -> int | None:
lock_beat.reacquire()
rug = RedisUserGroup(usergroup_id)
# don't generate sync tasks if tasks are still pending
if r.exists(rug.fence_key):
rug = RedisUserGroup(tenant_id, usergroup_id)
if rug.fenced:
# don't generate sync tasks if tasks are still pending
return None
# race condition with the monitor/cleanup function if we use a cached result!
@@ -308,12 +308,11 @@ def try_generate_user_group_sync_tasks(
task_logger.info(
f"RedisUserGroup.generate_tasks starting. usergroup_id={usergroup.id}"
)
tasks_generated = rug.generate_tasks(
celery_app, db_session, r, lock_beat, tenant_id
)
if tasks_generated is None:
result = rug.generate_tasks(celery_app, db_session, r, lock_beat, tenant_id)
if result is None:
return None
tasks_generated = result[0]
# Currently we are allowing the sync to proceed with 0 tasks.
# It's possible for sets/groups to be generated initially with no entries
# and they still need to be marked as up to date.
@@ -322,11 +321,11 @@ def try_generate_user_group_sync_tasks(
task_logger.info(
f"RedisUserGroup.generate_tasks finished. "
f"usergroup_id={usergroup.id} tasks_generated={tasks_generated}"
f"usergroup={usergroup.id} tasks_generated={tasks_generated}"
)
# set this only after all tasks have been added
r.set(rug.fence_key, tasks_generated)
rug.set_fence(tasks_generated)
return tasks_generated
@@ -352,7 +351,7 @@ def monitor_connector_taskset(r: Redis) -> None:
def monitor_document_set_taskset(
key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
document_set_id_str = RedisDocumentSet.get_id_from_fence_key(fence_key)
@@ -362,16 +361,12 @@ def monitor_document_set_taskset(
document_set_id = int(document_set_id_str)
rds = RedisDocumentSet(document_set_id)
fence_value = r.get(rds.fence_key)
if fence_value is None:
rds = RedisDocumentSet(tenant_id, document_set_id)
if not rds.fenced:
return
try:
initial_count = int(cast(int, fence_value))
except ValueError:
task_logger.error("The value is not an integer.")
initial_count = rds.payload
if initial_count is None:
return
count = cast(int, r.scard(rds.taskset_key))
@@ -399,48 +394,38 @@ def monitor_document_set_taskset(
f"Successfully synced document set: document_set={document_set_id}"
)
r.delete(rds.taskset_key)
r.delete(rds.fence_key)
rds.reset()
def monitor_connector_deletion_taskset(
key_bytes: bytes, r: Redis, tenant_id: str | None
tenant_id: str | None, key_bytes: bytes, r: Redis
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnectorDeletion.get_id_from_fence_key(fence_key)
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(f"could not parse cc_pair_id from {fence_key}")
return
cc_pair_id = int(cc_pair_id_str)
rcd = RedisConnectorDeletion(cc_pair_id)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
# read related data and evaluate/print task progress
fence_value = cast(bytes, r.get(rcd.fence_key))
if fence_value is None:
fence_data = redis_connector.delete.payload
if not fence_data:
task_logger.warning(
f"Connector deletion - fence payload invalid: cc_pair={cc_pair_id}"
)
return
try:
fence_json = fence_value.decode("utf-8")
fence_data = RedisConnectorDeletionFenceData.model_validate_json(
cast(str, fence_json)
)
except ValueError:
task_logger.exception(
"monitor_ccpair_indexing_taskset: fence_data not decodeable."
)
raise
# the fence is setting up but isn't ready yet
if fence_data.num_tasks is None:
# the fence is setting up but isn't ready yet
return
count = cast(int, r.scard(rcd.taskset_key))
remaining = redis_connector.delete.get_remaining()
task_logger.info(
f"Connector deletion progress: cc_pair={cc_pair_id} remaining={count} initial={fence_data.num_tasks}"
f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}"
)
if count > 0:
if remaining > 0:
return
with get_session_with_tenant(tenant_id) as db_session:
@@ -456,11 +441,22 @@ def monitor_connector_deletion_taskset(
db_session, cc_pair.connector_id, cc_pair.credential_id
)
if len(doc_ids) > 0:
# if this happens, documents somehow got added while deletion was in progress. Likely a bug
# gating off pruning and indexing work before deletion starts
# NOTE(rkuo): if this happens, documents somehow got added while
# deletion was in progress. Likely a bug in how pruning and indexing
# work is gated off before deletion starts.
task_logger.warning(
f"Connector deletion - documents still found after taskset completion: "
f"cc_pair={cc_pair_id} num={len(doc_ids)}"
"Connector deletion - documents still found after taskset completion. "
"Clearing the current deletion attempt and allowing deletion to restart: "
f"cc_pair={cc_pair_id} "
f"docs_deleted={fence_data.num_tasks} "
f"docs_remaining={len(doc_ids)}"
)
# We don't want to wave away how we got into this state, but resetting
# our attempt and letting the deletion restart is a good way to recover
redis_connector.delete.reset()
raise RuntimeError(
"Connector deletion - documents still found after taskset completion"
)
# clean up the rest of the related Postgres entities
@@ -524,15 +520,14 @@ def monitor_connector_deletion_taskset(
f"docs_deleted={fence_data.num_tasks}"
)
r.delete(rcd.taskset_key)
r.delete(rcd.fence_key)
redis_connector.delete.reset()
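The deletion monitor now reads a typed payload instead of hand-decoding the fence bytes and counting the taskset itself. A sketch of the resulting check-and-finish skeleton; the Postgres cleanup between the progress check and the reset is reduced to a comment:

from danswer.redis.redis_connector import RedisConnector

def check_deletion_progress(tenant_id: str | None, cc_pair_id: int) -> None:
    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    fence_data = redis_connector.delete.payload  # parsed fence payload, or None if missing/invalid
    if not fence_data:
        return
    if fence_data.num_tasks is None:
        return  # the fence is set up but task generation hasn't finished yet
    remaining = redis_connector.delete.get_remaining()
    if remaining > 0:
        return  # deletion tasks are still being worked off
    # ... delete index attempts, document set / user group links, and the cc_pair row ...
    redis_connector.delete.reset()  # clear taskset and fence so a new deletion can start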
def monitor_ccpair_pruning_taskset(
key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnectorPruning.get_id_from_fence_key(fence_key)
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(
f"monitor_ccpair_pruning_taskset: could not parse cc_pair_id from {fence_key}"
@@ -541,46 +536,76 @@ def monitor_ccpair_pruning_taskset(
cc_pair_id = int(cc_pair_id_str)
rcp = RedisConnectorPruning(cc_pair_id)
fence_value = r.get(rcp.fence_key)
if fence_value is None:
redis_connector = RedisConnector(tenant_id, cc_pair_id)
if not redis_connector.prune.fenced:
return
generator_value = r.get(rcp.generator_complete_key)
if generator_value is None:
initial = redis_connector.prune.generator_complete
if initial is None:
return
try:
initial_count = int(cast(int, generator_value))
except ValueError:
task_logger.error("The value is not an integer.")
return
count = cast(int, r.scard(rcp.taskset_key))
remaining = redis_connector.prune.get_remaining()
task_logger.info(
f"Connector pruning progress: cc_pair_id={cc_pair_id} remaining={count} initial={initial_count}"
f"Connector pruning progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
)
if count > 0:
if remaining > 0:
return
mark_ccpair_as_pruned(int(cc_pair_id), db_session)
task_logger.info(
f"Successfully pruned connector credential pair. cc_pair_id={cc_pair_id}"
f"Successfully pruned connector credential pair. cc_pair={cc_pair_id}"
)
r.delete(rcp.taskset_key)
r.delete(rcp.generator_progress_key)
r.delete(rcp.generator_complete_key)
r.delete(rcp.fence_key)
redis_connector.prune.taskset_clear()
redis_connector.prune.generator_clear()
redis_connector.prune.set_fence(False)
def monitor_ccpair_permissions_taskset(
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(
f"monitor_ccpair_permissions_taskset: could not parse cc_pair_id from {fence_key}"
)
return
cc_pair_id = int(cc_pair_id_str)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
if not redis_connector.permissions.fenced:
return
initial = redis_connector.permissions.generator_complete
if initial is None:
return
remaining = redis_connector.permissions.get_remaining()
task_logger.info(
f"Permissions sync progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}"
)
if remaining > 0:
return
payload: RedisConnectorPermissionSyncPayload | None = (
redis_connector.permissions.payload
)
start_time: datetime | None = payload.started if payload else None
mark_cc_pair_as_permissions_synced(db_session, int(cc_pair_id), start_time)
task_logger.info(f"Successfully synced permissions for cc_pair={cc_pair_id}")
redis_connector.permissions.reset()
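monitor_ccpair_permissions_taskset above follows the same shape as the pruning and deletion monitors: fence check, completion signal, remaining count, one domain-specific Postgres update, then reset. A generic sketch of that shared pattern; the parameterization below is an observation about this diff, not an abstraction that exists in the codebase:

from collections.abc import Callable
from typing import Any

from danswer.redis.redis_connector import RedisConnector

def monitor_fence_generic(
    tenant_id: str | None,
    cc_pair_id: int,
    select_sub: Callable[[RedisConnector], Any],  # e.g. lambda rc: rc.permissions
    on_complete: Callable[[], None],  # e.g. the mark_*_synced / mark_*_pruned update
) -> None:
    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    sub = select_sub(redis_connector)
    if not sub.fenced:
        return  # nothing in flight for this cc_pair
    if sub.generator_complete is None:
        return  # the generator hasn't finished enumerating work yet
    if sub.get_remaining() > 0:
        return  # worker tasks are still draining the taskset
    on_complete()  # domain-specific status update in Postgres
    sub.reset()  # clear fence and taskset so the next cycle can run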
def monitor_ccpair_indexing_taskset(
key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
# if the fence doesn't exist, there's nothing to do
fence_key = key_bytes.decode("utf-8")
composite_id = RedisConnectorIndexing.get_id_from_fence_key(fence_key)
composite_id = RedisConnector.get_id_from_fence_key(fence_key)
if composite_id is None:
task_logger.warning(
f"monitor_ccpair_indexing_taskset: could not parse composite_id from {fence_key}"
@@ -595,100 +620,87 @@ def monitor_ccpair_indexing_taskset(
cc_pair_id = int(parts[0])
search_settings_id = int(parts[1])
rci = RedisConnectorIndexing(cc_pair_id, search_settings_id)
# read related data and evaluate/print task progress
fence_value = cast(bytes, r.get(rci.fence_key))
if fence_value is None:
redis_connector = RedisConnector(tenant_id, cc_pair_id)
redis_connector_index = redis_connector.new_index(search_settings_id)
if not redis_connector_index.fenced:
return
try:
fence_json = fence_value.decode("utf-8")
fence_data = RedisConnectorIndexingFenceData.model_validate_json(
cast(str, fence_json)
payload = redis_connector_index.payload
if not payload:
return
elapsed_submitted = datetime.now(timezone.utc) - payload.submitted
progress = redis_connector_index.get_progress()
if progress is not None:
task_logger.info(
f"Connector indexing progress: cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"progress={progress} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f}"
)
except ValueError:
task_logger.exception(
"monitor_ccpair_indexing_taskset: fence_data not decodeable."
)
raise
elapsed_submitted = datetime.now(timezone.utc) - fence_data.submitted
generator_progress_value = r.get(rci.generator_progress_key)
if generator_progress_value is not None:
try:
progress_count = int(cast(int, generator_progress_value))
task_logger.info(
f"Connector indexing progress: cc_pair_id={cc_pair_id} "
f"search_settings_id={search_settings_id} "
f"progress={progress_count} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f}"
)
except ValueError:
task_logger.error(
"monitor_ccpair_indexing_taskset: generator_progress_value is not an integer."
)
if fence_data.index_attempt_id is None or fence_data.celery_task_id is None:
if payload.index_attempt_id is None or payload.celery_task_id is None:
# the task is still setting up
return
# Read result state BEFORE generator_complete_key to avoid a race condition
# never use any blocking methods on the result from inside a task!
result: AsyncResult = AsyncResult(fence_data.celery_task_id)
result_state = result.state
result: AsyncResult = AsyncResult(payload.celery_task_id)
generator_complete_value = r.get(rci.generator_complete_key)
if generator_complete_value is None:
if result_state in READY_STATES:
# IF the task state is READY, THEN generator_complete should be set
# if it isn't, then the worker crashed
task_logger.info(
f"Connector indexing aborted: "
f"cc_pair_id={cc_pair_id} "
f"search_settings_id={search_settings_id} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f}"
)
# inner/outer/inner double check pattern to avoid race conditions when checking for
# bad state
index_attempt = get_index_attempt(db_session, fence_data.index_attempt_id)
if index_attempt:
mark_attempt_failed(
index_attempt=index_attempt,
db_session=db_session,
failure_reason="Connector indexing aborted or exceptioned.",
# inner = get_completion / generator_complete not signaled
# outer = result.state in READY state
status_int = redis_connector_index.get_completion()
if status_int is None: # inner signal not set ... possible error
result_state = result.state
if (
result_state in READY_STATES
): # outer signal in terminal state ... possible error
# Now double check!
if redis_connector_index.get_completion() is None:
# inner signal still not set (and cannot change when outer result_state is READY)
# Task is finished but generator complete isn't set.
# We have a problem! Worker may have crashed.
msg = (
f"Connector indexing aborted or exceptioned: "
f"attempt={payload.index_attempt_id} "
f"celery_task={payload.celery_task_id} "
f"result_state={result_state} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f}"
)
task_logger.warning(msg)
r.delete(rci.generator_lock_key)
r.delete(rci.taskset_key)
r.delete(rci.generator_progress_key)
r.delete(rci.generator_complete_key)
r.delete(rci.fence_key)
index_attempt = get_index_attempt(db_session, payload.index_attempt_id)
if index_attempt:
if (
index_attempt.status != IndexingStatus.CANCELED
and index_attempt.status != IndexingStatus.FAILED
):
mark_attempt_failed(
index_attempt_id=payload.index_attempt_id,
db_session=db_session,
failure_reason=msg,
)
redis_connector_index.reset()
return
status_enum = HTTPStatus.INTERNAL_SERVER_ERROR
try:
status_value = int(cast(int, generator_complete_value))
status_enum = HTTPStatus(status_value)
except ValueError:
task_logger.error(
f"monitor_ccpair_indexing_taskset: "
f"generator_complete_value=f{generator_complete_value} could not be parsed."
)
status_enum = HTTPStatus(status_int)
task_logger.info(
f"Connector indexing finished: cc_pair_id={cc_pair_id} "
f"search_settings_id={search_settings_id} "
f"Connector indexing finished: cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"progress={progress} "
f"status={status_enum.name} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f}"
)
r.delete(rci.generator_lock_key)
r.delete(rci.taskset_key)
r.delete(rci.generator_progress_key)
r.delete(rci.generator_complete_key)
r.delete(rci.fence_key)
redis_connector_index.reset()
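The aborted-indexing detection above relies on an inner/outer/inner double check: read the inner completion signal, then the outer Celery result state, then re-read the inner signal, which can no longer change once the outer state is terminal. A stripped-down sketch of just that control flow; the danswer-specific cleanup (mark_attempt_failed, redis_connector_index.reset()) is what runs when it returns True:

from celery.result import AsyncResult
from celery.states import READY_STATES

def indexing_worker_crashed(redis_connector_index, celery_task_id: str) -> bool:
    """Return True if the indexing task reached a terminal state without signaling completion."""
    # inner check: has the generator signaled completion?
    if redis_connector_index.get_completion() is not None:
        return False
    # outer check: is the celery task itself in a terminal state?
    # never use blocking methods on the AsyncResult from inside a task
    result: AsyncResult = AsyncResult(celery_task_id)
    if result.state not in READY_STATES:
        return False  # still running, nothing conclusive yet
    # inner check again: once the outer state is READY the inner signal can no longer appear,
    # so a missing signal here means the worker finished without completing (likely crashed)
    return redis_connector_index.get_completion() is None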
@shared_task(name="monitor_vespa_sync", soft_time_limit=300, bind=True)
@@ -700,11 +712,11 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
This task lock timeout is CELERY_METADATA_SYNC_BEAT_LOCK_TIMEOUT seconds, so don't
do anything too expensive in this function!
Returns True if the task actually did work, False
Returns True if the task actually did work, False if it exited early to prevent overlap
"""
r = get_redis_client(tenant_id=tenant_id)
lock_beat: redis.lock.Lock = r.lock(
lock_beat: RedisLock = r.lock(
DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
@@ -716,7 +728,7 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
# print current queue lengths
r_celery = self.app.broker_connection().channel().client # type: ignore
n_celery = celery_get_queue_length("celery", r)
n_celery = celery_get_queue_length("celery", r_celery)
n_indexing = celery_get_queue_length(
DanswerCeleryQueues.CONNECTOR_INDEXING, r_celery
)
@@ -729,49 +741,33 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
n_pruning = celery_get_queue_length(
DanswerCeleryQueues.CONNECTOR_PRUNING, r_celery
)
n_permissions_sync = celery_get_queue_length(
DanswerCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery
)
task_logger.info(
f"Queue lengths: celery={n_celery} "
f"indexing={n_indexing} "
f"sync={n_sync} "
f"deletion={n_deletion} "
f"pruning={n_pruning}"
f"pruning={n_pruning} "
f"permissions_sync={n_permissions_sync} "
)
# do some cleanup before clearing fences
# check the db for any outstanding index attempts
with get_session_with_tenant(tenant_id) as db_session:
attempts: list[IndexAttempt] = []
attempts.extend(
get_all_index_attempts_by_status(IndexingStatus.NOT_STARTED, db_session)
)
attempts.extend(
get_all_index_attempts_by_status(IndexingStatus.IN_PROGRESS, db_session)
)
for a in attempts:
# if attempts exist in the db but we don't detect them in redis, mark them as failed
rci = RedisConnectorIndexing(
a.connector_credential_pair_id, a.search_settings_id
)
failure_reason = f"Unknown index attempt {a.id}. Might be left over from a process restart."
if not r.exists(rci.fence_key):
mark_attempt_failed(a, db_session, failure_reason=failure_reason)
lock_beat.reacquire()
if r.exists(RedisConnectorCredentialPair.get_fence_key()):
monitor_connector_taskset(r)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisConnectorDeletion.FENCE_PREFIX + "*"):
for key_bytes in r.scan_iter(RedisConnectorDelete.FENCE_PREFIX + "*"):
lock_beat.reacquire()
monitor_connector_deletion_taskset(key_bytes, r, tenant_id)
monitor_connector_deletion_taskset(tenant_id, key_bytes, r)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"):
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
monitor_document_set_taskset(key_bytes, r, db_session)
monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"):
@@ -782,19 +778,25 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
noop_fallback,
)
with get_session_with_tenant(tenant_id) as db_session:
monitor_usergroup_taskset(key_bytes, r, db_session)
monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisConnectorPruning.FENCE_PREFIX + "*"):
for key_bytes in r.scan_iter(RedisConnectorPrune.FENCE_PREFIX + "*"):
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_pruning_taskset(key_bytes, r, db_session)
monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisConnectorIndexing.FENCE_PREFIX + "*"):
for key_bytes in r.scan_iter(RedisConnectorIndex.FENCE_PREFIX + "*"):
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_indexing_taskset(key_bytes, r, db_session)
monitor_ccpair_indexing_taskset(tenant_id, key_bytes, r, db_session)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisConnectorPermissionSync.FENCE_PREFIX + "*"):
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_permissions_taskset(tenant_id, key_bytes, r, db_session)
# uncomment for debugging if needed
# r_celery = celery_app.broker_connection().channel().client
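Two details change in the queue reporting above: the generic "celery" queue length is now read from the broker connection's Redis client (r_celery) rather than the per-tenant client, and the permissions-sync queue is included. A sketch of the resulting snapshot, limited to the queue names that actually appear in this diff:

from danswer.background.celery.apps.app_base import task_logger
from danswer.background.celery.celery_redis import celery_get_queue_length
from danswer.configs.constants import DanswerCeleryQueues

def log_queue_lengths(celery_task) -> None:
    # use the broker connection's redis client, not the per-tenant redis client
    r_celery = celery_task.app.broker_connection().channel().client  # type: ignore
    lengths = {
        "celery": celery_get_queue_length("celery", r_celery),
        "indexing": celery_get_queue_length(DanswerCeleryQueues.CONNECTOR_INDEXING, r_celery),
        "pruning": celery_get_queue_length(DanswerCeleryQueues.CONNECTOR_PRUNING, r_celery),
        "permissions_sync": celery_get_queue_length(
            DanswerCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery
        ),
    }
    task_logger.info(
        "Queue lengths: " + " ".join(f"{name}={length}" for name, length in lengths.items())
    )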
@@ -866,7 +868,9 @@ def vespa_metadata_sync_task(
)
except Exception as ex:
if isinstance(ex, RetryError):
task_logger.warning(f"Retry failed: {ex.last_attempt.attempt_number}")
task_logger.warning(
f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
)
# only set the inner exception if it is of type Exception
e_temp = ex.last_attempt.exception()

View File

@@ -1,8 +1,8 @@
"""Factory stub for running celery worker / celery beat."""
from danswer.utils.variable_functionality import fetch_versioned_implementation
from celery import Celery
from danswer.background.celery.apps.beat import celery_app
from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable
set_is_ee_based_on_env_variable()
app = fetch_versioned_implementation(
"danswer.background.celery.apps.beat", "celery_app"
)
app: Celery = celery_app

View File

@@ -1,8 +1,10 @@
"""Factory stub for running celery worker / celery beat."""
from celery import Celery
from danswer.utils.variable_functionality import fetch_versioned_implementation
from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable
set_is_ee_based_on_env_variable()
app = fetch_versioned_implementation(
app: Celery = fetch_versioned_implementation(
"danswer.background.celery.apps.primary", "celery_app"
)
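Both factory stubs above exist so the celery CLI has a stable module to hand to -A (per their shared docstring). After this change the resolved object is annotated as Celery, and the beat stub imports its app directly instead of resolving it, presumably because beat no longer needs an enterprise override. A condensed sketch of the versioned pattern the primary stub keeps; any EE replacement module is not shown in this diff:

from celery import Celery

from danswer.utils.variable_functionality import fetch_versioned_implementation
from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable

# decide whether enterprise overrides apply before resolving the app
set_is_ee_based_on_env_variable()

# resolves danswer.background.celery.apps.primary.celery_app, or the registered
# enterprise replacement for that path when one exists
app: Celery = fetch_versioned_implementation(
    "danswer.background.celery.apps.primary", "celery_app"
)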

View File

@@ -29,18 +29,26 @@ JobStatusType = (
def _initializer(
func: Callable, args: list | tuple, kwargs: dict[str, Any] | None = None
) -> Any:
"""Ensure the parent proc's database connections are not touched
in the new connection pool
"""Initialize the child process with a fresh SQLAlchemy Engine.
Based on the recommended approach in the SQLAlchemy docs found:
Based on SQLAlchemy's recommendations to handle multiprocessing:
https://docs.sqlalchemy.org/en/20/core/pooling.html#using-connection-pools-with-multiprocessing-or-os-fork
"""
if kwargs is None:
kwargs = {}
logger.info("Initializing spawned worker child process.")
# Reset the engine in the child process
SqlEngine.reset_engine()
# Optionally set a custom app name for database logging purposes
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME)
# Initialize a new engine with desired parameters
SqlEngine.init_engine(pool_size=4, max_overflow=12, pool_recycle=60)
# Proceed with executing the target function
return func(*args, **kwargs)
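The initializer above follows SQLAlchemy's documented guidance for connection pools and os.fork/spawn: the child process must not reuse the parent's pooled connections, so the engine is reset and re-created inside the child before any work runs. A sketch of how such an initializer is typically wired into a spawned worker; the pool sizing numbers come from the diff, while the executor wiring and the two import paths marked below are assumptions:

from collections.abc import Callable
from concurrent.futures import ProcessPoolExecutor
from typing import Any

from danswer.configs.constants import POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME  # assumed import path
from danswer.db.engine import SqlEngine  # assumed import path

def _child_init() -> None:
    # never touch the parent's pooled connections in the child process
    SqlEngine.reset_engine()
    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME)
    SqlEngine.init_engine(pool_size=4, max_overflow=12, pool_recycle=60)

def run_in_child(func: Callable[..., Any], *args: Any) -> Any:
    # hypothetical wiring: the executor runs the initializer once per spawned child
    with ProcessPoolExecutor(max_workers=1, initializer=_child_init) as pool:
        return pool.submit(func, *args).result()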

View File

@@ -1,7 +1,5 @@
import time
import traceback
from abc import ABC
from abc import abstractmethod
from datetime import datetime
from datetime import timedelta
from datetime import timezone
@@ -21,6 +19,7 @@ from danswer.db.connector_credential_pair import get_last_successful_attempt_tim
from danswer.db.connector_credential_pair import update_connector_credential_pair
from danswer.db.engine import get_session_with_tenant
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.index_attempt import mark_attempt_canceled
from danswer.db.index_attempt import mark_attempt_failed
from danswer.db.index_attempt import mark_attempt_partially_succeeded
from danswer.db.index_attempt import mark_attempt_succeeded
@@ -31,10 +30,10 @@ from danswer.db.models import IndexingStatus
from danswer.db.models import IndexModelStatus
from danswer.document_index.factory import get_default_document_index
from danswer.indexing.embedder import DefaultIndexingEmbedder
from danswer.indexing.indexing_heartbeat import IndexingHeartbeat
from danswer.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from danswer.indexing.indexing_pipeline import build_indexing_pipeline
from danswer.utils.logger import IndexAttemptSingleton
from danswer.utils.logger import setup_logger
from danswer.utils.logger import TaskAttemptSingleton
from danswer.utils.variable_functionality import global_version
logger = setup_logger()
@@ -42,19 +41,6 @@ logger = setup_logger()
INDEXING_TRACER_NUM_PRINT_ENTRIES = 5
class RunIndexingCallbackInterface(ABC):
"""Defines a callback interface to be passed to
to run_indexing_entrypoint."""
@abstractmethod
def should_stop(self) -> bool:
"""Signal to stop the looping function in flight."""
@abstractmethod
def progress(self, amount: int) -> None:
"""Send progress updates to the caller."""
def _get_connector_runner(
db_session: Session,
attempt: IndexAttempt,
@@ -102,11 +88,15 @@ def _get_connector_runner(
)
class ConnectorStopSignal(Exception):
"""A custom exception used to signal a stop in processing."""
def _run_indexing(
db_session: Session,
index_attempt: IndexAttempt,
tenant_id: str | None,
callback: RunIndexingCallbackInterface | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> None:
"""
1. Get documents which are either new or updated from specified application
@@ -118,7 +108,13 @@ def _run_indexing(
"""
start_time = time.time()
if index_attempt.search_settings is None:
raise ValueError(
"Search settings must be set for indexing. This should not be possible."
)
search_settings = index_attempt.search_settings
index_name = search_settings.index_name
# Only update cc-pair status for primary index jobs
@@ -132,13 +128,7 @@ def _run_indexing(
embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
search_settings=search_settings,
heartbeat=IndexingHeartbeat(
index_attempt_id=index_attempt.id,
db_session=db_session,
# let the world know we're still making progress after
# every 10 batches
freq=10,
),
callback=callback,
)
indexing_pipeline = build_indexing_pipeline(
@@ -151,6 +141,7 @@ def _run_indexing(
),
db_session=db_session,
tenant_id=tenant_id,
callback=callback,
)
db_cc_pair = index_attempt.connector_credential_pair
@@ -222,7 +213,7 @@ def _run_indexing(
# contents still need to be initially pulled.
if callback:
if callback.should_stop():
raise RuntimeError("Connector stop signal detected")
raise ConnectorStopSignal("Connector stop signal detected")
# TODO: should we move this into the above callback instead?
db_session.refresh(db_cc_pair)
@@ -283,7 +274,7 @@ def _run_indexing(
db_session.commit()
if callback:
callback.progress(len(doc_batch))
callback.progress("_run_indexing", len(doc_batch))
# This new value is updated every batch, so UI can refresh per batch update
update_docs_indexed(
@@ -316,26 +307,16 @@ def _run_indexing(
)
except Exception as e:
logger.exception(
f"Connector run ran into exception after elapsed time: {time.time() - start_time} seconds"
f"Connector run exceptioned after elapsed time: {time.time() - start_time} seconds"
)
# Only mark the attempt as a complete failure if this is the first indexing window.
# Otherwise, some progress was made - the next run will not start from the beginning.
# In this case, it is not accurate to mark it as a failure. When the next run begins,
# if that fails immediately, it will be marked as a failure.
#
# NOTE: if the connector is manually disabled, we should mark it as a failure regardless
# to give better clarity in the UI, as the next run will never happen.
if (
ind == 0
or not db_cc_pair.status.is_active()
or index_attempt.status != IndexingStatus.IN_PROGRESS
):
mark_attempt_failed(
index_attempt,
if isinstance(e, ConnectorStopSignal):
mark_attempt_canceled(
index_attempt.id,
db_session,
failure_reason=str(e),
full_exception_trace=traceback.format_exc(),
reason=str(e),
)
if is_primary:
update_connector_credential_pair(
db_session=db_session,
@@ -347,6 +328,37 @@ def _run_indexing(
if INDEXING_TRACER_INTERVAL > 0:
tracer.stop()
raise e
else:
# Only mark the attempt as a complete failure if this is the first indexing window.
# Otherwise, some progress was made - the next run will not start from the beginning.
# In this case, it is not accurate to mark it as a failure. When the next run begins,
# if that fails immediately, it will be marked as a failure.
#
# NOTE: if the connector is manually disabled, we should mark it as a failure regardless
# to give better clarity in the UI, as the next run will never happen.
if (
ind == 0
or not db_cc_pair.status.is_active()
or index_attempt.status != IndexingStatus.IN_PROGRESS
):
mark_attempt_failed(
index_attempt.id,
db_session,
failure_reason=str(e),
full_exception_trace=traceback.format_exc(),
)
if is_primary:
update_connector_credential_pair(
db_session=db_session,
connector_id=db_connector.id,
credential_id=db_credential.id,
net_docs=net_doc_change,
)
if INDEXING_TRACER_INTERVAL > 0:
tracer.stop()
raise e
# break => similar to success case. As mentioned above, if the next run fails for the same
# reason it will then be marked as a failure
@@ -366,7 +378,7 @@ def _run_indexing(
and index_attempt_md.num_exceptions >= batch_num
):
mark_attempt_failed(
index_attempt,
index_attempt.id,
db_session,
failure_reason="All batches exceptioned.",
)
@@ -413,7 +425,7 @@ def run_indexing_entrypoint(
tenant_id: str | None,
connector_credential_pair_id: int,
is_ee: bool = False,
callback: RunIndexingCallbackInterface | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> None:
try:
if is_ee:
@@ -421,17 +433,19 @@ def run_indexing_entrypoint(
# set the indexing attempt ID so that all log messages from this process
# will have it added as a prefix
IndexAttemptSingleton.set_cc_and_index_id(
TaskAttemptSingleton.set_cc_and_index_id(
index_attempt_id, connector_credential_pair_id
)
with get_session_with_tenant(tenant_id) as db_session:
attempt = transition_attempt_to_in_progress(index_attempt_id, db_session)
tenant_str = ""
if tenant_id is not None:
tenant_str = f" for tenant {tenant_id}"
logger.info(
f"Indexing starting for tenant {tenant_id}: "
if tenant_id is not None
else ""
+ f"connector='{attempt.connector_credential_pair.connector.name}' "
f"Indexing starting{tenant_str}: "
f"connector='{attempt.connector_credential_pair.connector.name}' "
f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
f"credentials='{attempt.connector_credential_pair.connector_id}'"
)
@@ -439,10 +453,8 @@ def run_indexing_entrypoint(
_run_indexing(db_session, attempt, tenant_id, callback)
logger.info(
f"Indexing finished for tenant {tenant_id}: "
if tenant_id is not None
else ""
+ f"connector='{attempt.connector_credential_pair.connector.name}' "
f"Indexing finished{tenant_str}: "
f"connector='{attempt.connector_credential_pair.connector.name}' "
f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
f"credentials='{attempt.connector_credential_pair.connector_id}'"
)
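RunIndexingCallbackInterface (deleted above) is replaced by the shared IndexingHeartbeatInterface, whose progress() call now carries a tag string, and a cooperative stop surfaces as ConnectorStopSignal so it can be recorded as a cancellation via mark_attempt_canceled rather than a failure. A sketch of the batch-loop contract implied by this diff; the callback implementation itself lives elsewhere and is assumed:

from danswer.indexing.indexing_heartbeat import IndexingHeartbeatInterface

class ConnectorStopSignal(Exception):
    """A custom exception used to signal a stop in processing."""

def process_batches(doc_batches, callback: IndexingHeartbeatInterface | None) -> None:
    for doc_batch in doc_batches:
        if callback and callback.should_stop():
            # caught further up and recorded with mark_attempt_canceled(index_attempt.id, ...)
            raise ConnectorStopSignal("Connector stop signal detected")
        # ... run the indexing pipeline on doc_batch ...
        if callback:
            # progress now takes a tag identifying the caller plus the amount of work done
            callback.progress("_run_indexing", len(doc_batch))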

View File

@@ -14,15 +14,6 @@ from danswer.db.tasks import mark_task_start
from danswer.db.tasks import register_task
def name_cc_prune_task(
connector_id: int | None = None, credential_id: int | None = None
) -> str:
task_name = f"prune_connector_credential_pair_{connector_id}_{credential_id}"
if not connector_id or not credential_id:
task_name = "prune_connector_credential_pair"
return task_name
T = TypeVar("T", bound=Callable)

View File

@@ -7,10 +7,10 @@ from sqlalchemy.orm import Session
from danswer.chat.models import CitationInfo
from danswer.chat.models import LlmDoc
from danswer.context.search.models import InferenceSection
from danswer.db.chat import get_chat_messages_by_session
from danswer.db.models import ChatMessage
from danswer.llm.answering.models import PreviousMessage
from danswer.search.models import InferenceSection
from danswer.utils.logger import setup_logger
logger = setup_logger()

View File

@@ -6,11 +6,11 @@ from typing import Any
from pydantic import BaseModel
from danswer.configs.constants import DocumentSource
from danswer.search.enums import QueryFlow
from danswer.search.enums import SearchType
from danswer.search.models import RetrievalDocs
from danswer.search.models import SearchResponse
from danswer.tools.custom.base_tool_types import ToolResultType
from danswer.context.search.enums import QueryFlow
from danswer.context.search.enums import SearchType
from danswer.context.search.models import RetrievalDocs
from danswer.context.search.models import SearchResponse
from danswer.tools.tool_implementations.custom.base_tool_types import ToolResultType
class LlmDoc(BaseModel):
@@ -156,7 +156,7 @@ class QAResponse(SearchResponse, DanswerAnswer):
error_msg: str | None = None
class ImageGenerationDisplay(BaseModel):
class FileChatDisplay(BaseModel):
file_ids: list[str]
@@ -170,7 +170,7 @@ AnswerQuestionPossibleReturn = (
| DanswerQuotes
| CitationInfo
| DanswerContexts
| ImageGenerationDisplay
| FileChatDisplay
| CustomToolResponse
| StreamingError
| StreamStopInfo

View File

@@ -11,23 +11,28 @@ from danswer.chat.models import AllCitations
from danswer.chat.models import CitationInfo
from danswer.chat.models import CustomToolResponse
from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import FileChatDisplay
from danswer.chat.models import FinalUsedContextDocsResponse
from danswer.chat.models import ImageGenerationDisplay
from danswer.chat.models import LLMRelevanceFilterResponse
from danswer.chat.models import MessageResponseIDInfo
from danswer.chat.models import MessageSpecificCitations
from danswer.chat.models import QADocsResponse
from danswer.chat.models import StreamingError
from danswer.configs.app_configs import AZURE_DALLE_API_BASE
from danswer.configs.app_configs import AZURE_DALLE_API_KEY
from danswer.configs.app_configs import AZURE_DALLE_API_VERSION
from danswer.configs.app_configs import AZURE_DALLE_DEPLOYMENT_NAME
from danswer.configs.chat_configs import BING_API_KEY
from danswer.chat.models import StreamStopInfo
from danswer.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from danswer.configs.constants import MessageType
from danswer.configs.model_configs import GEN_AI_TEMPERATURE
from danswer.context.search.enums import OptionalSearchSetting
from danswer.context.search.enums import QueryFlow
from danswer.context.search.enums import SearchType
from danswer.context.search.models import InferenceSection
from danswer.context.search.models import RetrievalDetails
from danswer.context.search.retrieval.search_runner import inference_sections_from_ids
from danswer.context.search.utils import chunks_or_sections_to_search_docs
from danswer.context.search.utils import dedupe_documents
from danswer.context.search.utils import drop_llm_indices
from danswer.context.search.utils import relevant_sections_to_indices
from danswer.db.chat import attach_files_to_chat_message
from danswer.db.chat import create_db_search_doc
from danswer.db.chat import create_new_chat_message
@@ -40,7 +45,6 @@ from danswer.db.chat import reserve_message_id
from danswer.db.chat import translate_db_message_to_chat_message_detail
from danswer.db.chat import translate_db_search_doc_to_server_search_doc
from danswer.db.engine import get_session_context_manager
from danswer.db.llm import fetch_existing_llm_providers
from danswer.db.models import SearchDoc as DbSearchDoc
from danswer.db.models import ToolCall
from danswer.db.models import User
@@ -60,53 +64,55 @@ from danswer.llm.answering.models import PromptConfig
from danswer.llm.exceptions import GenAIDisabledException
from danswer.llm.factory import get_llms_for_persona
from danswer.llm.factory import get_main_llm_from_tuple
from danswer.llm.interfaces import LLMConfig
from danswer.llm.utils import litellm_exception_to_error_msg
from danswer.natural_language_processing.utils import get_tokenizer
from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import OptionalSearchSetting
from danswer.search.enums import QueryFlow
from danswer.search.enums import SearchType
from danswer.search.models import InferenceSection
from danswer.search.retrieval.search_runner import inference_sections_from_ids
from danswer.search.utils import chunks_or_sections_to_search_docs
from danswer.search.utils import dedupe_documents
from danswer.search.utils import drop_llm_indices
from danswer.search.utils import relevant_sections_to_indices
from danswer.server.query_and_chat.models import ChatMessageDetail
from danswer.server.query_and_chat.models import CreateChatMessageRequest
from danswer.server.utils import get_json_line
from danswer.tools.built_in_tools import get_built_in_tool_by_id
from danswer.tools.custom.custom_tool import (
build_custom_tools_from_openapi_schema_and_headers,
)
from danswer.tools.custom.custom_tool import CUSTOM_TOOL_RESPONSE_ID
from danswer.tools.custom.custom_tool import CustomToolCallSummary
from danswer.tools.force import ForceUseTool
from danswer.tools.images.image_generation_tool import IMAGE_GENERATION_RESPONSE_ID
from danswer.tools.images.image_generation_tool import ImageGenerationResponse
from danswer.tools.images.image_generation_tool import ImageGenerationTool
from danswer.tools.internet_search.internet_search_tool import (
from danswer.tools.models import ToolResponse
from danswer.tools.tool import Tool
from danswer.tools.tool_constructor import construct_tools
from danswer.tools.tool_constructor import CustomToolConfig
from danswer.tools.tool_constructor import ImageGenerationToolConfig
from danswer.tools.tool_constructor import InternetSearchToolConfig
from danswer.tools.tool_constructor import SearchToolConfig
from danswer.tools.tool_implementations.custom.custom_tool import (
CUSTOM_TOOL_RESPONSE_ID,
)
from danswer.tools.tool_implementations.custom.custom_tool import CustomToolCallSummary
from danswer.tools.tool_implementations.images.image_generation_tool import (
IMAGE_GENERATION_RESPONSE_ID,
)
from danswer.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationResponse,
)
from danswer.tools.tool_implementations.internet_search.internet_search_tool import (
INTERNET_SEARCH_RESPONSE_ID,
)
from danswer.tools.internet_search.internet_search_tool import (
from danswer.tools.tool_implementations.internet_search.internet_search_tool import (
internet_search_response_to_search_docs,
)
from danswer.tools.internet_search.internet_search_tool import InternetSearchResponse
from danswer.tools.internet_search.internet_search_tool import InternetSearchTool
from danswer.tools.models import DynamicSchemaInfo
from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS_ID
from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID
from danswer.tools.search.search_tool import SearchResponseSummary
from danswer.tools.search.search_tool import SearchTool
from danswer.tools.search.search_tool import SECTION_RELEVANCE_LIST_ID
from danswer.tools.tool import Tool
from danswer.tools.tool import ToolResponse
from danswer.tools.tool_implementations.internet_search.internet_search_tool import (
InternetSearchResponse,
)
from danswer.tools.tool_implementations.internet_search.internet_search_tool import (
InternetSearchTool,
)
from danswer.tools.tool_implementations.search.search_tool import (
FINAL_CONTEXT_DOCUMENTS_ID,
)
from danswer.tools.tool_implementations.search.search_tool import (
SEARCH_RESPONSE_SUMMARY_ID,
)
from danswer.tools.tool_implementations.search.search_tool import SearchResponseSummary
from danswer.tools.tool_implementations.search.search_tool import SearchTool
from danswer.tools.tool_implementations.search.search_tool import (
SECTION_RELEVANCE_LIST_ID,
)
from danswer.tools.tool_runner import ToolCallFinalResult
from danswer.tools.utils import compute_all_tool_tokens
from danswer.tools.utils import explicit_tool_calling_supported
from danswer.utils.headers import header_dict_to_header_list
from danswer.utils.logger import setup_logger
from danswer.utils.long_term_log import LongTermLogger
from danswer.utils.timing import log_generator_function_time
logger = setup_logger()
@@ -256,10 +262,11 @@ ChatPacket = (
| DanswerAnswerPiece
| AllCitations
| CitationInfo
| ImageGenerationDisplay
| FileChatDisplay
| CustomToolResponse
| MessageSpecificCitations
| MessageResponseIDInfo
| StreamStopInfo
)
ChatPacketStream = Iterator[ChatPacket]
@@ -275,7 +282,6 @@ def stream_chat_message_objects(
max_document_percentage: float = CHAT_TARGET_CHUNK_PERCENTAGE,
# if specified, uses the last user message and does not create a new user message based
# on the `new_msg_req.message`. Currently, requires a state where the last message is a
use_existing_user_message: bool = False,
litellm_additional_headers: dict[str, str] | None = None,
custom_tool_additional_headers: dict[str, str] | None = None,
is_connected: Callable[[], bool] | None = None,
@@ -287,6 +293,9 @@ def stream_chat_message_objects(
3. [always] A set of streamed LLM tokens or an error anywhere along the line if something fails
4. [always] Details on the final AI response message that is created
"""
use_existing_user_message = new_msg_req.use_existing_user_message
existing_assistant_message_id = new_msg_req.existing_assistant_message_id
# Currently surrounding context is not supported for chat
# Chat is already token heavy and harder for the model to process; plus, it would roll history over much faster
new_msg_req.chunks_above = 0
@@ -308,6 +317,11 @@ def stream_chat_message_objects(
retrieval_options = new_msg_req.retrieval_options
alternate_assistant_id = new_msg_req.alternate_assistant_id
# permanent "log" store, used primarily for debugging
long_term_logger = LongTermLogger(
metadata={"user_id": str(user_id), "chat_session_id": str(chat_session_id)}
)
# use alternate persona if alternative assistant id is passed in
if alternate_assistant_id is not None:
persona = get_persona_by_id(
@@ -333,6 +347,7 @@ def stream_chat_message_objects(
persona=persona,
llm_override=new_msg_req.llm_override or chat_session.llm_override,
additional_headers=litellm_additional_headers,
long_term_logger=long_term_logger,
)
except GenAIDisabledException:
raise RuntimeError("LLM is disabled. Can't use chat flow without LLM.")
@@ -408,12 +423,20 @@ def stream_chat_message_objects(
final_msg, history_msgs = create_chat_chain(
chat_session_id=chat_session_id, db_session=db_session
)
if final_msg.message_type != MessageType.USER:
raise RuntimeError(
"The last message was not a user message. Cannot call "
"`stream_chat_message_objects` with `is_regenerate=True` "
"when the last message is not a user message."
)
if existing_assistant_message_id is None:
if final_msg.message_type != MessageType.USER:
raise RuntimeError(
"The last message was not a user message. Cannot call "
"`stream_chat_message_objects` with `is_regenerate=True` "
"when the last message is not a user message."
)
else:
if final_msg.id != existing_assistant_message_id:
raise RuntimeError(
"The last message was not the existing assistant message. "
f"Final message id: {final_msg.id}, "
f"existing assistant message id: {existing_assistant_message_id}"
)
# Disable Query Rephrasing for the first message
# This leads to a better first response since the LLM rephrasing the question
@@ -484,13 +507,19 @@ def stream_chat_message_objects(
),
max_window_percentage=max_document_percentage,
)
reserved_message_id = reserve_message_id(
db_session=db_session,
chat_session_id=chat_session_id,
parent_message=user_message.id
if user_message is not None
else parent_message.id,
message_type=MessageType.ASSISTANT,
# we don't need to reserve a message id if we're using an existing assistant message
reserved_message_id = (
final_msg.id
if existing_assistant_message_id is not None
else reserve_message_id(
db_session=db_session,
chat_session_id=chat_session_id,
parent_message=user_message.id
if user_message is not None
else parent_message.id,
message_type=MessageType.ASSISTANT,
)
)
yield MessageResponseIDInfo(
user_message_id=user_message.id if user_message else None,
@@ -505,7 +534,13 @@ def stream_chat_message_objects(
partial_response = partial(
create_new_chat_message,
chat_session_id=chat_session_id,
parent_message=final_msg,
# if we're using an existing assistant message, then this will just be an
# update operation, in which case the parent should be the parent of
# the latest message. If we're creating a new assistant message, then the
# parent should be the latest message (i.e. the latest user message)
parent_message=(
final_msg if existing_assistant_message_id is None else parent_message
),
prompt_id=prompt_id,
overridden_model=overridden_model,
# message=,
@@ -517,6 +552,7 @@ def stream_chat_message_objects(
# reference_docs=,
db_session=db_session,
commit=False,
reserved_message_id=reserved_message_id,
)
if not final_msg.prompt:
@@ -532,148 +568,54 @@ def stream_chat_message_objects(
if not persona
else PromptConfig.from_model(persona.prompts[0])
)
answer_style_config = AnswerStyleConfig(
citation_config=CitationConfig(
all_docs_useful=selected_db_search_docs is not None
),
document_pruning_config=document_pruning_config,
structured_response_format=new_msg_req.structured_response_format,
)
# find out what tools to use
search_tool: SearchTool | None = None
tool_dict: dict[int, list[Tool]] = {} # tool_id to tool
for db_tool_model in persona.tools:
# handle in-code tools specially
if db_tool_model.in_code_tool_id:
tool_cls = get_built_in_tool_by_id(db_tool_model.id, db_session)
if tool_cls.__name__ == SearchTool.__name__ and not latest_query_files:
search_tool = SearchTool(
db_session=db_session,
user=user,
persona=persona,
retrieval_options=retrieval_options,
prompt_config=prompt_config,
llm=llm,
fast_llm=fast_llm,
pruning_config=document_pruning_config,
selected_sections=selected_sections,
chunks_above=new_msg_req.chunks_above,
chunks_below=new_msg_req.chunks_below,
full_doc=new_msg_req.full_doc,
evaluation_type=LLMEvaluationType.BASIC
if persona.llm_relevance_filter
else LLMEvaluationType.SKIP,
)
tool_dict[db_tool_model.id] = [search_tool]
elif tool_cls.__name__ == ImageGenerationTool.__name__:
img_generation_llm_config: LLMConfig | None = None
if (
llm
and llm.config.api_key
and llm.config.model_provider == "openai"
):
img_generation_llm_config = LLMConfig(
model_provider=llm.config.model_provider,
model_name="dall-e-3",
temperature=GEN_AI_TEMPERATURE,
api_key=llm.config.api_key,
api_base=llm.config.api_base,
api_version=llm.config.api_version,
)
elif (
llm.config.model_provider == "azure"
and AZURE_DALLE_API_KEY is not None
):
img_generation_llm_config = LLMConfig(
model_provider="azure",
model_name=f"azure/{AZURE_DALLE_DEPLOYMENT_NAME}",
temperature=GEN_AI_TEMPERATURE,
api_key=AZURE_DALLE_API_KEY,
api_base=AZURE_DALLE_API_BASE,
api_version=AZURE_DALLE_API_VERSION,
)
else:
llm_providers = fetch_existing_llm_providers(db_session)
openai_provider = next(
iter(
[
llm_provider
for llm_provider in llm_providers
if llm_provider.provider == "openai"
]
),
None,
)
if not openai_provider or not openai_provider.api_key:
raise ValueError(
"Image generation tool requires an OpenAI API key"
)
img_generation_llm_config = LLMConfig(
model_provider=openai_provider.provider,
model_name="dall-e-3",
temperature=GEN_AI_TEMPERATURE,
api_key=openai_provider.api_key,
api_base=openai_provider.api_base,
api_version=openai_provider.api_version,
)
tool_dict[db_tool_model.id] = [
ImageGenerationTool(
api_key=cast(str, img_generation_llm_config.api_key),
api_base=img_generation_llm_config.api_base,
api_version=img_generation_llm_config.api_version,
additional_headers=litellm_additional_headers,
model=img_generation_llm_config.model_name,
)
]
elif tool_cls.__name__ == InternetSearchTool.__name__:
bing_api_key = BING_API_KEY
if not bing_api_key:
raise ValueError(
"Internet search tool requires a Bing API key, please contact your Danswer admin to get it added!"
)
tool_dict[db_tool_model.id] = [
InternetSearchTool(api_key=bing_api_key)
]
continue
# handle all custom tools
if db_tool_model.openapi_schema:
tool_dict[db_tool_model.id] = cast(
list[Tool],
build_custom_tools_from_openapi_schema_and_headers(
db_tool_model.openapi_schema,
dynamic_schema_info=DynamicSchemaInfo(
chat_session_id=chat_session_id,
message_id=user_message.id if user_message else None,
),
custom_headers=(db_tool_model.custom_headers or [])
+ (
header_dict_to_header_list(
custom_tool_additional_headers or {}
)
),
),
)
tool_dict = construct_tools(
persona=persona,
prompt_config=prompt_config,
db_session=db_session,
user=user,
llm=llm,
fast_llm=fast_llm,
search_tool_config=SearchToolConfig(
answer_style_config=answer_style_config,
document_pruning_config=document_pruning_config,
retrieval_options=retrieval_options or RetrievalDetails(),
selected_sections=selected_sections,
chunks_above=new_msg_req.chunks_above,
chunks_below=new_msg_req.chunks_below,
full_doc=new_msg_req.full_doc,
latest_query_files=latest_query_files,
),
internet_search_tool_config=InternetSearchToolConfig(
answer_style_config=answer_style_config,
),
image_generation_tool_config=ImageGenerationToolConfig(
additional_headers=litellm_additional_headers,
),
custom_tool_config=CustomToolConfig(
chat_session_id=chat_session_id,
message_id=user_message.id if user_message else None,
additional_headers=custom_tool_additional_headers,
),
)
tools: list[Tool] = []
for tool_list in tool_dict.values():
tools.extend(tool_list)
# factor in tool definition size when pruning
document_pruning_config.tool_num_tokens = compute_all_tool_tokens(
tools, llm_tokenizer
)
document_pruning_config.using_tool_message = explicit_tool_calling_supported(
llm_provider, llm_model_name
)
# LLM prompt building, response capturing, etc.
answer = Answer(
is_connected=is_connected,
question=final_msg.message,
latest_query_files=latest_query_files,
answer_style_config=AnswerStyleConfig(
citation_config=CitationConfig(
all_docs_useful=selected_db_search_docs is not None
),
document_pruning_config=document_pruning_config,
structured_response_format=new_msg_req.structured_response_format,
),
answer_style_config=answer_style_config,
prompt_config=prompt_config,
llm=(
llm
@@ -741,7 +683,6 @@ def stream_chat_message_objects(
yield LLMRelevanceFilterResponse(
llm_selected_doc_indices=llm_indices
)
elif packet.id == FINAL_CONTEXT_DOCUMENTS_ID:
yield FinalUsedContextDocsResponse(
final_context_docs=packet.response
@@ -759,7 +700,7 @@ def stream_chat_message_objects(
FileDescriptor(id=str(file_id), type=ChatFileType.IMAGE)
for file_id in file_ids
]
yield ImageGenerationDisplay(
yield FileChatDisplay(
file_ids=[str(file_id) for file_id in file_ids]
)
elif packet.id == INTERNET_SEARCH_RESPONSE_ID:
@@ -773,11 +714,32 @@ def stream_chat_message_objects(
yield qa_docs_response
elif packet.id == CUSTOM_TOOL_RESPONSE_ID:
custom_tool_response = cast(CustomToolCallSummary, packet.response)
yield CustomToolResponse(
response=custom_tool_response.tool_result,
tool_name=custom_tool_response.tool_name,
)
if (
custom_tool_response.response_type == "image"
or custom_tool_response.response_type == "csv"
):
file_ids = custom_tool_response.tool_result.file_ids
ai_message_files = [
FileDescriptor(
id=str(file_id),
type=ChatFileType.IMAGE
if custom_tool_response.response_type == "image"
else ChatFileType.CSV,
)
for file_id in file_ids
]
yield FileChatDisplay(
file_ids=[str(file_id) for file_id in file_ids]
)
else:
yield CustomToolResponse(
response=custom_tool_response.tool_result,
tool_name=custom_tool_response.tool_name,
)
elif isinstance(packet, StreamStopInfo):
pass
else:
if isinstance(packet, ToolCallFinalResult):
tool_result = packet
@@ -807,6 +769,7 @@ def stream_chat_message_objects(
# Post-LLM answer processing
try:
logger.debug("Post-LLM answer processing")
message_specific_citations: MessageSpecificCitations | None = None
if reference_db_search_docs:
message_specific_citations = _translate_citations(
@@ -822,7 +785,6 @@ def stream_chat_message_objects(
tool_name_to_tool_id[tool.name] = tool_id
gen_ai_response_message = partial_response(
reserved_message_id=reserved_message_id,
message=answer.llm_answer,
rephrased_query=(
qa_docs_response.rephrased_query if qa_docs_response else None
@@ -830,21 +792,21 @@ def stream_chat_message_objects(
reference_docs=reference_db_search_docs,
files=ai_message_files,
token_count=len(llm_tokenizer_encode_func(answer.llm_answer)),
citations=message_specific_citations.citation_map
if message_specific_citations
else None,
citations=(
message_specific_citations.citation_map
if message_specific_citations
else None
),
error=None,
tool_calls=(
[
ToolCall(
tool_id=tool_name_to_tool_id[tool_result.tool_name],
tool_name=tool_result.tool_name,
tool_arguments=tool_result.tool_args,
tool_result=tool_result.tool_result,
)
]
tool_call=(
ToolCall(
tool_id=tool_name_to_tool_id[tool_result.tool_name],
tool_name=tool_result.tool_name,
tool_arguments=tool_result.tool_args,
tool_result=tool_result.tool_result,
)
if tool_result
else []
else None
),
)
@@ -868,7 +830,6 @@ def stream_chat_message_objects(
def stream_chat_message(
new_msg_req: CreateChatMessageRequest,
user: User | None,
use_existing_user_message: bool = False,
litellm_additional_headers: dict[str, str] | None = None,
custom_tool_additional_headers: dict[str, str] | None = None,
is_connected: Callable[[], bool] | None = None,
@@ -878,7 +839,6 @@ def stream_chat_message(
new_msg_req=new_msg_req,
user=user,
db_session=db_session,
use_existing_user_message=use_existing_user_message,
litellm_additional_headers=litellm_additional_headers,
custom_tool_additional_headers=custom_tool_additional_headers,
is_connected=is_connected,

View File

@@ -1,115 +0,0 @@
from typing_extensions import TypedDict # noreorder
from pydantic import BaseModel
from danswer.prompts.chat_tools import DANSWER_TOOL_DESCRIPTION
from danswer.prompts.chat_tools import DANSWER_TOOL_NAME
from danswer.prompts.chat_tools import TOOL_FOLLOWUP
from danswer.prompts.chat_tools import TOOL_LESS_FOLLOWUP
from danswer.prompts.chat_tools import TOOL_LESS_PROMPT
from danswer.prompts.chat_tools import TOOL_TEMPLATE
from danswer.prompts.chat_tools import USER_INPUT
class ToolInfo(TypedDict):
name: str
description: str
class DanswerChatModelOut(BaseModel):
model_raw: str
action: str
action_input: str
def call_tool(
model_actions: DanswerChatModelOut,
) -> str:
raise NotImplementedError("There are no additional tool integrations right now")
def form_user_prompt_text(
query: str,
tool_text: str | None,
hint_text: str | None,
user_input_prompt: str = USER_INPUT,
tool_less_prompt: str = TOOL_LESS_PROMPT,
) -> str:
user_prompt = tool_text or tool_less_prompt
user_prompt += user_input_prompt.format(user_input=query)
if hint_text:
if user_prompt[-1] != "\n":
user_prompt += "\n"
user_prompt += "\nHint: " + hint_text
return user_prompt.strip()
def form_tool_section_text(
tools: list[ToolInfo] | None, retrieval_enabled: bool, template: str = TOOL_TEMPLATE
) -> str | None:
if not tools and not retrieval_enabled:
return None
if retrieval_enabled and tools:
tools.append(
{"name": DANSWER_TOOL_NAME, "description": DANSWER_TOOL_DESCRIPTION}
)
tools_intro = []
if tools:
num_tools = len(tools)
for tool in tools:
description_formatted = tool["description"].replace("\n", " ")
tools_intro.append(f"> {tool['name']}: {description_formatted}")
prefix = "Must be one of " if num_tools > 1 else "Must be "
tools_intro_text = "\n".join(tools_intro)
tool_names_text = prefix + ", ".join([tool["name"] for tool in tools])
else:
return None
return template.format(
tool_overviews=tools_intro_text, tool_names=tool_names_text
).strip()
def form_tool_followup_text(
tool_output: str,
query: str,
hint_text: str | None,
tool_followup_prompt: str = TOOL_FOLLOWUP,
ignore_hint: bool = False,
) -> str:
# If it's a multi-line query, it likely confuses the model more than it helps
if "\n" not in query:
optional_reminder = f"\nAs a reminder, my query was: {query}\n"
else:
optional_reminder = ""
if not ignore_hint and hint_text:
hint_text_spaced = f"\nHint: {hint_text}\n"
else:
hint_text_spaced = ""
return tool_followup_prompt.format(
tool_output=tool_output,
optional_reminder=optional_reminder,
hint=hint_text_spaced,
).strip()
def form_tool_less_followup_text(
tool_output: str,
query: str,
hint_text: str | None,
tool_followup_prompt: str = TOOL_LESS_FOLLOWUP,
) -> str:
hint = f"Hint: {hint_text}" if hint_text else ""
return tool_followup_prompt.format(
context_str=tool_output, user_query=query, hint_text=hint
).strip()

View File

@@ -163,6 +163,17 @@ try:
except ValueError:
POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT
# Experimental setting to control idle transactions
POSTGRES_IDLE_SESSIONS_TIMEOUT_DEFAULT = 0 # milliseconds
try:
POSTGRES_IDLE_SESSIONS_TIMEOUT = int(
os.environ.get(
"POSTGRES_IDLE_SESSIONS_TIMEOUT", POSTGRES_IDLE_SESSIONS_TIMEOUT_DEFAULT
)
)
except ValueError:
POSTGRES_IDLE_SESSIONS_TIMEOUT = POSTGRES_IDLE_SESSIONS_TIMEOUT_DEFAULT
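# Hedged sketch of how a setting like this could be consumed: one plausible use is
# Postgres's idle_in_transaction_session_timeout (milliseconds), applied per
# connection. The helper below is illustrative only; it is an assumption, not code
# from this repository.
def _example_apply_idle_timeout(dbapi_connection) -> None:
    if POSTGRES_IDLE_SESSIONS_TIMEOUT <= 0:
        return  # 0 disables the timeout, matching the default above
    cursor = dbapi_connection.cursor()
    cursor.execute(
        f"SET idle_in_transaction_session_timeout = {POSTGRES_IDLE_SESSIONS_TIMEOUT}"
    )
    cursor.close()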
REDIS_SSL = os.getenv("REDIS_SSL", "").lower() == "true"
REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
@@ -223,7 +234,7 @@ except ValueError:
CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT
)
CELERY_WORKER_INDEXING_CONCURRENCY_DEFAULT = 1
CELERY_WORKER_INDEXING_CONCURRENCY_DEFAULT = 3
try:
env_value = os.environ.get("CELERY_WORKER_INDEXING_CONCURRENCY")
if not env_value:
@@ -251,9 +262,6 @@ ENABLED_CONNECTOR_TYPES = os.environ.get("ENABLED_CONNECTOR_TYPES") or ""
# for some connectors
ENABLE_EXPENSIVE_EXPERT_CALLS = False
GOOGLE_DRIVE_INCLUDE_SHARED = False
GOOGLE_DRIVE_FOLLOW_SHORTCUTS = False
GOOGLE_DRIVE_ONLY_ORG_PUBLIC = False
# TODO: these should be configurable from the frontend via an expandable advanced-options section
WEB_CONNECTOR_IGNORED_CLASSES = os.environ.get(
@@ -414,6 +422,9 @@ LOG_ALL_MODEL_INTERACTIONS = (
LOG_DANSWER_MODEL_INTERACTIONS = (
os.environ.get("LOG_DANSWER_MODEL_INTERACTIONS", "").lower() == "true"
)
LOG_INDIVIDUAL_MODEL_TOKENS = (
os.environ.get("LOG_INDIVIDUAL_MODEL_TOKENS", "").lower() == "true"
)
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
LOG_VESPA_TIMING_INFORMATION = (
@@ -481,3 +492,21 @@ CONTROL_PLANE_API_BASE_URL = os.environ.get(
# JWT configuration
JWT_ALGORITHM = "HS256"
# Super Users
SUPER_USERS = json.loads(os.environ.get("SUPER_USERS", '["pablo@danswer.ai"]'))
SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")
#####
# API Key Configs
#####
# refers to the rounds described here: https://passlib.readthedocs.io/en/stable/lib/passlib.hash.sha256_crypt.html
_API_KEY_HASH_ROUNDS_RAW = os.environ.get("API_KEY_HASH_ROUNDS")
API_KEY_HASH_ROUNDS = (
int(_API_KEY_HASH_ROUNDS_RAW) if _API_KEY_HASH_ROUNDS_RAW else None
)
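# Illustrative sketch only (assumed usage): the rounds value above would typically
# be passed to passlib's sha256_crypt handler when hashing API keys. The helper
# name is hypothetical.
def _example_hash_api_key(api_key: str) -> str:
    from passlib.hash import sha256_crypt

    handler = (
        sha256_crypt.using(rounds=API_KEY_HASH_ROUNDS)
        if API_KEY_HASH_ROUNDS
        else sha256_crypt  # fall back to passlib's default rounds
    )
    return handler.hash(api_key)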
POD_NAME = os.environ.get("POD_NAME")
POD_NAMESPACE = os.environ.get("POD_NAMESPACE")

View File

@@ -1,9 +1,9 @@
import os
PROMPTS_YAML = "./danswer/chat/prompts.yaml"
PERSONAS_YAML = "./danswer/chat/personas.yaml"
INPUT_PROMPT_YAML = "./danswer/chat/input_prompts.yaml"
PROMPTS_YAML = "./danswer/seeding/prompts.yaml"
PERSONAS_YAML = "./danswer/seeding/personas.yaml"
INPUT_PROMPT_YAML = "./danswer/seeding/input_prompts.yaml"
NUM_RETURNED_HITS = 50
# Used for LLM filtering and reranking
@@ -17,9 +17,6 @@ MAX_CHUNKS_FED_TO_CHAT = float(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 10.0)
# ~3k input, half for docs, half for chat history + prompts
CHAT_TARGET_CHUNK_PERCENTAGE = 512 * 3 / 3072
# For selecting a different LLM question-answering prompt format
# Valid values: default, cot, weak
QA_PROMPT_OVERRIDE = os.environ.get("QA_PROMPT_OVERRIDE") or None
# 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
# Capped in Vespa at 0.5
DOC_TIME_DECAY = float(
@@ -27,8 +24,6 @@ DOC_TIME_DECAY = float(
)
BASE_RECENCY_DECAY = 0.5
FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
# Currently this next one is not configurable via env
DISABLE_LLM_QUERY_ANSWERABILITY = QA_PROMPT_OVERRIDE == "weak"
# For the highest matching base size chunk, how many chunks above and below do we pull in by default
# Note this is not in any of the deployment configs yet
# Currently only applies to search flow not chat

View File

@@ -60,7 +60,6 @@ KV_GMAIL_CRED_KEY = "gmail_app_credential"
KV_GMAIL_SERVICE_ACCOUNT_KEY = "gmail_service_account_key"
KV_GOOGLE_DRIVE_CRED_KEY = "google_drive_app_credential"
KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key"
KV_SLACK_BOT_TOKENS_CONFIG_KEY = "slack_bot_tokens_config_key"
KV_GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time"
KV_SETTINGS_KEY = "danswer_settings"
KV_CUSTOMER_UUID_KEY = "customer_uuid"
@@ -74,12 +73,16 @@ CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120
# needs to be long enough to cover the maximum time it takes to download an object
# if we can get callbacks as object bytes download, we could lower this a lot.
CELERY_INDEXING_LOCK_TIMEOUT = 60 * 60 # 60 min
CELERY_INDEXING_LOCK_TIMEOUT = 3 * 60 * 60  # 3 hours
# needs to be long enough to cover the maximum time it takes to download an object
# if we can get callbacks as object bytes download, we could lower this a lot.
CELERY_PRUNING_LOCK_TIMEOUT = 300 # 5 min
CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT = 300 # 5 min
CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT = 300 # 5 min
DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"
@@ -125,6 +128,8 @@ class DocumentSource(str, Enum):
OCI_STORAGE = "oci_storage"
XENFORO = "xenforo"
NOT_APPLICABLE = "not_applicable"
FRESHDESK = "freshdesk"
FIREFLIES = "fireflies"
DocumentSourceRequiringTenantContext: list[DocumentSource] = [DocumentSource.FILE]
@@ -207,9 +212,17 @@ class PostgresAdvisoryLocks(Enum):
class DanswerCeleryQueues:
# Light queue
VESPA_METADATA_SYNC = "vespa_metadata_sync"
DOC_PERMISSIONS_UPSERT = "doc_permissions_upsert"
CONNECTOR_DELETION = "connector_deletion"
# Heavy queue
CONNECTOR_PRUNING = "connector_pruning"
CONNECTOR_DOC_PERMISSIONS_SYNC = "connector_doc_permissions_sync"
CONNECTOR_EXTERNAL_GROUP_SYNC = "connector_external_group_sync"
# Indexing queue
CONNECTOR_INDEXING = "connector_indexing"
@@ -219,11 +232,24 @@ class DanswerRedisLocks:
CHECK_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:check_connector_deletion_beat"
CHECK_PRUNE_BEAT_LOCK = "da_lock:check_prune_beat"
CHECK_INDEXING_BEAT_LOCK = "da_lock:check_indexing_beat"
CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK = (
"da_lock:check_connector_doc_permissions_sync_beat"
)
CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK = (
"da_lock:check_connector_external_group_sync_beat"
)
MONITOR_VESPA_SYNC_BEAT_LOCK = "da_lock:monitor_vespa_sync_beat"
CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX = (
"da_lock:connector_doc_permissions_sync"
)
CONNECTOR_EXTERNAL_GROUP_SYNC_LOCK_PREFIX = "da_lock:connector_external_group_sync"
PRUNING_LOCK_PREFIX = "da_lock:pruning"
INDEXING_METADATA_PREFIX = "da_metadata:indexing"
SLACK_BOT_LOCK = "da_lock:slack_bot"
SLACK_BOT_HEARTBEAT_PREFIX = "da_heartbeat:slack_bot"
class DanswerCeleryPriority(int, Enum):
HIGHEST = 0

View File

@@ -70,7 +70,9 @@ GEN_AI_NUM_RESERVED_OUTPUT_TOKENS = int(
)
# Typically, GenAI models nowadays support a context window of at least 4K tokens
GEN_AI_MODEL_FALLBACK_MAX_TOKENS = 4096
GEN_AI_MODEL_FALLBACK_MAX_TOKENS = int(
os.environ.get("GEN_AI_MODEL_FALLBACK_MAX_TOKENS") or 4096
)
# Number of tokens from chat history to include at maximum
# 3000 should be enough context regardless of use, no need to include as much as possible
@@ -119,3 +121,14 @@ if _LITELLM_PASS_THROUGH_HEADERS_RAW:
logger.error(
"Failed to parse LITELLM_PASS_THROUGH_HEADERS, must be a valid JSON object"
)
# if specified, will merge the specified JSON with the existing body of the
# request before sending it to the LLM
LITELLM_EXTRA_BODY: dict | None = None
_LITELLM_EXTRA_BODY_RAW = os.environ.get("LITELLM_EXTRA_BODY")
if _LITELLM_EXTRA_BODY_RAW:
try:
LITELLM_EXTRA_BODY = json.loads(_LITELLM_EXTRA_BODY_RAW)
except Exception:
pass
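# Hedged sketch of one plausible way LITELLM_EXTRA_BODY could be folded into an
# outgoing request body; the helper name and merge precedence are assumptions.
def _example_merge_extra_body(request_body: dict) -> dict:
    if not LITELLM_EXTRA_BODY:
        return request_body
    # shallow merge; keys from the env-provided JSON overwrite existing keys
    return {**request_body, **LITELLM_EXTRA_BODY}
# e.g. LITELLM_EXTRA_BODY='{"metadata": {"team": "search"}}' in the environment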

View File

@@ -5,9 +5,9 @@ from io import BytesIO
from typing import Any
from typing import Optional
import boto3
from botocore.client import Config
from mypy_boto3_s3 import S3Client
import boto3 # type: ignore
from botocore.client import Config # type: ignore
from mypy_boto3_s3 import S3Client # type: ignore
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import BlobType

View File

@@ -7,9 +7,9 @@ from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP
from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.confluence.onyx_confluence import build_confluence_client
from danswer.connectors.confluence.onyx_confluence import OnyxConfluence
from danswer.connectors.confluence.utils import attachment_to_content
from danswer.connectors.confluence.utils import build_confluence_client
from danswer.connectors.confluence.utils import build_confluence_document_id
from danswer.connectors.confluence.utils import datetime_from_string
from danswer.connectors.confluence.utils import extract_text_from_confluence_html
@@ -17,6 +17,7 @@ from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import GenerateSlimDocumentOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.interfaces import SlimConnector
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.models import ConnectorMissingCredentialError
@@ -50,6 +51,8 @@ _RESTRICTIONS_EXPANSION_FIELDS = [
"restrictions.read.restrictions.group",
]
_SLIM_DOC_BATCH_SIZE = 5000
class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
@@ -69,7 +72,7 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
) -> None:
self.batch_size = batch_size
self.continue_on_failure = continue_on_failure
self.confluence_client: OnyxConfluence | None = None
self._confluence_client: OnyxConfluence | None = None
self.is_cloud = is_cloud
# Remove trailing slash from wiki_base if present
@@ -80,15 +83,15 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
if cql_query:
# if a cql_query is provided, we will use it to fetch the pages
cql_page_query = cql_query
elif space:
# if no cql_query is provided, we will use the space to fetch the pages
cql_page_query += f" and space='{quote(space)}'"
elif page_id:
# if a cql_query is not provided, we will use the page_id to fetch the page
if index_recursively:
cql_page_query += f" and ancestor='{page_id}'"
else:
# if neither a space nor a cql_query is provided, we will use the page_id to fetch the page
cql_page_query += f" and id='{page_id}'"
elif space:
# if no cql_query or page_id is provided, we will use the space to fetch the pages
cql_page_query += f" and space='{quote(space)}'"
self.cql_page_query = cql_page_query
self.cql_time_filter = ""
@@ -96,39 +99,44 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
self.cql_label_filter = ""
if labels_to_skip:
labels_to_skip = list(set(labels_to_skip))
comma_separated_labels = ",".join(f"'{label}'" for label in labels_to_skip)
comma_separated_labels = ",".join(
f"'{quote(label)}'" for label in labels_to_skip
)
self.cql_label_filter = f" and label not in ({comma_separated_labels})"
@property
def confluence_client(self) -> OnyxConfluence:
if self._confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
return self._confluence_client
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
# see https://github.com/atlassian-api/atlassian-python-api/blob/master/atlassian/rest_client.py
# for a list of other hidden constructor args
self.confluence_client = build_confluence_client(
credentials_json=credentials,
self._confluence_client = build_confluence_client(
credentials=credentials,
is_cloud=self.is_cloud,
wiki_base=self.wiki_base,
)
return None
def _get_comment_string_for_page_id(self, page_id: str) -> str:
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
comment_string = ""
comment_cql = f"type=comment and container='{page_id}'"
comment_cql += self.cql_label_filter
expand = ",".join(_COMMENT_EXPANSION_FIELDS)
for comments in self.confluence_client.paginated_cql_page_retrieval(
for comment in self.confluence_client.paginated_cql_retrieval(
cql=comment_cql,
expand=expand,
):
for comment in comments:
comment_string += "\nComment:\n"
comment_string += extract_text_from_confluence_html(
confluence_client=self.confluence_client,
confluence_object=comment,
)
comment_string += "\nComment:\n"
comment_string += extract_text_from_confluence_html(
confluence_client=self.confluence_client,
confluence_object=comment,
fetched_titles=set(),
)
return comment_string
@@ -140,28 +148,28 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
If it's a page, it extracts the text and adds the page's comments to the document text.
If it's an attachment, it just downloads the attachment and converts it into a document.
"""
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
# The url and the id are the same
object_url = build_confluence_document_id(
self.wiki_base, confluence_object["_links"]["webui"]
self.wiki_base, confluence_object["_links"]["webui"], self.is_cloud
)
object_text = None
# Extract text from page
if confluence_object["type"] == "page":
object_text = extract_text_from_confluence_html(
self.confluence_client, confluence_object
confluence_client=self.confluence_client,
confluence_object=confluence_object,
fetched_titles={confluence_object.get("title", "")},
)
# Add comments to text
object_text += self._get_comment_string_for_page_id(confluence_object["id"])
elif confluence_object["type"] == "attachment":
object_text = attachment_to_content(
self.confluence_client, confluence_object
confluence_client=self.confluence_client, attachment=confluence_object
)
if object_text is None:
# This only happens for attachments that are not parseable
return None
# Get space name
@@ -192,44 +200,39 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
)
def _fetch_document_batches(self) -> GenerateDocumentsOutput:
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
doc_batch: list[Document] = []
confluence_page_ids: list[str] = []
page_query = self.cql_page_query + self.cql_label_filter + self.cql_time_filter
# Fetch pages as Documents
for page_batch in self.confluence_client.paginated_cql_page_retrieval(
for page in self.confluence_client.paginated_cql_retrieval(
cql=page_query,
expand=",".join(_PAGE_EXPANSION_FIELDS),
limit=self.batch_size,
):
for page in page_batch:
confluence_page_ids.append(page["id"])
doc = self._convert_object_to_document(page)
if doc is not None:
doc_batch.append(doc)
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
confluence_page_ids.append(page["id"])
doc = self._convert_object_to_document(page)
if doc is not None:
doc_batch.append(doc)
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
# Fetch attachments as Documents
for confluence_page_id in confluence_page_ids:
attachment_cql = f"type=attachment and container='{confluence_page_id}'"
attachment_cql += self.cql_label_filter
# TODO: maybe should add time filter as well?
for attachments in self.confluence_client.paginated_cql_page_retrieval(
for attachment in self.confluence_client.paginated_cql_retrieval(
cql=attachment_cql,
expand=",".join(_ATTACHMENT_EXPANSION_FIELDS),
):
for attachment in attachments:
doc = self._convert_object_to_document(attachment)
if doc is not None:
doc_batch.append(doc)
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
doc = self._convert_object_to_document(attachment)
if doc is not None:
doc_batch.append(doc)
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
if doc_batch:
yield doc_batch
@@ -249,49 +252,57 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
self.cql_time_filter += f" and lastmodified <= '{formatted_end_time}'"
return self._fetch_document_batches()
def retrieve_all_slim_documents(self) -> GenerateSlimDocumentOutput:
if self.confluence_client is None:
raise ConnectorMissingCredentialError("Confluence")
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
doc_metadata_list: list[SlimDocument] = []
restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS)
page_query = self.cql_page_query + self.cql_label_filter
for pages in self.confluence_client.cql_paginate_all_expansions(
for page in self.confluence_client.cql_paginate_all_expansions(
cql=page_query,
expand=restrictions_expand,
limit=_SLIM_DOC_BATCH_SIZE,
):
for page in pages:
# If the page has restrictions, add them to the perm_sync_data
# These will be used by doc_sync.py to sync permissions
perm_sync_data = {
"restrictions": page.get("restrictions", {}),
"space_key": page.get("space", {}).get("key"),
}
# If the page has restrictions, add them to the perm_sync_data
# These will be used by doc_sync.py to sync permissions
perm_sync_data = {
"restrictions": page.get("restrictions", {}),
"space_key": page.get("space", {}).get("key"),
}
doc_metadata_list.append(
SlimDocument(
id=build_confluence_document_id(
self.wiki_base,
page["_links"]["webui"],
self.is_cloud,
),
perm_sync_data=perm_sync_data,
)
)
attachment_cql = f"type=attachment and container='{page['id']}'"
attachment_cql += self.cql_label_filter
for attachment in self.confluence_client.cql_paginate_all_expansions(
cql=attachment_cql,
expand=restrictions_expand,
limit=_SLIM_DOC_BATCH_SIZE,
):
doc_metadata_list.append(
SlimDocument(
id=build_confluence_document_id(
self.wiki_base, page["_links"]["webui"]
self.wiki_base,
attachment["_links"]["webui"],
self.is_cloud,
),
perm_sync_data=perm_sync_data,
)
)
attachment_cql = f"type=attachment and container='{page['id']}'"
attachment_cql += self.cql_label_filter
for attachments in self.confluence_client.cql_paginate_all_expansions(
cql=attachment_cql,
expand=restrictions_expand,
):
for attachment in attachments:
doc_metadata_list.append(
SlimDocument(
id=build_confluence_document_id(
self.wiki_base, attachment["_links"]["webui"]
),
perm_sync_data=perm_sync_data,
)
)
yield doc_metadata_list
doc_metadata_list = []
if len(doc_metadata_list) > _SLIM_DOC_BATCH_SIZE:
yield doc_metadata_list[:_SLIM_DOC_BATCH_SIZE]
doc_metadata_list = doc_metadata_list[_SLIM_DOC_BATCH_SIZE:]
yield doc_metadata_list

View File

@@ -20,6 +20,10 @@ F = TypeVar("F", bound=Callable[..., Any])
RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower()
# https://jira.atlassian.com/browse/CONFCLOUD-76433
_PROBLEMATIC_EXPANSIONS = "body.storage.value"
_REPLACEMENT_EXPANSIONS = "body.view.value"
class ConfluenceRateLimitError(Exception):
pass
@@ -80,7 +84,7 @@ def handle_confluence_rate_limit(confluence_call: F) -> F:
def wrapped_call(*args: list[Any], **kwargs: Any) -> Any:
MAX_RETRIES = 5
TIMEOUT = 3600
TIMEOUT = 600
timeout_at = time.monotonic() + TIMEOUT
for attempt in range(MAX_RETRIES):
@@ -95,6 +99,10 @@ def handle_confluence_rate_limit(confluence_call: F) -> F:
return confluence_call(*args, **kwargs)
except HTTPError as e:
delay_until = _handle_http_error(e, attempt)
logger.warning(
f"HTTPError in confluence call; "
f"backing off until monotonic time {delay_until}."
)
while time.monotonic() < delay_until:
# in the future, check a signal here to exit
time.sleep(1)
@@ -112,7 +120,7 @@ def handle_confluence_rate_limit(confluence_call: F) -> F:
return cast(F, wrapped_call)
_DEFAULT_PAGINATION_LIMIT = 100
_DEFAULT_PAGINATION_LIMIT = 1000
class OnyxConfluence(Confluence):
@@ -141,7 +149,7 @@ class OnyxConfluence(Confluence):
def _paginate_url(
self, url_suffix: str, limit: int | None = None
) -> Iterator[list[dict[str, Any]]]:
) -> Iterator[dict[str, Any]]:
"""
This will paginate through the top level query.
"""
@@ -153,46 +161,43 @@ class OnyxConfluence(Confluence):
while url_suffix:
try:
logger.debug(f"Making confluence call to {url_suffix}")
next_response = self.get(url_suffix)
except Exception as e:
logger.exception("Error in danswer_cql: \n")
raise e
yield next_response.get("results", [])
logger.warning(f"Error in confluence call to {url_suffix}")
# If the problematic expansion is in the url, replace it
# with the replacement expansion and try again
# If that fails, raise the error
if _PROBLEMATIC_EXPANSIONS not in url_suffix:
logger.exception(f"Error in confluence call to {url_suffix}")
raise e
logger.warning(
f"Replacing {_PROBLEMATIC_EXPANSIONS} with {_REPLACEMENT_EXPANSIONS}"
" and trying again."
)
url_suffix = url_suffix.replace(
_PROBLEMATIC_EXPANSIONS,
_REPLACEMENT_EXPANSIONS,
)
continue
# yield the results individually
yield from next_response.get("results", [])
url_suffix = next_response.get("_links", {}).get("next")
def paginated_groups_retrieval(
self,
limit: int | None = None,
) -> Iterator[list[dict[str, Any]]]:
return self._paginate_url("rest/api/group", limit)
def paginated_group_members_retrieval(
self,
group_name: str,
limit: int | None = None,
) -> Iterator[list[dict[str, Any]]]:
group_name = quote(group_name)
return self._paginate_url(f"rest/api/group/{group_name}/member", limit)
def paginated_cql_user_retrieval(
def paginated_cql_retrieval(
self,
cql: str,
expand: str | None = None,
limit: int | None = None,
) -> Iterator[list[dict[str, Any]]]:
) -> Iterator[dict[str, Any]]:
"""
The content/search endpoint can be used to fetch pages, attachments, and comments.
"""
expand_string = f"&expand={expand}" if expand else ""
return self._paginate_url(
f"rest/api/search/user?cql={cql}{expand_string}", limit
)
def paginated_cql_page_retrieval(
self,
cql: str,
expand: str | None = None,
limit: int | None = None,
) -> Iterator[list[dict[str, Any]]]:
expand_string = f"&expand={expand}" if expand else ""
return self._paginate_url(
yield from self._paginate_url(
f"rest/api/content/search?cql={cql}{expand_string}", limit
)
@@ -201,7 +206,7 @@ class OnyxConfluence(Confluence):
cql: str,
expand: str | None = None,
limit: int | None = None,
) -> Iterator[list[dict[str, Any]]]:
) -> Iterator[dict[str, Any]]:
"""
This function will paginate through the top level query first, then
paginate through all of the expansions.
@@ -221,6 +226,113 @@ class OnyxConfluence(Confluence):
for item in data:
_traverse_and_update(item)
for results in self.paginated_cql_page_retrieval(cql, expand, limit):
_traverse_and_update(results)
yield results
for confluence_object in self.paginated_cql_retrieval(cql, expand, limit):
_traverse_and_update(confluence_object)
yield confluence_object
def paginated_cql_user_retrieval(
self,
expand: str | None = None,
limit: int | None = None,
) -> Iterator[dict[str, Any]]:
"""
The search/user endpoint can be used to fetch users.
It's a separate endpoint from the content/search endpoint, used only for users.
Otherwise it's very similar to the content/search endpoint.
"""
cql = "type=user"
url = "rest/api/search/user" if self.cloud else "rest/api/search"
expand_string = f"&expand={expand}" if expand else ""
url += f"?cql={cql}{expand_string}"
yield from self._paginate_url(url, limit)
def paginated_groups_by_user_retrieval(
self,
user: dict[str, Any],
limit: int | None = None,
) -> Iterator[dict[str, Any]]:
"""
This is not an SQL-like query.
It's a Confluence-specific endpoint that can be used to fetch the groups a user belongs to.
"""
user_field = "accountId" if self.cloud else "key"
user_value = user["accountId"] if self.cloud else user["userKey"]
# Server uses userKey (but calls it key during the API call), Cloud uses accountId
user_query = f"{user_field}={quote(user_value)}"
url = f"rest/api/user/memberof?{user_query}"
yield from self._paginate_url(url, limit)
def paginated_groups_retrieval(
self,
limit: int | None = None,
) -> Iterator[dict[str, Any]]:
"""
This is not an SQL-like query.
It's a Confluence-specific endpoint that can be used to fetch groups.
"""
yield from self._paginate_url("rest/api/group", limit)
def paginated_group_members_retrieval(
self,
group_name: str,
limit: int | None = None,
) -> Iterator[dict[str, Any]]:
"""
This is not an SQL-like query.
It's a Confluence-specific endpoint that can be used to fetch the members of a group.
THIS DOESN'T WORK FOR SERVER because it breaks when there is a slash in the group name.
E.g. neither "test/group" nor "test%2Fgroup" works for Confluence.
"""
group_name = quote(group_name)
yield from self._paginate_url(f"rest/api/group/{group_name}/member", limit)
def _validate_connector_configuration(
credentials: dict[str, Any],
is_cloud: bool,
wiki_base: str,
) -> None:
# test the connection with a directly constructed client using minimal retries
confluence_client_with_minimal_retries = Confluence(
api_version="cloud" if is_cloud else "latest",
url=wiki_base.rstrip("/"),
username=credentials["confluence_username"] if is_cloud else None,
password=credentials["confluence_access_token"] if is_cloud else None,
token=credentials["confluence_access_token"] if not is_cloud else None,
backoff_and_retry=True,
max_backoff_retries=6,
max_backoff_seconds=10,
)
spaces = confluence_client_with_minimal_retries.get_all_spaces(limit=1)
if not spaces:
raise RuntimeError(
f"No spaces found at {wiki_base}! "
"Check your credentials and wiki_base and make sure "
"is_cloud is set correctly."
)
def build_confluence_client(
credentials: dict[str, Any],
is_cloud: bool,
wiki_base: str,
) -> OnyxConfluence:
_validate_connector_configuration(
credentials=credentials,
is_cloud=is_cloud,
wiki_base=wiki_base,
)
return OnyxConfluence(
api_version="cloud" if is_cloud else "latest",
# Remove trailing slash from wiki_base if present
url=wiki_base.rstrip("/"),
# passing in username causes issues for Confluence data center
username=credentials["confluence_username"] if is_cloud else None,
password=credentials["confluence_access_token"] if is_cloud else None,
token=credentials["confluence_access_token"] if not is_cloud else None,
backoff_and_retry=True,
max_backoff_retries=10,
max_backoff_seconds=60,
)
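# Minimal usage sketch of the builder above; the credential values and wiki_base
# are placeholders and would normally come from the connector's stored credentials.
def _example_build_client() -> OnyxConfluence:
    return build_confluence_client(
        credentials={
            "confluence_username": "user@example.com",
            "confluence_access_token": "token-placeholder",
        },
        is_cloud=True,
        wiki_base="https://example.atlassian.net/wiki",
    )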

View File

@@ -2,6 +2,7 @@ import io
from datetime import datetime
from datetime import timezone
from typing import Any
from urllib.parse import quote
import bs4
@@ -71,7 +72,9 @@ def _get_user(confluence_client: OnyxConfluence, user_id: str) -> str:
def extract_text_from_confluence_html(
confluence_client: OnyxConfluence, confluence_object: dict[str, Any]
confluence_client: OnyxConfluence,
confluence_object: dict[str, Any],
fetched_titles: set[str],
) -> str:
"""Parse a Confluence html page and replace the 'user Id' by the real
User Display Name
@@ -79,7 +82,7 @@ def extract_text_from_confluence_html(
Args:
confluence_object (dict): The confluence object as a dict
confluence_client (Confluence): Confluence client
fetched_titles (set[str]): The titles of the pages that have already been fetched
Returns:
str: loaded and formatted Confluence page
"""
@@ -100,6 +103,73 @@ def extract_text_from_confluence_html(
continue
# Include @ sign for tagging, more clear for LLM
user.replaceWith("@" + _get_user(confluence_client, user_id))
for html_page_reference in soup.findAll("ac:structured-macro"):
# Here, we only want to process page-within-page (include) macros
if html_page_reference.attrs.get("ac:name") != "include":
continue
page_data = html_page_reference.find("ri:page")
if not page_data:
logger.warning(
f"Skipping retrieval of {html_page_reference} because because page data is missing"
)
continue
page_title = page_data.attrs.get("ri:content-title")
if not page_title:
# only fetch pages that have a title
logger.warning(
f"Skipping retrieval of {html_page_reference} because it has no title"
)
continue
if page_title in fetched_titles:
# prevent recursive fetching of pages
logger.debug(f"Skipping {page_title} because it has already been fetched")
continue
fetched_titles.add(page_title)
# Wrap this in a try-except because there are some pages that might not exist
try:
page_query = f"type=page and title='{quote(page_title)}'"
page_contents: dict[str, Any] | None = None
# Confluence enforces title uniqueness, so we should only get one result here
for page in confluence_client.paginated_cql_retrieval(
cql=page_query,
expand="body.storage.value",
limit=1,
):
page_contents = page
break
except Exception as e:
logger.warning(
f"Error getting page contents for object {confluence_object}: {e}"
)
continue
if not page_contents:
continue
text_from_page = extract_text_from_confluence_html(
confluence_client=confluence_client,
confluence_object=page_contents,
fetched_titles=fetched_titles,
)
html_page_reference.replaceWith(text_from_page)
for html_link_body in soup.findAll("ac:link-body"):
# This extracts the text from inline links in the page so they can be
# represented in the document text as plain text
try:
text_from_link = html_link_body.text
html_link_body.replaceWith(f"(LINK TEXT: {text_from_link})")
except Exception as e:
logger.warning(f"Error processing ac:link-body: {e}")
return format_document_soup(soup)
@@ -153,7 +223,9 @@ def attachment_to_content(
return extracted_text
def build_confluence_document_id(base_url: str, content_url: str) -> str:
def build_confluence_document_id(
base_url: str, content_url: str, is_cloud: bool
) -> str:
"""For confluence, the document id is the page url for a page based document
or the attachment download url for an attachment based document
@@ -164,6 +236,8 @@ def build_confluence_document_id(base_url: str, content_url: str) -> str:
Returns:
str: The document id
"""
if is_cloud and not base_url.endswith("/wiki"):
base_url += "/wiki"
return f"{base_url}{content_url}"
@@ -195,20 +269,3 @@ def datetime_from_string(datetime_string: str) -> datetime:
datetime_object = datetime_object.astimezone(timezone.utc)
return datetime_object
def build_confluence_client(
credentials_json: dict[str, Any], is_cloud: bool, wiki_base: str
) -> OnyxConfluence:
return OnyxConfluence(
api_version="cloud" if is_cloud else "latest",
# Remove trailing slash from wiki_base if present
url=wiki_base.rstrip("/"),
# passing in username causes issues for Confluence data center
username=credentials_json["confluence_username"] if is_cloud else None,
password=credentials_json["confluence_access_token"] if is_cloud else None,
token=credentials_json["confluence_access_token"] if not is_cloud else None,
backoff_and_retry=True,
max_backoff_retries=60,
max_backoff_seconds=60,
)

View File

@@ -23,7 +23,16 @@ def datetime_to_utc(dt: datetime) -> datetime:
def time_str_to_utc(datetime_str: str) -> datetime:
dt = parse(datetime_str)
try:
dt = parse(datetime_str)
except ValueError:
# Handle malformed timezone by attempting to fix common format issues
if "0000" in datetime_str:
# Convert "0000" to "+0000" for proper timezone parsing
fixed_dt_str = datetime_str.replace(" 0000", " +0000")
dt = parse(fixed_dt_str)
else:
raise
return datetime_to_utc(dt)

View File

@@ -1,8 +1,8 @@
import os
from collections.abc import Iterable
from datetime import datetime
from datetime import timezone
from typing import Any
from urllib.parse import urlparse
from jira import JIRA
from jira.resources import Issue
@@ -12,129 +12,93 @@ from danswer.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from danswer.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from danswer.connectors.danswer_jira.utils import best_effort_basic_expert_info
from danswer.connectors.danswer_jira.utils import best_effort_get_field_from_issue
from danswer.connectors.danswer_jira.utils import build_jira_client
from danswer.connectors.danswer_jira.utils import build_jira_url
from danswer.connectors.danswer_jira.utils import extract_jira_project
from danswer.connectors.danswer_jira.utils import extract_text_from_adf
from danswer.connectors.danswer_jira.utils import get_comment_strs
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import GenerateSlimDocumentOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.interfaces import SlimConnector
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.connectors.models import SlimDocument
from danswer.utils.logger import setup_logger
logger = setup_logger()
PROJECT_URL_PAT = "projects"
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
_JIRA_SLIM_PAGE_SIZE = 500
_JIRA_FULL_PAGE_SIZE = 50
def extract_jira_project(url: str) -> tuple[str, str]:
parsed_url = urlparse(url)
jira_base = parsed_url.scheme + "://" + parsed_url.netloc
def _paginate_jql_search(
jira_client: JIRA,
jql: str,
max_results: int,
fields: str | None = None,
) -> Iterable[Issue]:
start = 0
while True:
logger.debug(
f"Fetching Jira issues with JQL: {jql}, "
f"starting at {start}, max results: {max_results}"
)
issues = jira_client.search_issues(
jql_str=jql,
startAt=start,
maxResults=max_results,
fields=fields,
)
# Split the path by '/' and find the position of 'projects' to get the project name
split_path = parsed_url.path.split("/")
if PROJECT_URL_PAT in split_path:
project_pos = split_path.index(PROJECT_URL_PAT)
if len(split_path) > project_pos + 1:
jira_project = split_path[project_pos + 1]
else:
raise ValueError("No project name found in the URL")
else:
raise ValueError("'projects' not found in the URL")
for issue in issues:
if isinstance(issue, Issue):
yield issue
else:
raise Exception(f"Found Jira object not of type Issue: {issue}")
return jira_base, jira_project
if len(issues) < max_results:
break
def extract_text_from_adf(adf: dict | None) -> str:
"""Extracts plain text from Atlassian Document Format:
https://developer.atlassian.com/cloud/jira/platform/apis/document/structure/
WARNING: This function is incomplete and will e.g. skip lists!
"""
texts = []
if adf is not None and "content" in adf:
for block in adf["content"]:
if "content" in block:
for item in block["content"]:
if item["type"] == "text":
texts.append(item["text"])
return " ".join(texts)
def best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any:
if hasattr(jira_issue.fields, field):
return getattr(jira_issue.fields, field)
try:
return jira_issue.raw["fields"][field]
except Exception:
return None
def _get_comment_strs(
jira: Issue, comment_email_blacklist: tuple[str, ...] = ()
) -> list[str]:
comment_strs = []
for comment in jira.fields.comment.comments:
try:
body_text = (
comment.body
if JIRA_API_VERSION == "2"
else extract_text_from_adf(comment.raw["body"])
)
if (
hasattr(comment, "author")
and hasattr(comment.author, "emailAddress")
and comment.author.emailAddress in comment_email_blacklist
):
continue # Skip adding comment if author's email is in blacklist
comment_strs.append(body_text)
except Exception as e:
logger.error(f"Failed to process comment due to an error: {e}")
continue
return comment_strs
start += max_results
def fetch_jira_issues_batch(
jql: str,
start_index: int,
jira_client: JIRA,
batch_size: int = INDEX_BATCH_SIZE,
jql: str,
batch_size: int,
comment_email_blacklist: tuple[str, ...] = (),
labels_to_skip: set[str] | None = None,
) -> tuple[list[Document], int]:
doc_batch = []
batch = jira_client.search_issues(
jql,
startAt=start_index,
maxResults=batch_size,
)
for jira in batch:
if type(jira) != Issue:
logger.warning(f"Found Jira object not of type Issue {jira}")
continue
if labels_to_skip and any(
label in jira.fields.labels for label in labels_to_skip
):
logger.info(
f"Skipping {jira.key} because it has a label to skip. Found "
f"labels: {jira.fields.labels}. Labels to skip: {labels_to_skip}."
)
continue
) -> Iterable[Document]:
for issue in _paginate_jql_search(
jira_client=jira_client,
jql=jql,
max_results=batch_size,
):
if labels_to_skip:
if any(label in issue.fields.labels for label in labels_to_skip):
logger.info(
f"Skipping {issue.key} because it has a label to skip. Found "
f"labels: {issue.fields.labels}. Labels to skip: {labels_to_skip}."
)
continue
description = (
jira.fields.description
issue.fields.description
if JIRA_API_VERSION == "2"
else extract_text_from_adf(jira.raw["fields"]["description"])
else extract_text_from_adf(issue.raw["fields"]["description"])
)
comments = get_comment_strs(
issue=issue,
comment_email_blacklist=comment_email_blacklist,
)
comments = _get_comment_strs(jira, comment_email_blacklist)
ticket_content = f"{description}\n" + "\n".join(
[f"Comment: {comment}" for comment in comments if comment]
)
@@ -142,66 +106,53 @@ def fetch_jira_issues_batch(
# Check ticket size
if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
logger.info(
f"Skipping {jira.key} because it exceeds the maximum size of "
f"Skipping {issue.key} because it exceeds the maximum size of "
f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
)
continue
page_url = f"{jira_client.client_info()}/browse/{jira.key}"
page_url = f"{jira_client.client_info()}/browse/{issue.key}"
people = set()
try:
people.add(
BasicExpertInfo(
display_name=jira.fields.creator.displayName,
email=jira.fields.creator.emailAddress,
)
)
creator = best_effort_get_field_from_issue(issue, "creator")
if basic_expert_info := best_effort_basic_expert_info(creator):
people.add(basic_expert_info)
except Exception:
# Author should exist but if not, doesn't matter
pass
try:
people.add(
BasicExpertInfo(
display_name=jira.fields.assignee.displayName, # type: ignore
email=jira.fields.assignee.emailAddress, # type: ignore
)
)
assignee = best_effort_get_field_from_issue(issue, "assignee")
if basic_expert_info := best_effort_basic_expert_info(assignee):
people.add(basic_expert_info)
except Exception:
# Author should exist but if not, doesn't matter
pass
metadata_dict = {}
priority = best_effort_get_field_from_issue(jira, "priority")
if priority:
if priority := best_effort_get_field_from_issue(issue, "priority"):
metadata_dict["priority"] = priority.name
status = best_effort_get_field_from_issue(jira, "status")
if status:
if status := best_effort_get_field_from_issue(issue, "status"):
metadata_dict["status"] = status.name
resolution = best_effort_get_field_from_issue(jira, "resolution")
if resolution:
if resolution := best_effort_get_field_from_issue(issue, "resolution"):
metadata_dict["resolution"] = resolution.name
labels = best_effort_get_field_from_issue(jira, "labels")
if labels:
if labels := best_effort_get_field_from_issue(issue, "labels"):
metadata_dict["label"] = labels
doc_batch.append(
Document(
id=page_url,
sections=[Section(link=page_url, text=ticket_content)],
source=DocumentSource.JIRA,
semantic_identifier=jira.fields.summary,
doc_updated_at=time_str_to_utc(jira.fields.updated),
primary_owners=list(people) or None,
# TODO add secondary_owners (commenters) if needed
metadata=metadata_dict,
)
yield Document(
id=page_url,
sections=[Section(link=page_url, text=ticket_content)],
source=DocumentSource.JIRA,
semantic_identifier=issue.fields.summary,
doc_updated_at=time_str_to_utc(issue.fields.updated),
primary_owners=list(people) or None,
# TODO add secondary_owners (commenters) if needed
metadata=metadata_dict,
)
return doc_batch, len(batch)
class JiraConnector(LoadConnector, PollConnector):
class JiraConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
self,
jira_project_url: str,
@@ -213,8 +164,8 @@ class JiraConnector(LoadConnector, PollConnector):
labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP,
) -> None:
self.batch_size = batch_size
self.jira_base, self.jira_project = extract_jira_project(jira_project_url)
self.jira_client: JIRA | None = None
self.jira_base, self._jira_project = extract_jira_project(jira_project_url)
self._jira_client: JIRA | None = None
self._comment_email_blacklist = comment_email_blacklist or []
self.labels_to_skip = set(labels_to_skip)
@@ -223,54 +174,45 @@ class JiraConnector(LoadConnector, PollConnector):
def comment_email_blacklist(self) -> tuple:
return tuple(email.strip() for email in self._comment_email_blacklist)
@property
def jira_client(self) -> JIRA:
if self._jira_client is None:
raise ConnectorMissingCredentialError("Jira")
return self._jira_client
@property
def quoted_jira_project(self) -> str:
# Quote the project name to handle reserved words
return f'"{self._jira_project}"'
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
api_token = credentials["jira_api_token"]
# if the user provides an email, we assume it's a cloud deployment
if "jira_user_email" in credentials:
email = credentials["jira_user_email"]
self.jira_client = JIRA(
basic_auth=(email, api_token),
server=self.jira_base,
options={"rest_api_version": JIRA_API_VERSION},
)
else:
self.jira_client = JIRA(
token_auth=api_token,
server=self.jira_base,
options={"rest_api_version": JIRA_API_VERSION},
)
self._jira_client = build_jira_client(
credentials=credentials,
jira_base=self.jira_base,
)
return None
def load_from_state(self) -> GenerateDocumentsOutput:
if self.jira_client is None:
raise ConnectorMissingCredentialError("Jira")
jql = f"project = {self.quoted_jira_project}"
# Quote the project name to handle reserved words
quoted_project = f'"{self.jira_project}"'
start_ind = 0
while True:
doc_batch, fetched_batch_size = fetch_jira_issues_batch(
jql=f"project = {quoted_project}",
start_index=start_ind,
jira_client=self.jira_client,
batch_size=self.batch_size,
comment_email_blacklist=self.comment_email_blacklist,
labels_to_skip=self.labels_to_skip,
)
document_batch = []
for doc in fetch_jira_issues_batch(
jira_client=self.jira_client,
jql=jql,
batch_size=_JIRA_FULL_PAGE_SIZE,
comment_email_blacklist=self.comment_email_blacklist,
labels_to_skip=self.labels_to_skip,
):
document_batch.append(doc)
if len(document_batch) >= self.batch_size:
yield document_batch
document_batch = []
if doc_batch:
yield doc_batch
start_ind += fetched_batch_size
if fetched_batch_size < self.batch_size:
break
yield document_batch
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
if self.jira_client is None:
raise ConnectorMissingCredentialError("Jira")
start_date_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
"%Y-%m-%d %H:%M"
)
@@ -278,31 +220,54 @@ class JiraConnector(LoadConnector, PollConnector):
"%Y-%m-%d %H:%M"
)
# Quote the project name to handle reserved words
quoted_project = f'"{self.jira_project}"'
jql = (
f"project = {quoted_project} AND "
f"project = {self.quoted_jira_project} AND "
f"updated >= '{start_date_str}' AND "
f"updated <= '{end_date_str}'"
)
start_ind = 0
while True:
doc_batch, fetched_batch_size = fetch_jira_issues_batch(
jql=jql,
start_index=start_ind,
jira_client=self.jira_client,
batch_size=self.batch_size,
comment_email_blacklist=self.comment_email_blacklist,
labels_to_skip=self.labels_to_skip,
document_batch = []
for doc in fetch_jira_issues_batch(
jira_client=self.jira_client,
jql=jql,
batch_size=_JIRA_FULL_PAGE_SIZE,
comment_email_blacklist=self.comment_email_blacklist,
labels_to_skip=self.labels_to_skip,
):
document_batch.append(doc)
if len(document_batch) >= self.batch_size:
yield document_batch
document_batch = []
yield document_batch
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
jql = f"project = {self.quoted_jira_project}"
slim_doc_batch = []
for issue in _paginate_jql_search(
jira_client=self.jira_client,
jql=jql,
max_results=_JIRA_SLIM_PAGE_SIZE,
fields="key",
):
issue_key = best_effort_get_field_from_issue(issue, "key")
id = build_jira_url(self.jira_client, issue_key)
slim_doc_batch.append(
SlimDocument(
id=id,
perm_sync_data=None,
)
)
if len(slim_doc_batch) >= _JIRA_SLIM_PAGE_SIZE:
yield slim_doc_batch
slim_doc_batch = []
if doc_batch:
yield doc_batch
start_ind += fetched_batch_size
if fetched_batch_size < self.batch_size:
break
yield slim_doc_batch
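# Hedged usage sketch of the generator-based fetch above; the JQL string and the
# helper name are illustrative.
def _example_fetch_issue_docs(jira_client: JIRA) -> list[Document]:
    docs: list[Document] = []
    for doc in fetch_jira_issues_batch(
        jira_client=jira_client,
        jql='project = "DAN" AND updated >= "2024-11-01"',
        batch_size=_JIRA_FULL_PAGE_SIZE,
    ):
        docs.append(doc)
    return docs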
if __name__ == "__main__":

View File

@@ -1,17 +1,136 @@
"""Module with custom fields processing functions"""
import os
from typing import Any
from typing import List
from urllib.parse import urlparse
from jira import JIRA
from jira.resources import CustomFieldOption
from jira.resources import Issue
from jira.resources import User
from danswer.connectors.models import BasicExpertInfo
from danswer.utils.logger import setup_logger
logger = setup_logger()
PROJECT_URL_PAT = "projects"
JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
def best_effort_basic_expert_info(obj: Any) -> BasicExpertInfo | None:
display_name = None
email = None
if hasattr(obj, "display_name"):
display_name = obj.display_name
else:
display_name = obj.get("displayName")
if hasattr(obj, "emailAddress"):
email = obj.emailAddress
else:
email = obj.get("emailAddress")
if not email and not display_name:
return None
return BasicExpertInfo(display_name=display_name, email=email)
def best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any:
if hasattr(jira_issue.fields, field):
return getattr(jira_issue.fields, field)
try:
return jira_issue.raw["fields"][field]
except Exception:
return None
def extract_text_from_adf(adf: dict | None) -> str:
"""Extracts plain text from Atlassian Document Format:
https://developer.atlassian.com/cloud/jira/platform/apis/document/structure/
WARNING: This function is incomplete and will e.g. skip lists!
"""
texts = []
if adf is not None and "content" in adf:
for block in adf["content"]:
if "content" in block:
for item in block["content"]:
if item["type"] == "text":
texts.append(item["text"])
return " ".join(texts)
def build_jira_url(jira_client: JIRA, issue_key: str) -> str:
return f"{jira_client.client_info()}/browse/{issue_key}"
def build_jira_client(credentials: dict[str, Any], jira_base: str) -> JIRA:
api_token = credentials["jira_api_token"]
# if the user provides an email, we assume it's a cloud deployment
if "jira_user_email" in credentials:
email = credentials["jira_user_email"]
return JIRA(
basic_auth=(email, api_token),
server=jira_base,
options={"rest_api_version": JIRA_API_VERSION},
)
else:
return JIRA(
token_auth=api_token,
server=jira_base,
options={"rest_api_version": JIRA_API_VERSION},
)
def extract_jira_project(url: str) -> tuple[str, str]:
parsed_url = urlparse(url)
jira_base = parsed_url.scheme + "://" + parsed_url.netloc
# Split the path by '/' and find the position of 'projects' to get the project name
split_path = parsed_url.path.split("/")
if PROJECT_URL_PAT in split_path:
project_pos = split_path.index(PROJECT_URL_PAT)
if len(split_path) > project_pos + 1:
jira_project = split_path[project_pos + 1]
else:
raise ValueError("No project name found in the URL")
else:
raise ValueError("'projects' not found in the URL")
return jira_base, jira_project
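# Quick illustrative check of the URL parsing above (hypothetical project URL):
def _example_extract_project() -> None:
    base, project = extract_jira_project("https://example.atlassian.net/projects/DAN")
    assert base == "https://example.atlassian.net"
    assert project == "DAN"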
def get_comment_strs(
issue: Issue, comment_email_blacklist: tuple[str, ...] = ()
) -> list[str]:
comment_strs = []
for comment in issue.fields.comment.comments:
try:
body_text = (
comment.body
if JIRA_API_VERSION == "2"
else extract_text_from_adf(comment.raw["body"])
)
if (
hasattr(comment, "author")
and hasattr(comment.author, "emailAddress")
and comment.author.emailAddress in comment_email_blacklist
):
continue # Skip adding comment if author's email is in blacklist
comment_strs.append(body_text)
except Exception as e:
logger.error(f"Failed to process comment due to an error: {e}")
continue
return comment_strs
class CustomFieldExtractor:
@staticmethod
def _process_custom_field_value(value: Any) -> str:

View File

@@ -16,6 +16,8 @@ from danswer.connectors.discourse.connector import DiscourseConnector
from danswer.connectors.document360.connector import Document360Connector
from danswer.connectors.dropbox.connector import DropboxConnector
from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.fireflies.connector import FirefliesConnector
from danswer.connectors.freshdesk.connector import FreshdeskConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.gitlab.connector import GitlabConnector
from danswer.connectors.gmail.connector import GmailConnector
@@ -99,6 +101,8 @@ def identify_connector_class(
DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector,
DocumentSource.OCI_STORAGE: BlobStorageConnector,
DocumentSource.XENFORO: XenforoConnector,
DocumentSource.FRESHDESK: FreshdeskConnector,
DocumentSource.FIREFLIES: FirefliesConnector,
}
connector_by_source = connector_map.get(source, {})

View File

@@ -27,8 +27,8 @@ from danswer.file_processing.extract_file_text import read_pdf_file
from danswer.file_processing.extract_file_text import read_text_file
from danswer.file_store.file_store import get_default_file_store
from danswer.utils.logger import setup_logger
from shared_configs.configs import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
@@ -123,9 +123,13 @@ def _process_file(
"filename",
"file_display_name",
"title",
"connector_type",
]
}
source_type_str = all_metadata.get("connector_type")
source_type = DocumentSource(source_type_str) if source_type_str else None
p_owner_names = all_metadata.get("primary_owners")
s_owner_names = all_metadata.get("secondary_owners")
p_owners = (
@@ -145,7 +149,7 @@ def _process_file(
sections=[
Section(link=all_metadata.get("link"), text=file_content_raw.strip())
],
source=DocumentSource.FILE,
source=source_type or DocumentSource.FILE,
semantic_identifier=file_display_name,
title=title,
doc_updated_at=final_time_updated,

View File

@@ -0,0 +1,182 @@
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import List
import requests
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
logger = setup_logger()
_FIREFLIES_ID_PREFIX = "FIREFLIES_"
_FIREFLIES_API_URL = "https://api.fireflies.ai/graphql"
_FIREFLIES_TRANSCRIPT_QUERY_SIZE = 50 # Max page size is 50
_FIREFLIES_API_QUERY = """
query Transcripts($fromDate: DateTime, $toDate: DateTime, $limit: Int!, $skip: Int!) {
transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) {
id
title
host_email
participants
date
transcript_url
sentences {
text
speaker_name
}
}
}
"""
def _create_doc_from_transcript(transcript: dict) -> Document | None:
meeting_text = ""
sentences = transcript.get("sentences", [])
if sentences:
for sentence in sentences:
meeting_text += sentence.get("speaker_name") or "Unknown Speaker"
meeting_text += ": " + sentence.get("text", "") + "\n\n"
else:
return None
meeting_link = transcript["transcript_url"]
fireflies_id = _FIREFLIES_ID_PREFIX + transcript["id"]
meeting_title = transcript["title"] or "No Title"
meeting_date_unix = transcript["date"]
meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
meeting_host_email = transcript["host_email"]
host_email_user_info = [BasicExpertInfo(email=meeting_host_email)]
meeting_participants_email_list = []
for participant in transcript.get("participants", []):
if participant != meeting_host_email and participant:
meeting_participants_email_list.append(BasicExpertInfo(email=participant))
return Document(
id=fireflies_id,
sections=[
Section(
link=meeting_link,
text=meeting_text,
)
],
source=DocumentSource.FIREFLIES,
semantic_identifier=meeting_title,
metadata={},
doc_updated_at=meeting_date,
primary_owners=host_email_user_info,
secondary_owners=meeting_participants_email_list,
)
class FirefliesConnector(PollConnector, LoadConnector):
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
self.batch_size = batch_size
def load_credentials(self, credentials: dict[str, str]) -> None:
api_key = credentials.get("fireflies_api_key")
if not isinstance(api_key, str):
raise ConnectorMissingCredentialError(
"The Fireflies API key must be a string"
)
self.api_key = api_key
return None
def _fetch_transcripts(
self, start_datetime: str | None = None, end_datetime: str | None = None
) -> Iterator[List[dict]]:
if self.api_key is None:
raise ConnectorMissingCredentialError("Missing API key")
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer " + self.api_key,
}
skip = 0
variables: dict[str, int | str] = {
"limit": _FIREFLIES_TRANSCRIPT_QUERY_SIZE,
}
if start_datetime:
variables["fromDate"] = start_datetime
if end_datetime:
variables["toDate"] = end_datetime
while True:
variables["skip"] = skip
response = requests.post(
_FIREFLIES_API_URL,
headers=headers,
json={"query": _FIREFLIES_API_QUERY, "variables": variables},
)
response.raise_for_status()
if response.status_code == 204:
break
received_transcripts = response.json()
parsed_transcripts = received_transcripts.get("data", {}).get(
"transcripts", []
)
yield parsed_transcripts
if len(parsed_transcripts) < _FIREFLIES_TRANSCRIPT_QUERY_SIZE:
break
skip += _FIREFLIES_TRANSCRIPT_QUERY_SIZE
def _process_transcripts(
self, start: str | None = None, end: str | None = None
) -> GenerateDocumentsOutput:
doc_batch: List[Document] = []
for transcript_batch in self._fetch_transcripts(start, end):
for transcript in transcript_batch:
if doc := _create_doc_from_transcript(transcript):
doc_batch.append(doc)
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
if doc_batch:
yield doc_batch
def load_from_state(self) -> GenerateDocumentsOutput:
return self._process_transcripts()
def poll_source(
self, start_unixtime: SecondsSinceUnixEpoch, end_unixtime: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
start_datetime = datetime.fromtimestamp(
start_unixtime, tz=timezone.utc
).strftime("%Y-%m-%dT%H:%M:%S.000Z")
end_datetime = datetime.fromtimestamp(end_unixtime, tz=timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%S.000Z"
)
yield from self._process_transcripts(start_datetime, end_datetime)
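A minimal usage sketch of the Fireflies connector (hypothetical API key and timestamps). poll_source converts the Unix bounds into the ISO strings the GraphQL API expects, then pages through transcripts 50 at a time:

connector = FirefliesConnector(batch_size=INDEX_BATCH_SIZE)
connector.load_credentials({"fireflies_api_key": "hypothetical-key"})
for doc_batch in connector.poll_source(start_unixtime=1700000000, end_unixtime=1700600000):
    for doc in doc_batch:
        print(doc.semantic_identifier, doc.doc_updated_at)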

View File

@@ -0,0 +1,239 @@
import json
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import List
import requests
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.file_processing.html_utils import parse_html_page_basic
from danswer.utils.logger import setup_logger
logger = setup_logger()
_FRESHDESK_ID_PREFIX = "FRESHDESK_"
_TICKET_FIELDS_TO_INCLUDE = {
"fr_escalated",
"spam",
"priority",
"source",
"status",
"type",
"is_escalated",
"tags",
"nr_due_by",
"nr_escalated",
"cc_emails",
"fwd_emails",
"reply_cc_emails",
"ticket_cc_emails",
"support_email",
"to_emails",
}
_SOURCE_NUMBER_TYPE_MAP: dict[int, str] = {
1: "Email",
2: "Portal",
3: "Phone",
7: "Chat",
9: "Feedback Widget",
10: "Outbound Email",
}
_PRIORITY_NUMBER_TYPE_MAP: dict[int, str] = {
1: "low",
2: "medium",
3: "high",
4: "urgent",
}
_STATUS_NUMBER_TYPE_MAP: dict[int, str] = {
2: "open",
3: "pending",
4: "resolved",
5: "closed",
}
def _create_metadata_from_ticket(ticket: dict) -> dict:
metadata: dict[str, str | list[str]] = {}
# Combine all emails into a list so there are no repeated emails
email_data: set[str] = set()
for key, value in ticket.items():
# Skip fields that aren't useful for embedding
if key not in _TICKET_FIELDS_TO_INCLUDE:
continue
# Skip empty fields
if not value or value == "[]":
continue
# Convert strings or lists to strings
stringified_value: str | list[str]
if isinstance(value, list):
stringified_value = [str(item) for item in value]
else:
stringified_value = str(value)
if "email" in key:
if isinstance(stringified_value, list):
email_data.update(stringified_value)
else:
email_data.add(stringified_value)
else:
metadata[key] = stringified_value
if email_data:
metadata["emails"] = list(email_data)
# Convert source numbers to human-parsable string
if source_number := ticket.get("source"):
metadata["source"] = _SOURCE_NUMBER_TYPE_MAP.get(
source_number, "Unknown Source Type"
)
# Convert priority numbers to human-parsable string
if priority_number := ticket.get("priority"):
metadata["priority"] = _PRIORITY_NUMBER_TYPE_MAP.get(
priority_number, "Unknown Priority"
)
# Convert status to human-parsable string
if status_number := ticket.get("status"):
metadata["status"] = _STATUS_NUMBER_TYPE_MAP.get(
status_number, "Unknown Status"
)
due_by = datetime.fromisoformat(ticket["due_by"].replace("Z", "+00:00"))
metadata["overdue"] = str(datetime.now(timezone.utc) > due_by)
return metadata
def _create_doc_from_ticket(ticket: dict, domain: str) -> Document:
# Use the ticket description as the text
text = f"Ticket description: {parse_html_page_basic(ticket.get('description_text', ''))}"
metadata = _create_metadata_from_ticket(ticket)
# This is also used in the ID because it is more unique than just the ticket ID
link = f"https://{domain}.freshdesk.com/helpdesk/tickets/{ticket['id']}"
return Document(
id=_FRESHDESK_ID_PREFIX + link,
sections=[
Section(
link=link,
text=text,
)
],
source=DocumentSource.FRESHDESK,
semantic_identifier=ticket["subject"],
metadata=metadata,
doc_updated_at=datetime.fromisoformat(
ticket["updated_at"].replace("Z", "+00:00")
),
)
class FreshdeskConnector(PollConnector, LoadConnector):
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
self.batch_size = batch_size
def load_credentials(self, credentials: dict[str, str | int]) -> None:
api_key = credentials.get("freshdesk_api_key")
domain = credentials.get("freshdesk_domain")
password = credentials.get("freshdesk_password")
if not all(isinstance(cred, str) for cred in [domain, api_key, password]):
raise ConnectorMissingCredentialError(
"All Freshdesk credentials must be strings"
)
self.api_key = str(api_key)
self.domain = str(domain)
self.password = str(password)
def _fetch_tickets(
self, start: datetime | None = None, end: datetime | None = None
) -> Iterator[List[dict]]:
"""
'end' is not currently used, so we may double fetch tickets created after the indexing
starts but before the actual call is made.
To use 'end' would require us to use the search endpoint but it has limitations,
namely having to fetch all IDs and then individually fetch each ticket because there is no
'include' field available for this endpoint:
https://developers.freshdesk.com/api/#filter_tickets
"""
if self.api_key is None or self.domain is None or self.password is None:
raise ConnectorMissingCredentialError("freshdesk")
base_url = f"https://{self.domain}.freshdesk.com/api/v2/tickets"
params: dict[str, int | str] = {
"include": "description",
"per_page": 50,
"page": 1,
}
if start:
params["updated_since"] = start.isoformat()
while True:
response = requests.get(
base_url, auth=(self.api_key, self.password), params=params
)
response.raise_for_status()
if response.status_code == 204:
break
tickets = json.loads(response.content)
logger.info(
f"Fetched {len(tickets)} tickets from Freshdesk API (Page {params['page']})"
)
yield tickets
if len(tickets) < int(params["per_page"]):
break
params["page"] = int(params["page"]) + 1
def _process_tickets(
self, start: datetime | None = None, end: datetime | None = None
) -> GenerateDocumentsOutput:
doc_batch: List[Document] = []
for ticket_batch in self._fetch_tickets(start, end):
for ticket in ticket_batch:
doc_batch.append(_create_doc_from_ticket(ticket, self.domain))
if len(doc_batch) >= self.batch_size:
yield doc_batch
doc_batch = []
if doc_batch:
yield doc_batch
def load_from_state(self) -> GenerateDocumentsOutput:
return self._process_tickets()
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
yield from self._process_tickets(start_datetime, end_datetime)
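A minimal usage sketch of the Freshdesk connector (hypothetical domain and credentials). Tickets are fetched 50 per page and yielded in batches of up to batch_size documents:

connector = FreshdeskConnector()
connector.load_credentials(
    {
        "freshdesk_domain": "acme",
        "freshdesk_api_key": "hypothetical-key",
        "freshdesk_password": "hypothetical-password",
    }
)
for doc_batch in connector.load_from_state():
    for doc in doc_batch:
        print(doc.semantic_identifier, doc.metadata.get("status"))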

View File

@@ -1,221 +1,361 @@
from base64 import urlsafe_b64decode
from typing import Any
from typing import cast
from typing import Dict
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
from googleapiclient import discovery # type: ignore
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from danswer.connectors.gmail.connector_auth import (
get_gmail_creds_for_authorized_user,
)
from danswer.connectors.gmail.connector_auth import (
get_gmail_creds_for_service_account,
)
from danswer.connectors.gmail.constants import (
DB_CREDENTIALS_DICT_DELEGATED_USER_KEY,
)
from danswer.connectors.gmail.constants import DB_CREDENTIALS_DICT_TOKEN_KEY
from danswer.connectors.gmail.constants import (
GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
from danswer.connectors.google_utils.google_auth import get_google_creds
from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval
from danswer.connectors.google_utils.resources import get_admin_service
from danswer.connectors.google_utils.resources import get_gmail_service
from danswer.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from danswer.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR
from danswer.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS
from danswer.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE
from danswer.connectors.google_utils.shared_constants import USER_FIELDS
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import GenerateSlimDocumentOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.interfaces import SlimConnector
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.connectors.models import SlimDocument
from danswer.utils.logger import setup_logger
from danswer.utils.retry_wrapper import retry_builder
logger = setup_logger()
# This is for the initial list call to get the thread ids
THREAD_LIST_FIELDS = "nextPageToken, threads(id)"
class GmailConnector(LoadConnector, PollConnector):
# These are the fields to retrieve using the ID from the initial list call
PARTS_FIELDS = "parts(body(data), mimeType)"
PAYLOAD_FIELDS = f"payload(headers, {PARTS_FIELDS})"
MESSAGES_FIELDS = f"messages(id, {PAYLOAD_FIELDS})"
THREADS_FIELDS = f"threads(id, {MESSAGES_FIELDS})"
THREAD_FIELDS = f"id, {MESSAGES_FIELDS}"
EMAIL_FIELDS = [
"cc",
"bcc",
"from",
"to",
]
add_retries = retry_builder(tries=50, max_delay=30)
def _build_time_range_query(
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
) -> str | None:
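# Illustrative: (1700000000, 1700600000) -> "after:1700000000 before:1700600000"; (None, None) -> None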
query = ""
if time_range_start is not None and time_range_start != 0:
query += f"after:{int(time_range_start)}"
if time_range_end is not None and time_range_end != 0:
query += f" before:{int(time_range_end)}"
query = query.strip()
if len(query) == 0:
return None
return query
def _clean_email_and_extract_name(email: str) -> tuple[str, str | None]:
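# Illustrative: "Jane Doe <jane@example.com>" -> ("jane@example.com", "Jane Doe"); "jane@example.com" -> ("jane@example.com", None)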
email = email.strip()
if "<" in email and ">" in email:
# Handle format: "Display Name <email@domain.com>"
display_name = email[: email.find("<")].strip()
email_address = email[email.find("<") + 1 : email.find(">")].strip()
return email_address, display_name if display_name else None
else:
# Handle plain email address
return email.strip(), None
def _get_owners_from_emails(emails: dict[str, str | None]) -> list[BasicExpertInfo]:
owners = []
for email, names in emails.items():
if names:
name_parts = names.split(" ")
first_name = " ".join(name_parts[:-1])
last_name = name_parts[-1]
else:
first_name = None
last_name = None
owners.append(
BasicExpertInfo(email=email, first_name=first_name, last_name=last_name)
)
return owners
def _get_message_body(payload: dict[str, Any]) -> str:
parts = payload.get("parts", [])
message_body = ""
for part in parts:
mime_type = part.get("mimeType")
body = part.get("body")
if mime_type == "text/plain" and body:
data = body.get("data", "")
text = urlsafe_b64decode(data).decode()
message_body += text
return message_body
def message_to_section(message: Dict[str, Any]) -> tuple[Section, dict[str, str]]:
link = f"https://mail.google.com/mail/u/0/#inbox/{message['id']}"
payload = message.get("payload", {})
headers = payload.get("headers", [])
metadata: dict[str, Any] = {}
for header in headers:
name = header.get("name").lower()
value = header.get("value")
if name in EMAIL_FIELDS:
metadata[name] = value
if name == "subject":
metadata["subject"] = value
if name == "date":
metadata["updated_at"] = value
if labels := message.get("labelIds"):
metadata["labels"] = labels
message_data = ""
for name, value in metadata.items():
# updated_at isn't super useful for the LLM
if name != "updated_at":
message_data += f"{name}: {value}\n"
message_body_text: str = _get_message_body(payload)
return Section(link=link, text=message_body_text + message_data), metadata
def thread_to_document(full_thread: Dict[str, Any]) -> Document | None:
all_messages = full_thread.get("messages", [])
if not all_messages:
return None
sections = []
semantic_identifier = ""
updated_at = None
from_emails: dict[str, str | None] = {}
other_emails: dict[str, str | None] = {}
for message in all_messages:
section, message_metadata = message_to_section(message)
sections.append(section)
for name, value in message_metadata.items():
if name in EMAIL_FIELDS:
email, display_name = _clean_email_and_extract_name(value)
if name == "from":
from_emails[email] = (
display_name if not from_emails.get(email) else None
)
else:
other_emails[email] = (
display_name if not other_emails.get(email) else None
)
# If we haven't set the semantic identifier yet, set it to the subject of the first message
if not semantic_identifier:
semantic_identifier = message_metadata.get("subject", "")
if message_metadata.get("updated_at"):
updated_at = message_metadata.get("updated_at")
updated_at_datetime = None
if updated_at:
updated_at_datetime = time_str_to_utc(updated_at)
id = full_thread.get("id")
if not id:
raise ValueError("Thread ID is required")
primary_owners = _get_owners_from_emails(from_emails)
secondary_owners = _get_owners_from_emails(other_emails)
return Document(
id=id,
semantic_identifier=semantic_identifier,
sections=sections,
source=DocumentSource.GMAIL,
# This is used to perform permission sync
primary_owners=primary_owners,
secondary_owners=secondary_owners,
doc_updated_at=updated_at_datetime,
# Not adding emails to metadata because it's already in the sections
metadata={},
)
class GmailConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
self.batch_size = batch_size
self.creds: OAuthCredentials | ServiceAccountCredentials | None = None
self._creds: OAuthCredentials | ServiceAccountCredentials | None = None
self._primary_admin_email: str | None = None
@property
def primary_admin_email(self) -> str:
if self._primary_admin_email is None:
raise RuntimeError(
"Primary admin email missing, "
"should not call this property "
"before calling load_credentials"
)
return self._primary_admin_email
@property
def google_domain(self) -> str:
if self._primary_admin_email is None:
raise RuntimeError(
"Primary admin email missing, "
"should not call this property "
"before calling load_credentials"
)
return self._primary_admin_email.split("@")[-1]
@property
def creds(self) -> OAuthCredentials | ServiceAccountCredentials:
if self._creds is None:
raise RuntimeError(
"Creds missing, "
"should not call this property "
"before calling load_credentials"
)
return self._creds
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:
"""Checks for two different types of credentials.
(1) A credential which holds a token acquired via a user going through
the Google OAuth flow.
(2) A credential which holds a service account key JSON file, which
can then be used to impersonate any user in the workspace.
"""
creds: OAuthCredentials | ServiceAccountCredentials | None = None
new_creds_dict = None
if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:
access_token_json_str = cast(
str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY]
)
creds = get_gmail_creds_for_authorized_user(
token_json_str=access_token_json_str
)
primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
self._primary_admin_email = primary_admin_email
# tell caller to update token stored in DB if it has changed
# (e.g. the token has been refreshed)
new_creds_json_str = creds.to_json() if creds else ""
if new_creds_json_str != access_token_json_str:
new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str}
if GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:
service_account_key_json_str = credentials[
GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
]
creds = get_gmail_creds_for_service_account(
service_account_key_json_str=service_account_key_json_str
)
# "Impersonate" a user if one is specified
delegated_user_email = cast(
str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY)
)
if delegated_user_email:
creds = creds.with_subject(delegated_user_email) if creds else None # type: ignore
if creds is None:
raise PermissionError(
"Unable to access Gmail - unknown credential structure."
)
self.creds = creds
self._creds, new_creds_dict = get_google_creds(
credentials=credentials,
source=DocumentSource.GMAIL,
)
return new_creds_dict
def _get_email_body(self, payload: dict[str, Any]) -> str:
parts = payload.get("parts", [])
email_body = ""
for part in parts:
mime_type = part.get("mimeType")
body = part.get("body")
if mime_type == "text/plain":
data = body.get("data", "")
text = urlsafe_b64decode(data).decode()
email_body += text
return email_body
def _get_all_user_emails(self) -> list[str]:
admin_service = get_admin_service(self.creds, self.primary_admin_email)
emails = []
for user in execute_paginated_retrieval(
retrieval_function=admin_service.users().list,
list_key="users",
fields=USER_FIELDS,
domain=self.google_domain,
):
if email := user.get("primaryEmail"):
emails.append(email)
return emails
def _email_to_document(self, full_email: Dict[str, Any]) -> Document:
email_id = full_email["id"]
payload = full_email["payload"]
headers = payload.get("headers")
labels = full_email.get("labelIds", [])
metadata = {}
if headers:
for header in headers:
name = header.get("name").lower()
value = header.get("value")
if name in ["from", "to", "subject", "date", "cc", "bcc"]:
metadata[name] = value
email_data = ""
for name, value in metadata.items():
email_data += f"{name}: {value}\n"
metadata["labels"] = labels
logger.debug(f"{email_data}")
email_body_text: str = self._get_email_body(payload)
date_str = metadata.get("date")
email_updated_at = time_str_to_utc(date_str) if date_str else None
link = f"https://mail.google.com/mail/u/0/#inbox/{email_id}"
return Document(
id=email_id,
sections=[Section(link=link, text=email_data + email_body_text)],
source=DocumentSource.GMAIL,
title=metadata.get("subject"),
semantic_identifier=metadata.get("subject", "Untitled Email"),
doc_updated_at=email_updated_at,
metadata=metadata,
)
@staticmethod
def _build_time_range_query(
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
) -> str | None:
query = ""
if time_range_start is not None and time_range_start != 0:
query += f"after:{int(time_range_start)}"
if time_range_end is not None and time_range_end != 0:
query += f" before:{int(time_range_end)}"
query = query.strip()
if len(query) == 0:
return None
return query
def _fetch_mails_from_gmail(
def _fetch_threads(
self,
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
) -> GenerateDocumentsOutput:
if self.creds is None:
raise PermissionError("Not logged into Gmail")
page_token = ""
query = GmailConnector._build_time_range_query(time_range_start, time_range_end)
service = discovery.build("gmail", "v1", credentials=self.creds)
while page_token is not None:
result = (
service.users()
.messages()
.list(
userId="me",
pageToken=page_token,
q=query,
maxResults=self.batch_size,
query = _build_time_range_query(time_range_start, time_range_end)
doc_batch = []
for user_email in self._get_all_user_emails():
gmail_service = get_gmail_service(self.creds, user_email)
for thread in execute_paginated_retrieval(
retrieval_function=gmail_service.users().threads().list,
list_key="threads",
userId=user_email,
fields=THREAD_LIST_FIELDS,
q=query,
):
full_threads = execute_paginated_retrieval(
retrieval_function=gmail_service.users().threads().get,
list_key=None,
userId=user_email,
fields=THREAD_FIELDS,
id=thread["id"],
)
.execute()
)
page_token = result.get("nextPageToken")
messages = result.get("messages", [])
doc_batch = []
for message in messages:
message_id = message["id"]
msg = (
service.users()
.messages()
.get(userId="me", id=message_id, format="full")
.execute()
)
doc = self._email_to_document(msg)
# full_threads is an iterator containing a single thread
# so we need to convert it to a list and grab the first element
full_thread = list(full_threads)[0]
doc = thread_to_document(full_thread)
if doc is None:
continue
doc_batch.append(doc)
if len(doc_batch) > 0:
yield doc_batch
if len(doc_batch) > self.batch_size:
yield doc_batch
doc_batch = []
if doc_batch:
yield doc_batch
def _fetch_slim_threads(
self,
time_range_start: SecondsSinceUnixEpoch | None = None,
time_range_end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
query = _build_time_range_query(time_range_start, time_range_end)
doc_batch = []
for user_email in self._get_all_user_emails():
logger.info(f"Fetching slim threads for user: {user_email}")
gmail_service = get_gmail_service(self.creds, user_email)
for thread in execute_paginated_retrieval(
retrieval_function=gmail_service.users().threads().list,
list_key="threads",
userId=user_email,
fields=THREAD_LIST_FIELDS,
q=query,
):
doc_batch.append(
SlimDocument(
id=thread["id"],
perm_sync_data={"user_email": user_email},
)
)
if len(doc_batch) > SLIM_BATCH_SIZE:
yield doc_batch
doc_batch = []
if doc_batch:
yield doc_batch
def load_from_state(self) -> GenerateDocumentsOutput:
yield from self._fetch_mails_from_gmail()
try:
yield from self._fetch_threads()
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
yield from self._fetch_mails_from_gmail(start, end)
try:
yield from self._fetch_threads(start, end)
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
def retrieve_all_slim_documents(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
try:
yield from self._fetch_slim_threads(start, end)
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
if __name__ == "__main__":
import json
import os
service_account_json_path = os.environ.get("GOOGLE_SERVICE_ACCOUNT_KEY_JSON_PATH")
if not service_account_json_path:
raise ValueError(
"Please set GOOGLE_SERVICE_ACCOUNT_KEY_JSON_PATH environment variable"
)
with open(service_account_json_path) as f:
creds = json.load(f)
credentials_dict = {
DB_CREDENTIALS_DICT_TOKEN_KEY: json.dumps(creds),
}
delegated_user = os.environ.get("GMAIL_DELEGATED_USER")
if delegated_user:
credentials_dict[DB_CREDENTIALS_DICT_DELEGATED_USER_KEY] = delegated_user
connector = GmailConnector()
connector.load_credentials(
json.loads(credentials_dict[DB_CREDENTIALS_DICT_TOKEN_KEY])
)
document_batch_generator = connector.load_from_state()
for document_batch in document_batch_generator:
print(document_batch)
break
pass

View File

@@ -1,197 +0,0 @@
import json
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import ParseResult
from urllib.parse import urlparse
from google.auth.transport.requests import Request # type: ignore
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore
from sqlalchemy.orm import Session
from danswer.configs.app_configs import WEB_DOMAIN
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import KV_CRED_KEY
from danswer.configs.constants import KV_GMAIL_CRED_KEY
from danswer.configs.constants import KV_GMAIL_SERVICE_ACCOUNT_KEY
from danswer.connectors.gmail.constants import (
DB_CREDENTIALS_DICT_DELEGATED_USER_KEY,
)
from danswer.connectors.gmail.constants import DB_CREDENTIALS_DICT_TOKEN_KEY
from danswer.connectors.gmail.constants import (
GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from danswer.connectors.gmail.constants import SCOPES
from danswer.db.credentials import update_credential_json
from danswer.db.models import User
from danswer.key_value_store.factory import get_kv_store
from danswer.server.documents.models import CredentialBase
from danswer.server.documents.models import GoogleAppCredentials
from danswer.server.documents.models import GoogleServiceAccountKey
from danswer.utils.logger import setup_logger
logger = setup_logger()
def _build_frontend_gmail_redirect() -> str:
return f"{WEB_DOMAIN}/admin/connectors/gmail/auth/callback"
def get_gmail_creds_for_authorized_user(
token_json_str: str,
) -> OAuthCredentials | None:
creds_json = json.loads(token_json_str)
creds = OAuthCredentials.from_authorized_user_info(creds_json, SCOPES)
if creds.valid:
return creds
if creds.expired and creds.refresh_token:
try:
creds.refresh(Request())
if creds.valid:
logger.notice("Refreshed Gmail tokens.")
return creds
except Exception as e:
logger.exception(f"Failed to refresh gmail access token due to: {e}")
return None
return None
def get_gmail_creds_for_service_account(
service_account_key_json_str: str,
) -> ServiceAccountCredentials | None:
service_account_key = json.loads(service_account_key_json_str)
creds = ServiceAccountCredentials.from_service_account_info(
service_account_key, scopes=SCOPES
)
if not creds.valid or not creds.expired:
creds.refresh(Request())
return creds if creds.valid else None
def verify_csrf(credential_id: int, state: str) -> None:
csrf = get_kv_store().load(KV_CRED_KEY.format(str(credential_id)))
if csrf != state:
raise PermissionError(
"State from Gmail Connector callback does not match expected"
)
def get_gmail_auth_url(credential_id: int) -> str:
creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
credential_json = json.loads(creds_str)
flow = InstalledAppFlow.from_client_config(
credential_json,
scopes=SCOPES,
redirect_uri=_build_frontend_gmail_redirect(),
)
auth_url, _ = flow.authorization_url(prompt="consent")
parsed_url = cast(ParseResult, urlparse(auth_url))
params = parse_qs(parsed_url.query)
get_kv_store().store(
KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
) # type: ignore
return str(auth_url)
def get_auth_url(credential_id: int) -> str:
creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
credential_json = json.loads(creds_str)
flow = InstalledAppFlow.from_client_config(
credential_json,
scopes=SCOPES,
redirect_uri=_build_frontend_gmail_redirect(),
)
auth_url, _ = flow.authorization_url(prompt="consent")
parsed_url = cast(ParseResult, urlparse(auth_url))
params = parse_qs(parsed_url.query)
get_kv_store().store(
KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
) # type: ignore
return str(auth_url)
def update_gmail_credential_access_tokens(
auth_code: str,
credential_id: int,
user: User,
db_session: Session,
) -> OAuthCredentials | None:
app_credentials = get_google_app_gmail_cred()
flow = InstalledAppFlow.from_client_config(
app_credentials.model_dump(),
scopes=SCOPES,
redirect_uri=_build_frontend_gmail_redirect(),
)
flow.fetch_token(code=auth_code)
creds = flow.credentials
token_json_str = creds.to_json()
new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str}
if not update_credential_json(credential_id, new_creds_dict, user, db_session):
return None
return creds
def build_service_account_creds(
delegated_user_email: str | None = None,
) -> CredentialBase:
service_account_key = get_gmail_service_account_key()
credential_dict = {
GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key.json(),
}
if delegated_user_email:
credential_dict[DB_CREDENTIALS_DICT_DELEGATED_USER_KEY] = delegated_user_email
return CredentialBase(
source=DocumentSource.GMAIL,
credential_json=credential_dict,
admin_public=True,
)
def get_google_app_gmail_cred() -> GoogleAppCredentials:
creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
return GoogleAppCredentials(**json.loads(creds_str))
def upsert_google_app_gmail_cred(app_credentials: GoogleAppCredentials) -> None:
get_kv_store().store(KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True)
def delete_google_app_gmail_cred() -> None:
get_kv_store().delete(KV_GMAIL_CRED_KEY)
def get_gmail_service_account_key() -> GoogleServiceAccountKey:
creds_str = str(get_kv_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY))
return GoogleServiceAccountKey(**json.loads(creds_str))
def upsert_gmail_service_account_key(
service_account_key: GoogleServiceAccountKey,
) -> None:
get_kv_store().store(
KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
)
def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None:
get_kv_store().store(
KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
)
def delete_gmail_service_account_key() -> None:
get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
def delete_service_account_key() -> None:
get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)

View File

@@ -1,4 +0,0 @@
DB_CREDENTIALS_DICT_TOKEN_KEY = "gmail_tokens"
GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "gmail_service_account_key"
DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "gmail_delegated_user"
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]

File diff suppressed because it is too large

View File

@@ -1,229 +0,0 @@
import json
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import ParseResult
from urllib.parse import urlparse
from google.auth.transport.requests import Request # type: ignore
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore
from sqlalchemy.orm import Session
from danswer.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from danswer.configs.app_configs import WEB_DOMAIN
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import KV_CRED_KEY
from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY
from danswer.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY
from danswer.connectors.google_drive.constants import BASE_SCOPES
from danswer.connectors.google_drive.constants import (
DB_CREDENTIALS_DICT_DELEGATED_USER_KEY,
)
from danswer.connectors.google_drive.constants import (
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY
from danswer.connectors.google_drive.constants import FETCH_GROUPS_SCOPES
from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES
from danswer.db.credentials import update_credential_json
from danswer.db.models import User
from danswer.key_value_store.factory import get_kv_store
from danswer.server.documents.models import CredentialBase
from danswer.server.documents.models import GoogleAppCredentials
from danswer.server.documents.models import GoogleServiceAccountKey
from danswer.utils.logger import setup_logger
logger = setup_logger()
def build_gdrive_scopes() -> list[str]:
base_scopes: list[str] = BASE_SCOPES
permissions_scopes: list[str] = FETCH_PERMISSIONS_SCOPES
groups_scopes: list[str] = FETCH_GROUPS_SCOPES
if ENTERPRISE_EDITION_ENABLED:
return base_scopes + permissions_scopes + groups_scopes
return base_scopes + permissions_scopes
def _build_frontend_google_drive_redirect() -> str:
return f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback"
def get_google_drive_creds_for_authorized_user(
token_json_str: str, scopes: list[str] = build_gdrive_scopes()
) -> OAuthCredentials | None:
creds_json = json.loads(token_json_str)
creds = OAuthCredentials.from_authorized_user_info(creds_json, scopes)
if creds.valid:
return creds
if creds.expired and creds.refresh_token:
try:
creds.refresh(Request())
if creds.valid:
logger.notice("Refreshed Google Drive tokens.")
return creds
except Exception as e:
logger.exception(f"Failed to refresh google drive access token due to: {e}")
return None
return None
def _get_google_drive_creds_for_service_account(
service_account_key_json_str: str, scopes: list[str] = build_gdrive_scopes()
) -> ServiceAccountCredentials | None:
service_account_key = json.loads(service_account_key_json_str)
creds = ServiceAccountCredentials.from_service_account_info(
service_account_key, scopes=scopes
)
if not creds.valid or not creds.expired:
creds.refresh(Request())
return creds if creds.valid else None
def get_google_drive_creds(
credentials: dict[str, str], scopes: list[str] = build_gdrive_scopes()
) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]:
oauth_creds = None
service_creds = None
new_creds_dict = None
if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:
access_token_json_str = cast(str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY])
oauth_creds = get_google_drive_creds_for_authorized_user(
token_json_str=access_token_json_str, scopes=scopes
)
# tell caller to update token stored in DB if it has changed
# (e.g. the token has been refreshed)
new_creds_json_str = oauth_creds.to_json() if oauth_creds else ""
if new_creds_json_str != access_token_json_str:
new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str}
elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:
service_account_key_json_str = credentials[
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
]
service_creds = _get_google_drive_creds_for_service_account(
service_account_key_json_str=service_account_key_json_str,
scopes=scopes,
)
# "Impersonate" a user if one is specified
delegated_user_email = cast(
str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY)
)
if delegated_user_email:
service_creds = (
service_creds.with_subject(delegated_user_email)
if service_creds
else None
)
creds: ServiceAccountCredentials | OAuthCredentials | None = (
oauth_creds or service_creds
)
if creds is None:
raise PermissionError(
"Unable to access Google Drive - unknown credential structure."
)
return creds, new_creds_dict
def verify_csrf(credential_id: int, state: str) -> None:
csrf = get_kv_store().load(KV_CRED_KEY.format(str(credential_id)))
if csrf != state:
raise PermissionError(
"State from Google Drive Connector callback does not match expected"
)
def get_auth_url(credential_id: int) -> str:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
credential_json = json.loads(creds_str)
flow = InstalledAppFlow.from_client_config(
credential_json,
scopes=build_gdrive_scopes(),
redirect_uri=_build_frontend_google_drive_redirect(),
)
auth_url, _ = flow.authorization_url(prompt="consent")
parsed_url = cast(ParseResult, urlparse(auth_url))
params = parse_qs(parsed_url.query)
get_kv_store().store(
KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
) # type: ignore
return str(auth_url)
def update_credential_access_tokens(
auth_code: str,
credential_id: int,
user: User,
db_session: Session,
) -> OAuthCredentials | None:
app_credentials = get_google_app_cred()
flow = InstalledAppFlow.from_client_config(
app_credentials.model_dump(),
scopes=build_gdrive_scopes(),
redirect_uri=_build_frontend_google_drive_redirect(),
)
flow.fetch_token(code=auth_code)
creds = flow.credentials
token_json_str = creds.to_json()
new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str}
if not update_credential_json(credential_id, new_creds_dict, user, db_session):
return None
return creds
def build_service_account_creds(
source: DocumentSource,
delegated_user_email: str | None = None,
) -> CredentialBase:
service_account_key = get_service_account_key()
credential_dict = {
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key.json(),
}
if delegated_user_email:
credential_dict[DB_CREDENTIALS_DICT_DELEGATED_USER_KEY] = delegated_user_email
return CredentialBase(
credential_json=credential_dict,
admin_public=True,
source=DocumentSource.GOOGLE_DRIVE,
)
def get_google_app_cred() -> GoogleAppCredentials:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
return GoogleAppCredentials(**json.loads(creds_str))
def upsert_google_app_cred(app_credentials: GoogleAppCredentials) -> None:
get_kv_store().store(KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True)
def delete_google_app_cred() -> None:
get_kv_store().delete(KV_GOOGLE_DRIVE_CRED_KEY)
def get_service_account_key() -> GoogleServiceAccountKey:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY))
return GoogleServiceAccountKey(**json.loads(creds_str))
def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None:
get_kv_store().store(
KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
)
def delete_service_account_key() -> None:
get_kv_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)

View File

@@ -1,7 +1,4 @@
DB_CREDENTIALS_DICT_TOKEN_KEY = "google_drive_tokens"
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key"
DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "google_drive_delegated_user"
BASE_SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
FETCH_PERMISSIONS_SCOPES = ["https://www.googleapis.com/auth/drive.metadata.readonly"]
FETCH_GROUPS_SCOPES = ["https://www.googleapis.com/auth/cloud-identity.groups.readonly"]
UNSUPPORTED_FILE_TYPE_CONTENT = "" # keep empty for now
DRIVE_FOLDER_TYPE = "application/vnd.google-apps.folder"
DRIVE_SHORTCUT_TYPE = "application/vnd.google-apps.shortcut"
DRIVE_FILE_TYPE = "application/vnd.google-apps.file"

View File

@@ -0,0 +1,260 @@
import io
from datetime import datetime
from datetime import timezone
from googleapiclient.discovery import build # type: ignore
from googleapiclient.errors import HttpError # type: ignore
from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import IGNORE_FOR_QA
from danswer.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
from danswer.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE
from danswer.connectors.google_drive.constants import UNSUPPORTED_FILE_TYPE_CONTENT
from danswer.connectors.google_drive.models import GDriveMimeType
from danswer.connectors.google_drive.models import GoogleDriveFileType
from danswer.connectors.google_drive.section_extraction import get_document_sections
from danswer.connectors.google_utils.resources import GoogleDocsService
from danswer.connectors.google_utils.resources import GoogleDriveService
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.connectors.models import SlimDocument
from danswer.file_processing.extract_file_text import docx_to_text
from danswer.file_processing.extract_file_text import pptx_to_text
from danswer.file_processing.extract_file_text import read_pdf_file
from danswer.file_processing.unstructured import get_unstructured_api_key
from danswer.file_processing.unstructured import unstructured_to_text
from danswer.utils.logger import setup_logger
logger = setup_logger()
# these errors don't represent a failure in the connector, but simply files
# that can't / shouldn't be indexed
ERRORS_TO_CONTINUE_ON = [
"cannotExportFile",
"exportSizeLimitExceeded",
"cannotDownloadFile",
]
def _extract_sections_basic(
file: dict[str, str], service: GoogleDriveService
) -> list[Section]:
mime_type = file["mimeType"]
link = file["webViewLink"]
if mime_type not in set(item.value for item in GDriveMimeType):
# Unsupported file types can still have a title; finding it this way is still useful
return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
try:
if mime_type == GDriveMimeType.SPREADSHEET.value:
try:
sheets_service = build(
"sheets", "v4", credentials=service._http.credentials
)
spreadsheet = (
sheets_service.spreadsheets()
.get(spreadsheetId=file["id"])
.execute()
)
sections = []
for sheet in spreadsheet["sheets"]:
sheet_name = sheet["properties"]["title"]
sheet_id = sheet["properties"]["sheetId"]
# Get sheet dimensions
grid_properties = sheet["properties"].get("gridProperties", {})
row_count = grid_properties.get("rowCount", 1000)
column_count = grid_properties.get("columnCount", 26)
# Convert column count to letter (e.g., 26 -> Z, 27 -> AA)
end_column = ""
while column_count:
column_count, remainder = divmod(column_count - 1, 26)
end_column = chr(65 + remainder) + end_column
range_name = f"'{sheet_name}'!A1:{end_column}{row_count}"
try:
result = (
sheets_service.spreadsheets()
.values()
.get(spreadsheetId=file["id"], range=range_name)
.execute()
)
values = result.get("values", [])
if values:
text = f"Sheet: {sheet_name}\n"
for row in values:
text += "\t".join(str(cell) for cell in row) + "\n"
sections.append(
Section(
link=f"{link}#gid={sheet_id}",
text=text,
)
)
except HttpError as e:
logger.warning(
f"Error fetching data for sheet '{sheet_name}': {e}"
)
continue
return sections
except Exception as e:
logger.warning(
f"Ran into exception '{e}' when pulling data from Google Sheet '{file['name']}'."
" Falling back to basic extraction."
)
if mime_type in [
GDriveMimeType.DOC.value,
GDriveMimeType.PPT.value,
GDriveMimeType.SPREADSHEET.value,
]:
export_mime_type = (
"text/plain"
if mime_type != GDriveMimeType.SPREADSHEET.value
else "text/csv"
)
text = (
service.files()
.export(fileId=file["id"], mimeType=export_mime_type)
.execute()
.decode("utf-8")
)
return [Section(link=link, text=text)]
elif mime_type in [
GDriveMimeType.PLAIN_TEXT.value,
GDriveMimeType.MARKDOWN.value,
]:
return [
Section(
link=link,
text=service.files()
.get_media(fileId=file["id"])
.execute()
.decode("utf-8"),
)
]
if mime_type in [
GDriveMimeType.WORD_DOC.value,
GDriveMimeType.POWERPOINT.value,
GDriveMimeType.PDF.value,
]:
response = service.files().get_media(fileId=file["id"]).execute()
if get_unstructured_api_key():
return [
Section(
link=link,
text=unstructured_to_text(
file=io.BytesIO(response),
file_name=file.get("name", file["id"]),
),
)
]
if mime_type == GDriveMimeType.WORD_DOC.value:
return [
Section(link=link, text=docx_to_text(file=io.BytesIO(response)))
]
elif mime_type == GDriveMimeType.PDF.value:
text, _ = read_pdf_file(file=io.BytesIO(response))
return [Section(link=link, text=text)]
elif mime_type == GDriveMimeType.POWERPOINT.value:
return [
Section(link=link, text=pptx_to_text(file=io.BytesIO(response)))
]
return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
except Exception:
return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
def convert_drive_item_to_document(
file: GoogleDriveFileType,
drive_service: GoogleDriveService,
docs_service: GoogleDocsService,
) -> Document | None:
try:
# Skip files that are shortcuts
if file.get("mimeType") == DRIVE_SHORTCUT_TYPE:
logger.info("Ignoring Drive Shortcut Filetype")
return None
# Skip files that are folders
if file.get("mimeType") == DRIVE_FOLDER_TYPE:
logger.info("Ignoring Drive Folder Filetype")
return None
sections: list[Section] = []
# Special handling for Google Docs to preserve structure, link
# to headers
if file.get("mimeType") == GDriveMimeType.DOC.value:
try:
sections = get_document_sections(docs_service, file["id"])
except Exception as e:
logger.warning(
f"Ran into exception '{e}' when pulling sections from Google Doc '{file['name']}'."
" Falling back to basic extraction."
)
# NOTE: this will run if either (1) the above failed or (2) the file is not a Google Doc
if not sections:
try:
# For all other file types just extract the text
sections = _extract_sections_basic(file, drive_service)
except HttpError as e:
reason = e.error_details[0]["reason"] if e.error_details else e.reason
message = e.error_details[0]["message"] if e.error_details else e.reason
if e.status_code == 403 and reason in ERRORS_TO_CONTINUE_ON:
logger.warning(
f"Could not export file '{file['name']}' due to '{message}', skipping..."
)
return None
raise
if not sections:
return None
return Document(
id=file["webViewLink"],
sections=sections,
source=DocumentSource.GOOGLE_DRIVE,
semantic_identifier=file["name"],
doc_updated_at=datetime.fromisoformat(file["modifiedTime"]).astimezone(
timezone.utc
),
metadata={}
if any(section.text for section in sections)
else {IGNORE_FOR_QA: "True"},
additional_info=file.get("id"),
)
except Exception as e:
if not CONTINUE_ON_CONNECTOR_FAILURE:
raise e
logger.exception("Ran into exception when pulling a file from Google Drive")
return None
def build_slim_document(file: GoogleDriveFileType) -> SlimDocument | None:
# Skip files that are folders or shortcuts
if file.get("mimeType") in [DRIVE_FOLDER_TYPE, DRIVE_SHORTCUT_TYPE]:
return None
return SlimDocument(
id=file["webViewLink"],
perm_sync_data={
"doc_id": file.get("id"),
"permissions": file.get("permissions", []),
"permission_ids": file.get("permissionIds", []),
"name": file.get("name"),
"owner_email": file.get("owners", [{}])[0].get("emailAddress"),
},
)
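The spreadsheet branch in _extract_sections_basic converts a sheet's column count into an A1-style end column. A standalone sketch of that loop (hypothetical helper name), handy for sanity-checking the ranges it builds:

def _column_count_to_letter(column_count: int) -> str:
    # Same loop as above: 1 -> "A", 26 -> "Z", 27 -> "AA", 703 -> "AAA"
    end_column = ""
    while column_count:
        column_count, remainder = divmod(column_count - 1, 26)
        end_column = chr(65 + remainder) + end_column
    return end_column

assert _column_count_to_letter(26) == "Z"
assert _column_count_to_letter(27) == "AA"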

View File

@@ -0,0 +1,258 @@
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from typing import Any
from googleapiclient.discovery import Resource # type: ignore
from danswer.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
from danswer.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE
from danswer.connectors.google_drive.models import GoogleDriveFileType
from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.utils.logger import setup_logger
logger = setup_logger()
FILE_FIELDS = (
"nextPageToken, files(mimeType, id, name, permissions, modifiedTime, webViewLink, "
"shortcutDetails, owners(emailAddress))"
)
SLIM_FILE_FIELDS = (
"nextPageToken, files(mimeType, id, name, permissions(emailAddress, type), "
"permissionIds, webViewLink, owners(emailAddress))"
)
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"
def _generate_time_range_filter(
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> str:
time_range_filter = ""
if start is not None:
time_start = datetime.utcfromtimestamp(start).isoformat() + "Z"
time_range_filter += f" and modifiedTime >= '{time_start}'"
if end is not None:
time_stop = datetime.utcfromtimestamp(end).isoformat() + "Z"
time_range_filter += f" and modifiedTime <= '{time_stop}'"
return time_range_filter
def _get_folders_in_parent(
service: Resource,
parent_id: str | None = None,
) -> Iterator[GoogleDriveFileType]:
# Follow shortcuts to folders
query = f"(mimeType = '{DRIVE_FOLDER_TYPE}' or mimeType = '{DRIVE_SHORTCUT_TYPE}')"
query += " and trashed = false"
if parent_id:
query += f" and '{parent_id}' in parents"
for file in execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
continue_on_404_or_403=True,
corpora="allDrives",
supportsAllDrives=True,
includeItemsFromAllDrives=True,
fields=FOLDER_FIELDS,
q=query,
):
yield file
def _get_files_in_parent(
service: Resource,
parent_id: str,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
is_slim: bool = False,
) -> Iterator[GoogleDriveFileType]:
query = f"mimeType != '{DRIVE_FOLDER_TYPE}' and '{parent_id}' in parents"
query += " and trashed = false"
query += _generate_time_range_filter(start, end)
for file in execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
continue_on_404_or_403=True,
corpora="allDrives",
supportsAllDrives=True,
includeItemsFromAllDrives=True,
fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
q=query,
):
yield file
def crawl_folders_for_files(
service: Resource,
parent_id: str,
traversed_parent_ids: set[str],
update_traversed_ids_func: Callable[[str], None],
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
"""
This function starts crawling from any folder. It is slower though.
"""
if parent_id in traversed_parent_ids:
logger.info(f"Skipping subfolder since already traversed: {parent_id}")
return
found_files = False
for file in _get_files_in_parent(
service=service,
start=start,
end=end,
parent_id=parent_id,
):
found_files = True
yield file
if found_files:
update_traversed_ids_func(parent_id)
for subfolder in _get_folders_in_parent(
service=service,
parent_id=parent_id,
):
logger.info("Fetching all files in subfolder: " + subfolder["name"])
yield from crawl_folders_for_files(
service=service,
parent_id=subfolder["id"],
traversed_parent_ids=traversed_parent_ids,
update_traversed_ids_func=update_traversed_ids_func,
start=start,
end=end,
)
def get_files_in_shared_drive(
service: Resource,
drive_id: str,
is_slim: bool = False,
update_traversed_ids_func: Callable[[str], None] = lambda _: None,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
# If we know we are going to folder crawl later, we can cache the folders here
# Get all folders being queried and add them to the traversed set
folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
folder_query += " and trashed = false"
found_folders = False
for file in execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
continue_on_404_or_403=True,
corpora="drive",
driveId=drive_id,
supportsAllDrives=True,
includeItemsFromAllDrives=True,
fields="nextPageToken, files(id)",
q=folder_query,
):
update_traversed_ids_func(file["id"])
found_folders = True
if found_folders:
update_traversed_ids_func(drive_id)
# Get all files in the shared drive
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
file_query += " and trashed = false"
file_query += _generate_time_range_filter(start, end)
yield from execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
continue_on_404_or_403=True,
corpora="drive",
driveId=drive_id,
supportsAllDrives=True,
includeItemsFromAllDrives=True,
fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
q=file_query,
)
def get_all_files_in_my_drive(
service: Any,
update_traversed_ids_func: Callable,
is_slim: bool = False,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
# If we know we are going to folder crawl later, we can cache the folders here
# Get all folders being queried and add them to the traversed set
folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
folder_query += " and trashed = false"
folder_query += " and 'me' in owners"
found_folders = False
for file in execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
corpora="user",
fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
q=folder_query,
):
update_traversed_ids_func(file["id"])
found_folders = True
if found_folders:
update_traversed_ids_func(get_root_folder_id(service))
# Then get the files
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
file_query += " and trashed = false"
file_query += " and 'me' in owners"
file_query += _generate_time_range_filter(start, end)
yield from execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
corpora="user",
fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
q=file_query,
)
def get_all_files_for_oauth(
service: Any,
include_files_shared_with_me: bool,
include_my_drives: bool,
# One of the above 2 should be true
include_shared_drives: bool,
is_slim: bool = False,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
should_get_all = (
include_shared_drives and include_my_drives and include_files_shared_with_me
)
corpora = "allDrives" if should_get_all else "user"
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
file_query += " and trashed = false"
file_query += _generate_time_range_filter(start, end)
if not should_get_all:
if include_files_shared_with_me and not include_my_drives:
file_query += " and not 'me' in owners"
if not include_files_shared_with_me and include_my_drives:
file_query += " and 'me' in owners"
yield from execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
corpora=corpora,
includeItemsFromAllDrives=should_get_all,
supportsAllDrives=should_get_all,
fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
q=file_query,
)
# Just in case we need to get the root folder id
def get_root_folder_id(service: Resource) -> str:
# we don't paginate here because there is only one root folder per user
# https://developers.google.com/drive/api/guides/v2-to-v3-reference
return service.files().get(fileId="root", fields="id").execute()["id"]
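A minimal sketch (hypothetical timestamps) of the query string these helpers assemble for non-folder files in My Drive modified within a window:

start, end = 1700000000, 1700600000  # 2023-11-14T22:13:20Z .. 2023-11-21T20:53:20Z
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
file_query += " and trashed = false"
file_query += " and 'me' in owners"
file_query += _generate_time_range_filter(start, end)
# -> "mimeType != 'application/vnd.google-apps.folder' and trashed = false and 'me' in owners
#     and modifiedTime >= '2023-11-14T22:13:20Z' and modifiedTime <= '2023-11-21T20:53:20Z'"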

View File

@@ -0,0 +1,18 @@
from enum import Enum
from typing import Any
class GDriveMimeType(str, Enum):
DOC = "application/vnd.google-apps.document"
SPREADSHEET = "application/vnd.google-apps.spreadsheet"
PDF = "application/pdf"
WORD_DOC = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
PPT = "application/vnd.google-apps.presentation"
POWERPOINT = (
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
)
PLAIN_TEXT = "text/plain"
MARKDOWN = "text/markdown"
GoogleDriveFileType = dict[str, Any]
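A minimal sketch of resolving a Drive file dict (hypothetical values) to the enum, mirroring the membership check used in doc_conversion:

file: GoogleDriveFileType = {"id": "hypothetical-id", "mimeType": "application/pdf"}
mime_type = file["mimeType"]
gdrive_type = (
    GDriveMimeType(mime_type)
    if mime_type in set(item.value for item in GDriveMimeType)
    else None
)  # -> GDriveMimeType.PDF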

View File

@@ -0,0 +1,105 @@
from typing import Any
from pydantic import BaseModel
from danswer.connectors.google_utils.resources import GoogleDocsService
from danswer.connectors.models import Section
class CurrentHeading(BaseModel):
id: str
text: str
def _build_gdoc_section_link(doc_id: str, heading_id: str) -> str:
"""Builds a Google Doc link that jumps to a specific heading"""
# NOTE: doesn't support docs with multiple tabs at the moment; if we need that, ask @Chris
return (
f"https://docs.google.com/document/d/{doc_id}/edit?tab=t.0#heading={heading_id}"
)
def _extract_id_from_heading(paragraph: dict[str, Any]) -> str:
"""Extracts the id from a heading paragraph element"""
return paragraph["paragraphStyle"]["headingId"]
def _extract_text_from_paragraph(paragraph: dict[str, Any]) -> str:
"""Extracts the text content from a paragraph element"""
text_elements = []
for element in paragraph.get("elements", []):
if "textRun" in element:
text_elements.append(element["textRun"].get("content", ""))
return "".join(text_elements)
def get_document_sections(
docs_service: GoogleDocsService,
doc_id: str,
) -> list[Section]:
"""Extracts sections from a Google Doc, including their headings and content"""
# Fetch the document structure
doc = docs_service.documents().get(documentId=doc_id).execute()
# Get the content
content = doc.get("body", {}).get("content", [])
sections: list[Section] = []
current_section: list[str] = []
current_heading: CurrentHeading | None = None
for element in content:
if "paragraph" not in element:
continue
paragraph = element["paragraph"]
# Check if this is a heading
if (
"paragraphStyle" in paragraph
and "namedStyleType" in paragraph["paragraphStyle"]
):
style = paragraph["paragraphStyle"]["namedStyleType"]
is_heading = style.startswith("HEADING_")
is_title = style.startswith("TITLE")
if is_heading or is_title:
# If we were building a previous section, add it to sections list
if current_heading is not None and current_section:
heading_text = current_heading.text
section_text = f"{heading_text}\n" + "\n".join(current_section)
sections.append(
Section(
text=section_text.strip(),
link=_build_gdoc_section_link(doc_id, current_heading.id),
)
)
current_section = []
# Start new heading
heading_id = _extract_id_from_heading(paragraph)
heading_text = _extract_text_from_paragraph(paragraph)
current_heading = CurrentHeading(
id=heading_id,
text=heading_text,
)
continue
# Add content to current section
if current_heading is not None:
text = _extract_text_from_paragraph(paragraph)
if text.strip():
current_section.append(text)
# Don't forget to add the last section
if current_heading is not None and current_section:
section_text = f"{current_heading.text}\n" + "\n".join(current_section)
sections.append(
Section(
text=section_text.strip(),
link=_build_gdoc_section_link(doc_id, current_heading.id),
)
)
return sections
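A usage sketch for the section extractor above. The module path of get_document_sections is an assumption, and `creds` must carry the Drive read-only scopes listed in shared_constants.

from danswer.connectors.google_utils.resources import get_google_docs_service
from danswer.connectors.google_drive.section_extraction import get_document_sections  # assumed path


def print_doc_headings(creds, doc_id: str) -> None:
    docs_service = get_google_docs_service(creds)
    for section in get_document_sections(docs_service, doc_id):
        # Each Section pairs the heading + body text with a deep link to that heading
        print(section.link, section.text.splitlines()[0])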

View File

@@ -0,0 +1,107 @@
import json
from typing import cast
from google.auth.transport.requests import Request # type: ignore
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
from danswer.configs.constants import DocumentSource
from danswer.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from danswer.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_DICT_TOKEN_KEY,
)
from danswer.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from danswer.connectors.google_utils.shared_constants import (
GOOGLE_SCOPES,
)
from danswer.utils.logger import setup_logger
logger = setup_logger()
def get_google_oauth_creds(
token_json_str: str, source: DocumentSource
) -> OAuthCredentials | None:
creds_json = json.loads(token_json_str)
creds = OAuthCredentials.from_authorized_user_info(
info=creds_json,
scopes=GOOGLE_SCOPES[source],
)
if creds.valid:
return creds
if creds.expired and creds.refresh_token:
try:
creds.refresh(Request())
if creds.valid:
logger.notice("Refreshed Google Drive tokens.")
return creds
except Exception:
logger.exception("Failed to refresh google drive access token due to:")
return None
return None
def get_google_creds(
credentials: dict[str, str],
source: DocumentSource,
) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]:
"""Checks for two different types of credentials.
(1) A credential which holds a token acquired via a user going through
the Google OAuth flow.
(2) A credential which holds a service account key JSON file, which
can then be used to impersonate any user in the workspace.
"""
oauth_creds = None
service_creds = None
new_creds_dict = None
if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:
# OAUTH
access_token_json_str = cast(str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY])
oauth_creds = get_google_oauth_creds(
token_json_str=access_token_json_str, source=source
)
# tell caller to update token stored in DB if it has changed
# (e.g. the token has been refreshed)
new_creds_json_str = oauth_creds.to_json() if oauth_creds else ""
if new_creds_json_str != access_token_json_str:
new_creds_dict = {
DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str,
DB_CREDENTIALS_PRIMARY_ADMIN_KEY: credentials[
DB_CREDENTIALS_PRIMARY_ADMIN_KEY
],
}
elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:
# SERVICE ACCOUNT
service_account_key_json_str = credentials[
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
]
service_account_key = json.loads(service_account_key_json_str)
service_creds = ServiceAccountCredentials.from_service_account_info(
service_account_key, scopes=GOOGLE_SCOPES[source]
)
if not service_creds.valid or not service_creds.expired:
service_creds.refresh(Request())
if not service_creds.valid:
raise PermissionError(
f"Unable to access {source} - service account credentials are invalid."
)
creds: ServiceAccountCredentials | OAuthCredentials | None = (
oauth_creds or service_creds
)
if creds is None:
raise PermissionError(
f"Unable to access {source} - unknown credential structure."
)
return creds, new_creds_dict
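A hypothetical caller of get_google_creds. The module path is assumed, and persist_updated_credentials is a made-up stand-in for however the caller writes refreshed tokens back to the stored credential.

from danswer.configs.constants import DocumentSource
from danswer.connectors.google_utils.google_auth import get_google_creds  # assumed path


def load_drive_creds(stored_credentials: dict[str, str], persist_updated_credentials):
    creds, new_creds_dict = get_google_creds(
        credentials=stored_credentials,
        source=DocumentSource.GOOGLE_DRIVE,
    )
    if new_creds_dict is not None:
        # The OAuth token was refreshed; hand the new dict back so the next
        # run starts from the refreshed token instead of the stale one.
        persist_updated_credentials(new_creds_dict)
    return creds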

View File

@@ -0,0 +1,237 @@
import json
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import ParseResult
from urllib.parse import urlparse
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore
from sqlalchemy.orm import Session
from danswer.configs.app_configs import WEB_DOMAIN
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import KV_CRED_KEY
from danswer.configs.constants import KV_GMAIL_CRED_KEY
from danswer.configs.constants import KV_GMAIL_SERVICE_ACCOUNT_KEY
from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY
from danswer.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY
from danswer.connectors.google_utils.resources import get_drive_service
from danswer.connectors.google_utils.resources import get_gmail_service
from danswer.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
)
from danswer.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_DICT_TOKEN_KEY,
)
from danswer.connectors.google_utils.shared_constants import (
DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
)
from danswer.connectors.google_utils.shared_constants import (
GOOGLE_SCOPES,
)
from danswer.connectors.google_utils.shared_constants import (
MISSING_SCOPES_ERROR_STR,
)
from danswer.connectors.google_utils.shared_constants import (
ONYX_SCOPE_INSTRUCTIONS,
)
from danswer.db.credentials import update_credential_json
from danswer.db.models import User
from danswer.key_value_store.factory import get_kv_store
from danswer.server.documents.models import CredentialBase
from danswer.server.documents.models import GoogleAppCredentials
from danswer.server.documents.models import GoogleServiceAccountKey
from danswer.utils.logger import setup_logger
logger = setup_logger()
def _build_frontend_google_drive_redirect(source: DocumentSource) -> str:
if source == DocumentSource.GOOGLE_DRIVE:
return f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback"
elif source == DocumentSource.GMAIL:
return f"{WEB_DOMAIN}/admin/connectors/gmail/auth/callback"
else:
raise ValueError(f"Unsupported source: {source}")
def _get_current_oauth_user(creds: OAuthCredentials, source: DocumentSource) -> str:
if source == DocumentSource.GOOGLE_DRIVE:
drive_service = get_drive_service(creds)
user_info = (
drive_service.about()
.get(
fields="user(emailAddress)",
)
.execute()
)
email = user_info.get("user", {}).get("emailAddress")
elif source == DocumentSource.GMAIL:
gmail_service = get_gmail_service(creds)
user_info = (
gmail_service.users()
.getProfile(
userId="me",
fields="emailAddress",
)
.execute()
)
email = user_info.get("emailAddress")
else:
raise ValueError(f"Unsupported source: {source}")
return email
def verify_csrf(credential_id: int, state: str) -> None:
csrf = get_kv_store().load(KV_CRED_KEY.format(str(credential_id)))
if csrf != state:
raise PermissionError(
"State from Google Drive Connector callback does not match expected"
)
def update_credential_access_tokens(
auth_code: str,
credential_id: int,
user: User,
db_session: Session,
source: DocumentSource,
) -> OAuthCredentials | None:
app_credentials = get_google_app_cred(source)
flow = InstalledAppFlow.from_client_config(
app_credentials.model_dump(),
scopes=GOOGLE_SCOPES[source],
redirect_uri=_build_frontend_google_drive_redirect(source),
)
flow.fetch_token(code=auth_code)
creds = flow.credentials
token_json_str = creds.to_json()
# Get user email from Google API so we know who
# the primary admin is for this connector
try:
email = _get_current_oauth_user(creds, source)
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
new_creds_dict = {
DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str,
DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email,
}
if not update_credential_json(credential_id, new_creds_dict, user, db_session):
return None
return creds
def build_service_account_creds(
source: DocumentSource,
primary_admin_email: str | None = None,
) -> CredentialBase:
service_account_key = get_service_account_key(source=source)
credential_dict = {
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key.json(),
}
if primary_admin_email:
credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = primary_admin_email
return CredentialBase(
credential_json=credential_dict,
admin_public=True,
source=source,
)
def get_auth_url(credential_id: int, source: DocumentSource) -> str:
if source == DocumentSource.GOOGLE_DRIVE:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
elif source == DocumentSource.GMAIL:
creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
else:
raise ValueError(f"Unsupported source: {source}")
credential_json = json.loads(creds_str)
flow = InstalledAppFlow.from_client_config(
credential_json,
scopes=GOOGLE_SCOPES[source],
redirect_uri=_build_frontend_google_drive_redirect(source),
)
auth_url, _ = flow.authorization_url(prompt="consent")
parsed_url = cast(ParseResult, urlparse(auth_url))
params = parse_qs(parsed_url.query)
get_kv_store().store(
KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
) # type: ignore
return str(auth_url)
def get_google_app_cred(source: DocumentSource) -> GoogleAppCredentials:
if source == DocumentSource.GOOGLE_DRIVE:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
elif source == DocumentSource.GMAIL:
creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
else:
raise ValueError(f"Unsupported source: {source}")
return GoogleAppCredentials(**json.loads(creds_str))
def upsert_google_app_cred(
app_credentials: GoogleAppCredentials, source: DocumentSource
) -> None:
if source == DocumentSource.GOOGLE_DRIVE:
get_kv_store().store(
KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True
)
elif source == DocumentSource.GMAIL:
get_kv_store().store(KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True)
else:
raise ValueError(f"Unsupported source: {source}")
def delete_google_app_cred(source: DocumentSource) -> None:
if source == DocumentSource.GOOGLE_DRIVE:
get_kv_store().delete(KV_GOOGLE_DRIVE_CRED_KEY)
elif source == DocumentSource.GMAIL:
get_kv_store().delete(KV_GMAIL_CRED_KEY)
else:
raise ValueError(f"Unsupported source: {source}")
def get_service_account_key(source: DocumentSource) -> GoogleServiceAccountKey:
if source == DocumentSource.GOOGLE_DRIVE:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY))
elif source == DocumentSource.GMAIL:
creds_str = str(get_kv_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY))
else:
raise ValueError(f"Unsupported source: {source}")
return GoogleServiceAccountKey(**json.loads(creds_str))
def upsert_service_account_key(
service_account_key: GoogleServiceAccountKey, source: DocumentSource
) -> None:
if source == DocumentSource.GOOGLE_DRIVE:
get_kv_store().store(
KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY,
service_account_key.json(),
encrypt=True,
)
elif source == DocumentSource.GMAIL:
get_kv_store().store(
KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
)
else:
raise ValueError(f"Unsupported source: {source}")
def delete_service_account_key(source: DocumentSource) -> None:
if source == DocumentSource.GOOGLE_DRIVE:
get_kv_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)
elif source == DocumentSource.GMAIL:
get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
else:
raise ValueError(f"Unsupported source: {source}")

View File

@@ -0,0 +1,125 @@
import re
import time
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import Any
from googleapiclient.errors import HttpError # type: ignore
from danswer.connectors.google_drive.models import GoogleDriveFileType
from danswer.utils.logger import setup_logger
from danswer.utils.retry_wrapper import retry_builder
logger = setup_logger()
# Google Drive APIs are quite flaky and may 500 for an
# extended period of time. We try to combat this with a very
# long retry period (up to 50 tries with delays capped at 30 seconds,
# i.e. roughly 25 minutes in the worst case)
add_retries = retry_builder(tries=50, max_delay=30)
def _execute_with_retry(request: Any) -> Any:
max_attempts = 10
attempt = 0  # start at 0 so the loop below can execute up to max_attempts times
while attempt < max_attempts:
# NOTE: for reasons unknown, the Google API will sometimes return a 429
# and, even after waiting the retry period, return another 429.
# It could be due to a few possibilities:
# 1. Other things are also requesting from the Gmail API with the same key
# 2. It's a rolling rate limit, so the moment some requests clear, we hit it again very quickly
# 3. The retry-after has a maximum and we've already hit the limit for the day
# or it's something else...
try:
return request.execute()
except HttpError as error:
attempt += 1
if error.resp.status == 429:
# Attempt to get 'Retry-After' from headers
retry_after = error.resp.get("Retry-After")
if retry_after:
sleep_time = int(retry_after)
else:
# Extract 'Retry after' timestamp from error message
match = re.search(
r"Retry after (\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)",
str(error),
)
if match:
retry_after_timestamp = match.group(1)
retry_after_dt = datetime.strptime(
retry_after_timestamp, "%Y-%m-%dT%H:%M:%S.%fZ"
).replace(tzinfo=timezone.utc)
current_time = datetime.now(timezone.utc)
sleep_time = max(
int((retry_after_dt - current_time).total_seconds()),
0,
)
else:
logger.error(
f"No Retry-After header or timestamp found in error message: {error}"
)
sleep_time = 60
sleep_time += 3 # Add a buffer to be safe
logger.info(
f"Rate limit exceeded. Attempt {attempt}/{max_attempts}. Sleeping for {sleep_time} seconds."
)
time.sleep(sleep_time)
else:
raise
# If we've exhausted all attempts
raise Exception(f"Failed to execute request after {max_attempts} attempts")
def execute_paginated_retrieval(
retrieval_function: Callable,
list_key: str | None = None,
continue_on_404_or_403: bool = False,
**kwargs: Any,
) -> Iterator[GoogleDriveFileType]:
"""Execute a paginated retrieval from Google Drive API
Args:
retrieval_function: The specific list function to call (e.g., service.files().list)
**kwargs: Arguments to pass to the list function
"""
next_page_token = ""
while next_page_token is not None:
request_kwargs = kwargs.copy()
if next_page_token:
request_kwargs["pageToken"] = next_page_token
try:
results = retrieval_function(**request_kwargs).execute()
except HttpError as e:
if e.resp.status >= 500:
results = add_retries(
lambda: retrieval_function(**request_kwargs).execute()
)()
elif e.resp.status == 404 or e.resp.status == 403:
if continue_on_404_or_403:
logger.debug(f"Error executing request: {e}")
results = {}
else:
raise e
elif e.resp.status == 429:
results = _execute_with_retry(
lambda: retrieval_function(**request_kwargs).execute()
)
else:
logger.exception("Error executing request:")
raise e
next_page_token = results.get("nextPageToken")
if list_key:
for item in results.get(list_key, []):
yield item
else:
yield results
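A minimal pagination sketch using the helper above. The module path of execute_paginated_retrieval is an assumption, `drive_service` is assumed to come from get_drive_service, and the q/fields arguments are example values (the connector itself uses its own FILE_FIELDS and query builder).

from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval  # assumed path


def list_all_file_names(drive_service) -> list[str]:
    return [
        file["name"]
        for file in execute_paginated_retrieval(
            retrieval_function=drive_service.files().list,
            list_key="files",
            q="trashed = false",
            fields="nextPageToken, files(id, name)",
        )
    ]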

View File

@@ -0,0 +1,63 @@
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
from googleapiclient.discovery import build # type: ignore
from googleapiclient.discovery import Resource # type: ignore
class GoogleDriveService(Resource):
pass
class GoogleDocsService(Resource):
pass
class AdminService(Resource):
pass
class GmailService(Resource):
pass
def _get_google_service(
service_name: str,
service_version: str,
creds: ServiceAccountCredentials | OAuthCredentials,
user_email: str | None = None,
) -> GoogleDriveService | GoogleDocsService | AdminService | GmailService:
if isinstance(creds, ServiceAccountCredentials):
creds = creds.with_subject(user_email)
service = build(service_name, service_version, credentials=creds)
elif isinstance(creds, OAuthCredentials):
service = build(service_name, service_version, credentials=creds)
return service
def get_google_docs_service(
creds: ServiceAccountCredentials | OAuthCredentials,
user_email: str | None = None,
) -> GoogleDocsService:
return _get_google_service("docs", "v1", creds, user_email)
def get_drive_service(
creds: ServiceAccountCredentials | OAuthCredentials,
user_email: str | None = None,
) -> GoogleDriveService:
return _get_google_service("drive", "v3", creds, user_email)
def get_admin_service(
creds: ServiceAccountCredentials | OAuthCredentials,
user_email: str | None = None,
) -> AdminService:
return _get_google_service("admin", "directory_v1", creds, user_email)
def get_gmail_service(
creds: ServiceAccountCredentials | OAuthCredentials,
user_email: str | None = None,
) -> GmailService:
return _get_google_service("gmail", "v1", creds, user_email)

View File

@@ -0,0 +1,40 @@
from danswer.configs.constants import DocumentSource
# NOTE: we do not need https://www.googleapis.com/auth/documents.readonly
# because it is already covered by `/auth/drive.readonly`
GOOGLE_SCOPES = {
DocumentSource.GOOGLE_DRIVE: [
"https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/drive.metadata.readonly",
"https://www.googleapis.com/auth/admin.directory.group.readonly",
"https://www.googleapis.com/auth/admin.directory.user.readonly",
],
DocumentSource.GMAIL: [
"https://www.googleapis.com/auth/gmail.readonly",
"https://www.googleapis.com/auth/admin.directory.user.readonly",
"https://www.googleapis.com/auth/admin.directory.group.readonly",
],
}
# This is the OAuth token
DB_CREDENTIALS_DICT_TOKEN_KEY = "google_tokens"
# This is the service account key
DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_service_account_key"
# The email saved for both auth types
DB_CREDENTIALS_PRIMARY_ADMIN_KEY = "google_primary_admin"
USER_FIELDS = "nextPageToken, users(primaryEmail)"
# Error message substrings
MISSING_SCOPES_ERROR_STR = "client not authorized for any of the scopes requested"
# Documentation and error messages
SCOPE_DOC_URL = "https://docs.danswer.dev/connectors/google_drive/overview"
ONYX_SCOPE_INSTRUCTIONS = (
"You have upgraded Danswer without updating the Google Auth scopes. "
f"Please refer to the documentation to learn how to update the scopes: {SCOPE_DOC_URL}"
)
# This is the maximum number of threads that can be retrieved at once
SLIM_BATCH_SIZE = 500
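A tiny illustration of how these constants are consumed together; the token JSON placeholder and admin email are obviously fake.

from danswer.configs.constants import DocumentSource
from danswer.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_TOKEN_KEY,
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
    GOOGLE_SCOPES,
)

drive_scopes = GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE]
credential_dict = {
    DB_CREDENTIALS_DICT_TOKEN_KEY: "<oauth token json>",
    DB_CREDENTIALS_PRIMARY_ADMIN_KEY: "admin@example.com",
}
print(drive_scopes, sorted(credential_dict))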

Some files were not shown because too many files have changed in this diff.