gotem

perhaps now this will work
wb this?
2026-02-22 18:25:45 +00:00 · 2024-09-11 11:41:40 -07:00 · 2024-09-11 11:39:26 -07:00 · 2024-09-11 11:37:21 -07:00 · 2024-09-11 11:35:55 -07:00 · 2024-09-11 11:28:03 -07:00
524 changed files with 7536 additions and 49618 deletions
--- a/.github/workflows/check-backend-changes.yml
+++ b/.github/workflows/check-backend-changes.yml
@@ -0,0 +1,23 @@
+name: Check Backend Changes
+
+on:
+  workflow_call:
+    outputs:
+      run-tests:
+        description: "Whether to run tests based on backend changes"
+        value: ${{ jobs.check-run-needed.outputs.run-tests }}
+
+jobs:
+  check-run-needed:
+    runs-on: ubuntu-latest
+    outputs:
+      run-tests: ${{ steps.check.outputs.run-tests }}
+    steps:
+      - uses: actions/checkout@v4
+      - id: check
+        run: |
+          if git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep -q '^backend/'; then
+            echo "run-tests=true" >> $GITHUB_OUTPUT
+          else
+            echo "run-tests=false" >> $GITHUB_OUTPUT
+          fi
--- a/.github/workflows/docker-build-push-backend-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-backend-container-on-tag.yml
@@ -7,17 +7,16 @@ on:

 env:
  REGISTRY_IMAGE: danswer/danswer-backend
-  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
-  
+
 jobs:
  build-and-push:
-    # TODO: investigate a matrix build like the web container 
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    # TODO: make this a matrix build like the web containers
+    runs-on: 
+      group: amd64-image-builders

    steps:
    - name: Checkout code
-      uses: actions/checkout@v4
+      uses: actions/checkout@v2

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
@@ -28,11 +27,6 @@ jobs:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_TOKEN }}

-    - name: Install build-essential
-      run: |
-        sudo apt-get update
-        sudo apt-get install -y build-essential
-          
    - name: Backend Image Docker Build and Push
      uses: docker/build-push-action@v5
      with:
@@ -42,20 +36,12 @@ jobs:
        push: true
        tags: |
          ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-          ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
+          ${{ env.REGISTRY_IMAGE }}:latest
        build-args: |
          DANSWER_VERSION=${{ github.ref_name }}

-    # trivy has their own rate limiting issues causing this action to flake
-    # we worked around it by hardcoding to different db repos in env
-    # can re-enable when they figure it out
-    # https://github.com/aquasecurity/trivy/discussions/7538
-    # https://github.com/aquasecurity/trivy-action/issues/389
    - name: Run Trivy vulnerability scanner
      uses: aquasecurity/trivy-action@master
-      env:
-        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
-        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
      with:
        # To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
        image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -5,18 +5,14 @@ on:
    tags:
      - '*'

-env:
-  REGISTRY_IMAGE: danswer/danswer-model-server
-  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
-  
 jobs:
  build-and-push:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: 
+      group: amd64-image-builders

    steps:
    - name: Checkout code
-      uses: actions/checkout@v4
+      uses: actions/checkout@v2

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
@@ -35,21 +31,13 @@ jobs:
        platforms: linux/amd64,linux/arm64
        push: true
        tags: |
-          ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-          ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
+          danswer/danswer-model-server:${{ github.ref_name }}
+          danswer/danswer-model-server:latest
        build-args: |
          DANSWER_VERSION=${{ github.ref_name }}

-    # trivy has their own rate limiting issues causing this action to flake
-    # we worked around it by hardcoding to different db repos in env
-    # can re-enable when they figure it out
-    # https://github.com/aquasecurity/trivy/discussions/7538
-    # https://github.com/aquasecurity/trivy-action/issues/389
    - name: Run Trivy vulnerability scanner
      uses: aquasecurity/trivy-action@master
-      env:
-        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
-        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
      with:
        image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
        severity: 'CRITICAL,HIGH'
--- a/.github/workflows/docker-build-push-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-web-container-on-tag.yml
@@ -7,8 +7,7 @@ on:

 env:
  REGISTRY_IMAGE: danswer/danswer-web-server
-  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
-  
+
 jobs:
  build:
    runs-on: 
@@ -36,7 +35,7 @@ jobs:
          images: ${{ env.REGISTRY_IMAGE }}
          tags: |
            type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-            type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
+            type=raw,value=${{ env.REGISTRY_IMAGE }}:latest
      
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
@@ -113,16 +112,8 @@ jobs:
        run: |
          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}

-    # trivy has their own rate limiting issues causing this action to flake
-    # we worked around it by hardcoding to different db repos in env
-    # can re-enable when they figure it out
-    # https://github.com/aquasecurity/trivy/discussions/7538
-    # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
-        env:
-          TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
-          TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
        with:
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: 'CRITICAL,HIGH'
--- a/.github/workflows/docker-tag-latest.yml
+++ b/.github/workflows/docker-tag-latest.yml
@@ -1,6 +1,3 @@
-# This workflow is set up to be manually triggered via the GitHub Action tab.
-# Given a version, it will tag those backend and webserver images as "latest".
-
 name: Tag Latest Version

 on:
@@ -12,9 +9,7 @@ on:

 jobs:
  tag:
-    # See https://runs-on.com/runners/linux/
-    # use a lower powered instance since this just does i/o to docker hub
-    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: ubuntu-latest
    steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v1
--- a/.github/workflows/pr-helm-chart-testing.yml.disabled.txt
+++ b/.github/workflows/pr-helm-chart-testing.yml.disabled.txt
@@ -12,8 +12,7 @@ on:

 jobs:
  lint-test:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
+    runs-on: Amd64

    # fetch-depth 0 is required for helm/chart-testing-action
    steps:
@@ -38,9 +37,9 @@ jobs:
          backend/requirements/model_server.txt
    - run: |
        python -m pip install --upgrade pip
-        pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
+        pip install -r backend/requirements/default.txt
+        pip install -r backend/requirements/dev.txt
+        pip install -r backend/requirements/model_server.txt

    - name: Set up chart-testing
      uses: helm/chart-testing-action@v2.6.1
--- a/.github/workflows/pr-python-checks.yml
+++ b/.github/workflows/pr-python-checks.yml
@@ -3,14 +3,16 @@ name: Python Checks
 on:
  merge_group:
  pull_request:
-    branches:
-      - main
-      - 'release/**'
+    branches: [ main ]

 jobs:
+  check-changes:
+    uses: ./.github/workflows/check-backend-changes.yml
+
  mypy-check:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    needs: check-changes
+    if: needs.check-changes.outputs.run-tests == 'true'
+    runs-on: ubuntu-latest

    steps:
    - name: Checkout code
@@ -27,9 +29,9 @@ jobs:
          backend/requirements/model_server.txt
    - run: |
        python -m pip install --upgrade pip
-        pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
+        pip install -r backend/requirements/default.txt
+        pip install -r backend/requirements/dev.txt
+        pip install -r backend/requirements/model_server.txt

    - name: Run MyPy
      run: |
@@ -50,3 +52,10 @@ jobs:
      run: |
        cd backend
        black --check .
+
+  skip-tests:
+    needs: check-changes
+    if: needs.check-changes.outputs.run-tests == 'false'
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "No changes in backend, skipping this test."
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -15,14 +15,15 @@ env:
  CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
  CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
-  # Jira
-  JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
-  JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}

 jobs:
+  check-changes:
+    uses: ./.github/workflows/check-backend-changes.yml
+
  connectors-check:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    needs: check-changes
+    if: needs.check-changes.outputs.run-tests == 'true'
+    runs-on: ubuntu-latest

    env:
      PYTHONPATH: ./backend
@@ -43,8 +44,8 @@ jobs:
      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+          pip install -r backend/requirements/default.txt
+          pip install -r backend/requirements/dev.txt

      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
@@ -59,3 +60,10 @@ jobs:
            -H 'Content-type: application/json' \
            --data '{"text":"Scheduled Connector Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
            $SLACK_WEBHOOK
+
+  skip-tests:
+    needs: check-changes
+    if: needs.check-changes.outputs.run-tests == 'false'
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "No changes in backend, skipping this test."
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -1,58 +0,0 @@
-name: Connector Tests
-
-on:
-  schedule:
-    # This cron expression runs the job daily at 16:00 UTC (9am PT)
-    - cron: "0 16 * * *"
-
-env:
-  # Bedrock
-  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-  AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
-
-  # OpenAI
-  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-
-jobs:
-  connectors-check:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
-
-    env:
-      PYTHONPATH: ./backend
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.11"
-          cache: "pip"
-          cache-dependency-path: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt
-
-      - name: Install Dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-
-      - name: Run Tests
-        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
-        run: |
-          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
-          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
-
-      - name: Alert on Failure
-        if: failure() && github.event_name == 'schedule'
-        env:
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-        run: |
-          curl -X POST \
-            -H 'Content-type: application/json' \
-            --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
-            $SLACK_WEBHOOK
--- a/.github/workflows/pr-python-tests.yml
+++ b/.github/workflows/pr-python-tests.yml
@@ -3,19 +3,20 @@ name: Python Unit Tests
 on:
  merge_group:
  pull_request:
-    branches:
-      - main
-      - 'release/**'
+    branches: [ main ]

 jobs:
+  check-changes:
+    uses: ./.github/workflows/check-backend-changes.yml
+
  backend-check:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    needs: check-changes
+    if: needs.check-changes.outputs.run-tests == 'true'
+    runs-on: ubuntu-latest

    env:
      PYTHONPATH: ./backend
-      REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
-      
+
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
@@ -32,9 +33,17 @@ jobs:
    - name: Install Dependencies
      run: |
        python -m pip install --upgrade pip
-        pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-        pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+        pip install -r backend/requirements/default.txt
+        pip install -r backend/requirements/dev.txt

    - name: Run Tests
      shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
      run: py.test -o junit_family=xunit2 -xv --ff backend/tests/unit
+
+
+  skip-tests:
+    needs: check-changes
+    if: needs.check-changes.outputs.run-tests == 'false'
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "No changes in backend, skipping this test."
--- a/.github/workflows/pr-quality-checks.yml
+++ b/.github/workflows/pr-quality-checks.yml
@@ -1,6 +1,6 @@
 name: Quality Checks PR
 concurrency:
-  group: Quality-Checks-PR-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  group: Quality-Checks-PR-${{ github.head_ref }}
  cancel-in-progress: true

 on:
@@ -9,8 +9,7 @@ on:

 jobs:
  quality-checks:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
--- a/.github/workflows/pr-Integration-tests.yml
+++ b/.github/workflows/pr-Integration-tests.yml
@@ -1,23 +1,25 @@
-name: Run Integration Tests v2
+name: Run Integration Tests
 concurrency:
-  group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  group: Run-Integration-Tests-${{ github.head_ref }}
  cancel-in-progress: true

 on:
  merge_group:
  pull_request:
-    branches:
-      - main
-      - 'release/**'
+    branches: [ main ]

 env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

 jobs:
+  check-changes:
+    uses: ./.github/workflows/check-backend-changes.yml
+
  integration-tests:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
+    needs: check-changes
+    if: needs.check-changes.outputs.run-tests == 'true'
+    runs-on: 
+      group: 'arm64-image-builders'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -31,59 +33,49 @@ jobs:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      # tag every docker image with "test" so that we can spin up the correct set
-      # of images during testing
-      
-      # We don't need to build the Web Docker image since it's not yet used
-      # in the integration tests. We have a separate action to verify that it builds 
-      # successfully.
+      # NOTE: we don't need to build the Web Docker image since it's not used
+      # during the IT for now. We have a separate action to verify it builds 
+      # succesfully
      - name: Pull Web Docker image
        run: |
          docker pull danswer/danswer-web-server:latest
-          docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:test
+          docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:it

-      # we use the runs-on cache for docker builds
-      # in conjunction with runs-on runners, it has better speed and unlimited caching
-      # https://runs-on.com/caching/s3-cache-for-github-actions/
-      # https://runs-on.com/caching/docker/
-      # https://github.com/moby/buildkit#s3-cache-experimental
-      
-      # images are built and run locally for testing purposes. Not pushed.
      - name: Build Backend Docker image
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./backend
          file: ./backend/Dockerfile
-          platforms: linux/amd64
-          tags: danswer/danswer-backend:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+          platforms: linux/arm64
+          tags: danswer/danswer-backend:it
+          cache-from: type=registry,ref=danswer/danswer-backend:it
+          cache-to: |
+            type=registry,ref=danswer/danswer-backend:it,mode=max
+            type=inline

      - name: Build Model Server Docker image
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
-          platforms: linux/amd64
-          tags: danswer/danswer-model-server:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+          platforms: linux/arm64
+          tags: danswer/danswer-model-server:it
+          cache-from: type=registry,ref=danswer/danswer-model-server:it
+          cache-to: |
+            type=registry,ref=danswer/danswer-model-server:it,mode=max
+            type=inline

      - name: Build integration test Docker image
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./backend
          file: ./backend/tests/integration/Dockerfile
-          platforms: linux/amd64
-          tags: danswer/danswer-integration:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+          platforms: linux/arm64
+          tags: danswer/integration-test-runner:it
+          cache-from: type=registry,ref=danswer/integration-test-runner:it
+          cache-to: |
+            type=registry,ref=danswer/integration-test-runner:it,mode=max
+            type=inline

      - name: Start Docker containers
        run: |
@@ -92,7 +84,7 @@ jobs:
          AUTH_TYPE=basic \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=test \
+          IMAGE_TAG=it \
          docker compose -f docker-compose.dev.yml -p danswer-stack up -d
        id: start_docker

@@ -100,8 +92,6 @@ jobs:
        run: |
          echo "Starting wait-for-service script..."
          
-          docker logs -f danswer-stack-api_server-1 &
-
          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds
          
@@ -134,7 +124,6 @@ jobs:
        run: |
          echo "Running integration tests..."
          docker run --rm --network danswer-stack_default \
-            --name test-runner \
            -e POSTGRES_HOST=relational_db \
            -e POSTGRES_USER=postgres \
            -e POSTGRES_PASSWORD=password \
@@ -143,9 +132,7 @@ jobs:
            -e REDIS_HOST=cache \
            -e API_SERVER_HOST=api_server \
            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
-            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-            -e TEST_WEB_HOSTNAME=test-runner \
-            danswer/danswer-integration:test
+            danswer/integration-test-runner:it
        continue-on-error: true
        id: run_tests

@@ -176,3 +163,10 @@ jobs:
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.dev.yml -p danswer-stack down -v
+
+  skip-tests:
+    needs: check-changes
+    if: needs.check-changes.outputs.run-tests == 'false'
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "No changes in backend, skipping this test."
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -1,54 +0,0 @@
-name: Nightly Tag Push
-
-on:
-  schedule:
-    - cron: '0 10 * * *' # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
-
-permissions:
-  contents: write  # Allows pushing tags to the repository
-
-jobs:
-  create-and-push-tag:
-    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
-
-    steps:
-    # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
-    # see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
-    # implement here which needs an actual user's deploy key
-    - name: Checkout code
-      uses: actions/checkout@v4
-      with:
-        ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
-
-    - name: Set up Git user
-      run: |
-        git config user.name "Richard Kuo [bot]"
-        git config user.email "rkuo[bot]@danswer.ai"
-
-    - name: Check for existing nightly tag
-      id: check_tag
-      run: |
-        if git tag --points-at HEAD --list "nightly-latest*" | grep -q .; then
-          echo "A tag starting with 'nightly-latest' already exists on HEAD."
-          echo "tag_exists=true" >> $GITHUB_OUTPUT
-        else
-          echo "No tag starting with 'nightly-latest' exists on HEAD."
-          echo "tag_exists=false" >> $GITHUB_OUTPUT
-        fi
-        
-    # don't tag again if HEAD already has a nightly-latest tag on it
-    - name: Create Nightly Tag
-      if: steps.check_tag.outputs.tag_exists == 'false'
-      env:
-        DATE: ${{ github.run_id }}
-      run: |
-        TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
-        echo "Creating tag: $TAG_NAME"
-        git tag $TAG_NAME
-
-    - name: Push Tag
-      if: steps.check_tag.outputs.tag_exists == 'false'
-      run: |
-        TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
-        git push origin $TAG_NAME
-        
--- a/.prettierignore
+++ b/.prettierignore
@@ -1 +0,0 @@
-backend/tests/integration/tests/pruning/website
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -22,7 +22,7 @@ Your input is vital to making sure that Danswer moves in the right direction.
 Before starting on implementation, please raise a GitHub issue.

 And always feel free to message us (Chris Weaver / Yuhong Sun) on 
-[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2lcmqw703-071hBuZBfNEOGUsLa5PXvQ) / 
+[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2afut44lv-Rw3kSWu6_OmdAXRpCv80DQ) / 
 [Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all. 


@@ -56,18 +56,19 @@ Danswer being a fully functional app, relies on some external software, specific


 > **Note:**
-> This guide provides instructions to build and run Danswer locally from source with Docker containers providing the above external software. We believe this combination is easier for
-> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Danswer stack within Docker below.
+> This guide provides instructions to set up the Danswer specific services outside of Docker because it's easier for
+> development purposes. However, you can also use the containers and update with local changes by providing the
+> `--build` flag.


 ### Local Set Up
-Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.
+Be sure to use Python version 3.11.

 If using a lower version, modifications will have to be made to the code.
 If using a higher version, sometimes some libraries will not be available (i.e. we had problems with Tensorflow in the past with higher versions of python).


-#### Backend: Python requirements
+#### Installing Requirements
 Currently, we use pip and recommend creating a virtual environment.

 For convenience here's a command for it:
@@ -97,16 +98,6 @@ pip install -r danswer/backend/requirements/ee.txt
 pip install -r danswer/backend/requirements/model_server.txt
 ```

-Install Playwright for Python (headless browser required by the Web Connector)
-
-In the activated Python virtualenv, install Playwright for Python by running:
-```bash
-playwright install
-```
-
-You may have to deactivate and reactivate your virtualenv for `playwright` to appear on your path.
-
-#### Frontend: Node dependencies

 Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend.
 Once the above is done, navigate to `danswer/web` run:
@@ -114,7 +105,19 @@ Once the above is done, navigate to `danswer/web` run:
 npm i
 ```

-#### Docker containers for external software
+Install Playwright (headless browser required by the Web Connector)
+
+> **Note:**
+> If you have just run the pip install, open a new terminal and source the python virtual-env again.
+> This will pull the updated PATH to include playwright
+
+Then install Playwright by running:
+```bash
+playwright install
+```
+
+
+#### Dependent Docker Containers
 You will need Docker installed to run these containers.

 First navigate to `danswer/deployment/docker_compose`, then start up Postgres/Vespa/Redis with:
@@ -124,7 +127,7 @@ docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational
 (index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)


-#### Running Danswer locally
+#### Running Danswer
 To start the frontend, navigate to `danswer/web` and run:
 ```bash
 npm run dev
@@ -173,36 +176,6 @@ powershell -Command "
 > **Note:**
 > If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.

-#### Wrapping up
-
-You should now have 4 servers running:
-
- Web server
- Backend API
- Model server
- Background jobs
-
-Now, visit `http://localhost:3000` in your browser. You should see the Danswer onboarding wizard where you can connect your external LLM provider to Danswer.
-
-You've successfully set up a local Danswer instance! 🏁
-
-#### Running the Danswer application in a container
-
-You can run the full Danswer application stack from pre-built images including all external software dependencies.
-
-Navigate to `danswer/deployment/docker_compose` and run:
-
-```bash
-docker compose -f docker-compose.dev.yml -p danswer-stack up -d
-```
-
-After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Danswer.
-
-If you want to make changes to Danswer and run those changes in Docker, you can also build a local version of the Danswer container images that incorporates your changes like so:
-
-```bash
-docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build
-```

 ### Formatting and Linting
 #### Backend
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -9,8 +9,7 @@ founders@danswer.ai for more information. Please visit https://github.com/danswe

 # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions.
 ARG DANSWER_VERSION=0.3-dev
-ENV DANSWER_VERSION=${DANSWER_VERSION} \
-    DANSWER_RUNNING_IN_DOCKER="true"
+ENV DANSWER_VERSION=${DANSWER_VERSION}

 RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
 # Install system dependencies
@@ -41,8 +40,6 @@ RUN apt-get update && \
 COPY ./requirements/default.txt /tmp/requirements.txt
 COPY ./requirements/ee.txt /tmp/ee-requirements.txt
 RUN pip install --no-cache-dir --upgrade \
-        --retries 5 \
-        --timeout 30 \
        -r /tmp/requirements.txt \
        -r /tmp/ee-requirements.txt && \
    pip uninstall -y py && \
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -8,17 +8,11 @@ visit https://github.com/danswer-ai/danswer."

 # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions.
 ARG DANSWER_VERSION=0.3-dev
-ENV DANSWER_VERSION=${DANSWER_VERSION} \
-    DANSWER_RUNNING_IN_DOCKER="true"
-
-
+ENV DANSWER_VERSION=${DANSWER_VERSION}
 RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"

 COPY ./requirements/model_server.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir --upgrade \
-        --retries 5 \
-        --timeout 30 \
-        -r /tmp/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt

 RUN apt-get remove -y --allow-remove-essential perl-base && \
    apt-get autoremove -y
--- a/backend/alembic/env.py
+++ b/backend/alembic/env.py
@@ -9,9 +9,9 @@ from sqlalchemy.engine import Connection
 from sqlalchemy.ext.asyncio import create_async_engine
 from celery.backends.database.session import ResultModelBase  # type: ignore
 from sqlalchemy.schema import SchemaItem
-from sqlalchemy.sql import text

-# Alembic Config object
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
 config = context.config

 # Interpret the config file for Python logging.
@@ -21,26 +21,16 @@ if config.config_file_name is not None and config.attributes.get(
 ):
    fileConfig(config.config_file_name)

-# Add your model's MetaData object here
+# add your model's MetaData object here
 # for 'autogenerate' support
 # from myapp import mymodel
 # target_metadata = mymodel.Base.metadata
 target_metadata = [Base.metadata, ResultModelBase.metadata]

-
-def get_schema_options() -> tuple[str, bool]:
-    x_args_raw = context.get_x_argument()
-    x_args = {}
-    for arg in x_args_raw:
-        for pair in arg.split(","):
-            if "=" in pair:
-                key, value = pair.split("=", 1)
-                x_args[key] = value
-
-    schema_name = x_args.get("schema", "public")
-    create_schema = x_args.get("create_schema", "true").lower() == "true"
-    return schema_name, create_schema
-
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.

 EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}

@@ -64,20 +54,17 @@ def run_migrations_offline() -> None:
    and not an Engine, though an Engine is acceptable
    here as well.  By skipping the Engine creation
    we don't even need a DBAPI to be available.
+
    Calls to context.execute() here emit the given string to the
    script output.
+
    """
    url = build_connection_string()
-    schema, _ = get_schema_options()
-
    context.configure(
        url=url,
        target_metadata=target_metadata,  # type: ignore
        literal_binds=True,
-        include_object=include_object,
        dialect_opts={"paramstyle": "named"},
-        version_table_schema=schema,
-        include_schemas=True,
    )

    with context.begin_transaction():
@@ -85,28 +72,22 @@ def run_migrations_offline() -> None:


 def do_run_migrations(connection: Connection) -> None:
-    schema, create_schema = get_schema_options()
-    if create_schema:
-        connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"'))
-        connection.execute(text("COMMIT"))
-
-    connection.execute(text(f'SET search_path TO "{schema}"'))
-
    context.configure(
        connection=connection,
        target_metadata=target_metadata,  # type: ignore
-        version_table_schema=schema,
-        include_schemas=True,
-        compare_type=True,
-        compare_server_default=True,
-    )
+        include_object=include_object,
+    )  # type: ignore

    with context.begin_transaction():
        context.run_migrations()


 async def run_async_migrations() -> None:
-    """Run migrations in 'online' mode."""
+    """In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+
    connectable = create_async_engine(
        build_connection_string(),
        poolclass=pool.NullPool,
@@ -120,6 +101,7 @@ async def run_async_migrations() -> None:

 def run_migrations_online() -> None:
    """Run migrations in 'online' mode."""
+
    asyncio.run(run_async_migrations())


--- a/backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py
+++ b/backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py
@@ -1,27 +0,0 @@
-"""add ccpair deletion failure message
-
-Revision ID: 0ebb1d516877
-Revises: 52a219fb5233
-Create Date: 2024-09-10 15:03:48.233926
-
-"""
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "0ebb1d516877"
-down_revision = "52a219fb5233"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.add_column(
-        "connector_credential_pair",
-        sa.Column("deletion_failure_message", sa.String(), nullable=True),
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("connector_credential_pair", "deletion_failure_message")
--- a/backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py
+++ b/backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py
@@ -1,102 +0,0 @@
-"""add_user_delete_cascades
-
-Revision ID: 1b8206b29c5d
-Revises: 35e6853a51d5
-Create Date: 2024-09-18 11:48:59.418726
-
-"""
-from alembic import op
-
-
-# revision identifiers, used by Alembic.
-revision = "1b8206b29c5d"
-down_revision = "35e6853a51d5"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.drop_constraint("credential_user_id_fkey", "credential", type_="foreignkey")
-    op.create_foreign_key(
-        "credential_user_id_fkey",
-        "credential",
-        "user",
-        ["user_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-
-    op.drop_constraint("chat_session_user_id_fkey", "chat_session", type_="foreignkey")
-    op.create_foreign_key(
-        "chat_session_user_id_fkey",
-        "chat_session",
-        "user",
-        ["user_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-
-    op.drop_constraint("chat_folder_user_id_fkey", "chat_folder", type_="foreignkey")
-    op.create_foreign_key(
-        "chat_folder_user_id_fkey",
-        "chat_folder",
-        "user",
-        ["user_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-
-    op.drop_constraint("prompt_user_id_fkey", "prompt", type_="foreignkey")
-    op.create_foreign_key(
-        "prompt_user_id_fkey", "prompt", "user", ["user_id"], ["id"], ondelete="CASCADE"
-    )
-
-    op.drop_constraint("notification_user_id_fkey", "notification", type_="foreignkey")
-    op.create_foreign_key(
-        "notification_user_id_fkey",
-        "notification",
-        "user",
-        ["user_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-
-    op.drop_constraint("inputprompt_user_id_fkey", "inputprompt", type_="foreignkey")
-    op.create_foreign_key(
-        "inputprompt_user_id_fkey",
-        "inputprompt",
-        "user",
-        ["user_id"],
-        ["id"],
-        ondelete="CASCADE",
-    )
-
-
-def downgrade() -> None:
-    op.drop_constraint("credential_user_id_fkey", "credential", type_="foreignkey")
-    op.create_foreign_key(
-        "credential_user_id_fkey", "credential", "user", ["user_id"], ["id"]
-    )
-
-    op.drop_constraint("chat_session_user_id_fkey", "chat_session", type_="foreignkey")
-    op.create_foreign_key(
-        "chat_session_user_id_fkey", "chat_session", "user", ["user_id"], ["id"]
-    )
-
-    op.drop_constraint("chat_folder_user_id_fkey", "chat_folder", type_="foreignkey")
-    op.create_foreign_key(
-        "chat_folder_user_id_fkey", "chat_folder", "user", ["user_id"], ["id"]
-    )
-
-    op.drop_constraint("prompt_user_id_fkey", "prompt", type_="foreignkey")
-    op.create_foreign_key("prompt_user_id_fkey", "prompt", "user", ["user_id"], ["id"])
-
-    op.drop_constraint("notification_user_id_fkey", "notification", type_="foreignkey")
-    op.create_foreign_key(
-        "notification_user_id_fkey", "notification", "user", ["user_id"], ["id"]
-    )
-
-    op.drop_constraint("inputprompt_user_id_fkey", "inputprompt", type_="foreignkey")
-    op.create_foreign_key(
-        "inputprompt_user_id_fkey", "inputprompt", "user", ["user_id"], ["id"]
-    )
--- a/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py
+++ b/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py
@@ -30,7 +30,7 @@ def upgrade() -> None:
    op.add_column(
        "search_settings",
        sa.Column(
-            "multipass_indexing", sa.Boolean(), nullable=False, server_default="false"
+            "multipass_indexing", sa.Boolean(), nullable=False, server_default="true"
        ),
    )
    op.add_column(
--- a/backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py
+++ b/backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py
@@ -1,64 +0,0 @@
-"""server default chosen assistants
-
-Revision ID: 35e6853a51d5
-Revises: c99d76fcd298
-Create Date: 2024-09-13 13:20:32.885317
-
-"""
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = "35e6853a51d5"
-down_revision = "c99d76fcd298"
-branch_labels = None
-depends_on = None
-
-DEFAULT_ASSISTANTS = [-2, -1, 0]
-
-
-def upgrade() -> None:
-    # Step 1: Update any NULL values to the default value
-    # This upgrades existing users without ordered assistant
-    # to have default assistants set to visible assistants which are
-    # accessible by them.
-    op.execute(
-        """
-        UPDATE "user" u
-        SET chosen_assistants = (
-            SELECT jsonb_agg(
-                p.id ORDER BY
-                    COALESCE(p.display_priority, 2147483647) ASC,
-                    p.id ASC
-            )
-            FROM persona p
-            LEFT JOIN persona__user pu ON p.id = pu.persona_id AND pu.user_id = u.id
-            WHERE p.is_visible = true
-            AND (p.is_public = true OR pu.user_id IS NOT NULL)
-        )
-        WHERE chosen_assistants IS NULL
-        OR chosen_assistants = 'null'
-        OR jsonb_typeof(chosen_assistants) = 'null'
-        OR (jsonb_typeof(chosen_assistants) = 'string' AND chosen_assistants = '"null"')
-    """
-    )
-
-    # Step 2: Alter the column to make it non-nullable
-    op.alter_column(
-        "user",
-        "chosen_assistants",
-        type_=postgresql.JSONB(astext_type=sa.Text()),
-        nullable=False,
-        server_default=sa.text(f"'{DEFAULT_ASSISTANTS}'::jsonb"),
-    )
-
-
-def downgrade() -> None:
-    op.alter_column(
-        "user",
-        "chosen_assistants",
-        type_=postgresql.JSONB(astext_type=sa.Text()),
-        nullable=True,
-        server_default=None,
-    )
--- a/backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py
+++ b/backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py
@@ -1,46 +0,0 @@
-"""fix_user__external_user_group_id_fk
-
-Revision ID: 46b7a812670f
-Revises: f32615f71aeb
-Create Date: 2024-09-23 12:58:03.894038
-
-"""
-from alembic import op
-
-# revision identifiers, used by Alembic.
-revision = "46b7a812670f"
-down_revision = "f32615f71aeb"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # Drop the existing primary key
-    op.drop_constraint(
-        "user__external_user_group_id_pkey",
-        "user__external_user_group_id",
-        type_="primary",
-    )
-
-    # Add the new composite primary key
-    op.create_primary_key(
-        "user__external_user_group_id_pkey",
-        "user__external_user_group_id",
-        ["user_id", "external_user_group_id", "cc_pair_id"],
-    )
-
-
-def downgrade() -> None:
-    # Drop the composite primary key
-    op.drop_constraint(
-        "user__external_user_group_id_pkey",
-        "user__external_user_group_id",
-        type_="primary",
-    )
-    # Delete all entries from the table
-    op.execute("DELETE FROM user__external_user_group_id")
-
-    # Recreate the original primary key on user_id
-    op.create_primary_key(
-        "user__external_user_group_id_pkey", "user__external_user_group_id", ["user_id"]
-    )
--- a/backend/alembic/versions/52a219fb5233_add_last_synced_and_last_modified_to_document_table.py
+++ b/backend/alembic/versions/52a219fb5233_add_last_synced_and_last_modified_to_document_table.py
@@ -1,7 +1,7 @@
 """Add last synced and last modified to document table

 Revision ID: 52a219fb5233
-Revises: f7e58d357687
+Revises: f17bf3b0d9f1
 Create Date: 2024-08-28 17:40:46.077470

 """
--- a/backend/alembic/versions/55546a7967ee_assistant_rework.py
+++ b/backend/alembic/versions/55546a7967ee_assistant_rework.py
@@ -1,79 +0,0 @@
-"""assistant_rework
-
-Revision ID: 55546a7967ee
-Revises: 61ff3651add4
-Create Date: 2024-09-18 17:00:23.755399
-
-"""
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-
-# revision identifiers, used by Alembic.
-revision = "55546a7967ee"
-down_revision = "61ff3651add4"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # Reworking persona and user tables for new assistant features
-    # keep track of user's chosen assistants separate from their `ordering`
-    op.add_column("persona", sa.Column("builtin_persona", sa.Boolean(), nullable=True))
-    op.execute("UPDATE persona SET builtin_persona = default_persona")
-    op.alter_column("persona", "builtin_persona", nullable=False)
-    op.drop_index("_default_persona_name_idx", table_name="persona")
-    op.create_index(
-        "_builtin_persona_name_idx",
-        "persona",
-        ["name"],
-        unique=True,
-        postgresql_where=sa.text("builtin_persona = true"),
-    )
-
-    op.add_column(
-        "user", sa.Column("visible_assistants", postgresql.JSONB(), nullable=True)
-    )
-    op.add_column(
-        "user", sa.Column("hidden_assistants", postgresql.JSONB(), nullable=True)
-    )
-    op.execute(
-        "UPDATE \"user\" SET visible_assistants = '[]'::jsonb, hidden_assistants = '[]'::jsonb"
-    )
-    op.alter_column(
-        "user",
-        "visible_assistants",
-        nullable=False,
-        server_default=sa.text("'[]'::jsonb"),
-    )
-    op.alter_column(
-        "user",
-        "hidden_assistants",
-        nullable=False,
-        server_default=sa.text("'[]'::jsonb"),
-    )
-    op.drop_column("persona", "default_persona")
-    op.add_column(
-        "persona", sa.Column("is_default_persona", sa.Boolean(), nullable=True)
-    )
-
-
-def downgrade() -> None:
-    # Reverting changes made in upgrade
-    op.drop_column("user", "hidden_assistants")
-    op.drop_column("user", "visible_assistants")
-    op.drop_index("_builtin_persona_name_idx", table_name="persona")
-
-    op.drop_column("persona", "is_default_persona")
-    op.add_column("persona", sa.Column("default_persona", sa.Boolean(), nullable=True))
-    op.execute("UPDATE persona SET default_persona = builtin_persona")
-    op.alter_column("persona", "default_persona", nullable=False)
-    op.drop_column("persona", "builtin_persona")
-    op.create_index(
-        "_default_persona_name_idx",
-        "persona",
-        ["name"],
-        unique=True,
-        postgresql_where=sa.text("default_persona = true"),
-    )
--- a/backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py
+++ b/backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py
@@ -1,35 +0,0 @@
-"""match_any_keywords flag for standard answers
-
-Revision ID: 5c7fdadae813
-Revises: efb35676026c
-Create Date: 2024-09-13 18:52:59.256478
-
-"""
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "5c7fdadae813"
-down_revision = "efb35676026c"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.add_column(
-        "standard_answer",
-        sa.Column(
-            "match_any_keywords",
-            sa.Boolean(),
-            nullable=False,
-            server_default=sa.false(),
-        ),
-    )
-    # ### end Alembic commands ###
-
-
-def downgrade() -> None:
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_column("standard_answer", "match_any_keywords")
-    # ### end Alembic commands ###
--- a/backend/alembic/versions/61ff3651add4_add_permission_syncing.py
+++ b/backend/alembic/versions/61ff3651add4_add_permission_syncing.py
@@ -1,162 +0,0 @@
-"""Add Permission Syncing
-
-Revision ID: 61ff3651add4
-Revises: 1b8206b29c5d
-Create Date: 2024-09-05 13:57:11.770413
-
-"""
-import fastapi_users_db_sqlalchemy
-
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = "61ff3651add4"
-down_revision = "1b8206b29c5d"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # Admin user who set up connectors will lose access to the docs temporarily
-    # only way currently to give back access is to rerun from beginning
-    op.add_column(
-        "connector_credential_pair",
-        sa.Column(
-            "access_type",
-            sa.String(),
-            nullable=True,
-        ),
-    )
-    op.execute(
-        "UPDATE connector_credential_pair SET access_type = 'PUBLIC' WHERE is_public = true"
-    )
-    op.execute(
-        "UPDATE connector_credential_pair SET access_type = 'PRIVATE' WHERE is_public = false"
-    )
-    op.alter_column("connector_credential_pair", "access_type", nullable=False)
-
-    op.add_column(
-        "connector_credential_pair",
-        sa.Column(
-            "auto_sync_options",
-            postgresql.JSONB(astext_type=sa.Text()),
-            nullable=True,
-        ),
-    )
-    op.add_column(
-        "connector_credential_pair",
-        sa.Column("last_time_perm_sync", sa.DateTime(timezone=True), nullable=True),
-    )
-    op.drop_column("connector_credential_pair", "is_public")
-
-    op.add_column(
-        "document",
-        sa.Column("external_user_emails", postgresql.ARRAY(sa.String()), nullable=True),
-    )
-    op.add_column(
-        "document",
-        sa.Column(
-            "external_user_group_ids", postgresql.ARRAY(sa.String()), nullable=True
-        ),
-    )
-    op.add_column(
-        "document",
-        sa.Column("is_public", sa.Boolean(), nullable=True),
-    )
-
-    op.create_table(
-        "user__external_user_group_id",
-        sa.Column(
-            "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False
-        ),
-        sa.Column("external_user_group_id", sa.String(), nullable=False),
-        sa.Column("cc_pair_id", sa.Integer(), nullable=False),
-        sa.PrimaryKeyConstraint("user_id"),
-    )
-
-    op.drop_column("external_permission", "user_id")
-    op.drop_column("email_to_external_user_cache", "user_id")
-    op.drop_table("permission_sync_run")
-    op.drop_table("external_permission")
-    op.drop_table("email_to_external_user_cache")
-
-
-def downgrade() -> None:
-    op.add_column(
-        "connector_credential_pair",
-        sa.Column("is_public", sa.BOOLEAN(), nullable=True),
-    )
-    op.execute(
-        "UPDATE connector_credential_pair SET is_public = (access_type = 'PUBLIC')"
-    )
-    op.alter_column("connector_credential_pair", "is_public", nullable=False)
-
-    op.drop_column("connector_credential_pair", "auto_sync_options")
-    op.drop_column("connector_credential_pair", "access_type")
-    op.drop_column("connector_credential_pair", "last_time_perm_sync")
-    op.drop_column("document", "external_user_emails")
-    op.drop_column("document", "external_user_group_ids")
-    op.drop_column("document", "is_public")
-
-    op.drop_table("user__external_user_group_id")
-
-    # Drop the enum type at the end of the downgrade
-    op.create_table(
-        "permission_sync_run",
-        sa.Column("id", sa.Integer(), nullable=False),
-        sa.Column(
-            "source_type",
-            sa.String(),
-            nullable=False,
-        ),
-        sa.Column("update_type", sa.String(), nullable=False),
-        sa.Column("cc_pair_id", sa.Integer(), nullable=True),
-        sa.Column(
-            "status",
-            sa.String(),
-            nullable=False,
-        ),
-        sa.Column("error_msg", sa.Text(), nullable=True),
-        sa.Column(
-            "updated_at",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.ForeignKeyConstraint(
-            ["cc_pair_id"],
-            ["connector_credential_pair.id"],
-        ),
-        sa.PrimaryKeyConstraint("id"),
-    )
-    op.create_table(
-        "external_permission",
-        sa.Column("id", sa.Integer(), nullable=False),
-        sa.Column("user_id", sa.UUID(), nullable=True),
-        sa.Column("user_email", sa.String(), nullable=False),
-        sa.Column(
-            "source_type",
-            sa.String(),
-            nullable=False,
-        ),
-        sa.Column("external_permission_group", sa.String(), nullable=False),
-        sa.ForeignKeyConstraint(
-            ["user_id"],
-            ["user.id"],
-        ),
-        sa.PrimaryKeyConstraint("id"),
-    )
-    op.create_table(
-        "email_to_external_user_cache",
-        sa.Column("id", sa.Integer(), nullable=False),
-        sa.Column("external_user_id", sa.String(), nullable=False),
-        sa.Column("user_id", sa.UUID(), nullable=True),
-        sa.Column("user_email", sa.String(), nullable=False),
-        sa.ForeignKeyConstraint(
-            ["user_id"],
-            ["user.id"],
-        ),
-        sa.PrimaryKeyConstraint("id"),
-    )
--- a/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py
+++ b/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py
@@ -9,7 +9,7 @@ import json
 from typing import cast
 from alembic import op
 import sqlalchemy as sa
-from danswer.key_value_store.factory import get_kv_store
+from danswer.dynamic_configs.factory import get_dynamic_config_store

 # revision identifiers, used by Alembic.
 revision = "703313b75876"
@@ -54,7 +54,9 @@ def upgrade() -> None:
    )

    try:
-        settings_json = cast(str, get_kv_store().load("token_budget_settings"))
+        settings_json = cast(
+            str, get_dynamic_config_store().load("token_budget_settings")
+        )
        settings = json.loads(settings_json)

        is_enabled = settings.get("enable_token_budget", False)
@@ -69,7 +71,7 @@ def upgrade() -> None:
            )

        # Delete the dynamic config
-        get_kv_store().delete("token_budget_settings")
+        get_dynamic_config_store().delete("token_budget_settings")

    except Exception:
        # Ignore if the dynamic config is not found
--- a/backend/alembic/versions/797089dfb4d2_persona_start_date.py
+++ b/backend/alembic/versions/797089dfb4d2_persona_start_date.py
@@ -1,27 +0,0 @@
-"""persona_start_date
-
-Revision ID: 797089dfb4d2
-Revises: 55546a7967ee
-Create Date: 2024-09-11 14:51:49.785835
-
-"""
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "797089dfb4d2"
-down_revision = "55546a7967ee"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.add_column(
-        "persona",
-        sa.Column("search_start_date", sa.DateTime(timezone=True), nullable=True),
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("persona", "search_start_date")
--- a/backend/alembic/versions/ac5eaac849f9_add_last_pruned_to_connector_table.py
+++ b/backend/alembic/versions/ac5eaac849f9_add_last_pruned_to_connector_table.py
@@ -1,27 +0,0 @@
-"""add last_pruned to the connector_credential_pair table
-
-Revision ID: ac5eaac849f9
-Revises: 52a219fb5233
-Create Date: 2024-09-10 15:04:26.437118
-
-"""
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "ac5eaac849f9"
-down_revision = "46b7a812670f"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # last pruned represents the last time the connector was pruned
-    op.add_column(
-        "connector_credential_pair",
-        sa.Column("last_pruned", sa.DateTime(timezone=True), nullable=True),
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("connector_credential_pair", "last_pruned")
--- a/backend/alembic/versions/bd2921608c3a_non_nullable_default_persona.py
+++ b/backend/alembic/versions/bd2921608c3a_non_nullable_default_persona.py
@@ -1,43 +0,0 @@
-"""non nullable default persona
-
-Revision ID: bd2921608c3a
-Revises: 797089dfb4d2
-Create Date: 2024-09-20 10:28:37.992042
-
-"""
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "bd2921608c3a"
-down_revision = "797089dfb4d2"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # Set existing NULL values to False
-    op.execute(
-        "UPDATE persona SET is_default_persona = FALSE WHERE is_default_persona IS NULL"
-    )
-
-    # Alter the column to be not nullable with a default value of False
-    op.alter_column(
-        "persona",
-        "is_default_persona",
-        existing_type=sa.Boolean(),
-        nullable=False,
-        server_default=sa.text("false"),
-    )
-
-
-def downgrade() -> None:
-    # Revert the changes
-    op.alter_column(
-        "persona",
-        "is_default_persona",
-        existing_type=sa.Boolean(),
-        nullable=True,
-        server_default=None,
-    )
--- a/backend/alembic/versions/c99d76fcd298_add_nullable_to_persona_id_in_chat_.py
+++ b/backend/alembic/versions/c99d76fcd298_add_nullable_to_persona_id_in_chat_.py
@@ -1,31 +0,0 @@
-"""add nullable to persona id in Chat Session
-
-Revision ID: c99d76fcd298
-Revises: 5c7fdadae813
-Create Date: 2024-07-09 19:27:01.579697
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "c99d76fcd298"
-down_revision = "5c7fdadae813"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.alter_column(
-        "chat_session", "persona_id", existing_type=sa.INTEGER(), nullable=True
-    )
-
-
-def downgrade() -> None:
-    op.alter_column(
-        "chat_session",
-        "persona_id",
-        existing_type=sa.INTEGER(),
-        nullable=False,
-    )
--- a/backend/alembic/versions/efb35676026c_standard_answer_match_regex_flag.py
+++ b/backend/alembic/versions/efb35676026c_standard_answer_match_regex_flag.py
@@ -1,32 +0,0 @@
-"""standard answer match_regex flag
-
-Revision ID: efb35676026c
-Revises: 0ebb1d516877
-Create Date: 2024-09-11 13:55:46.101149
-
-"""
-from alembic import op
-import sqlalchemy as sa
-
-# revision identifiers, used by Alembic.
-revision = "efb35676026c"
-down_revision = "0ebb1d516877"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.add_column(
-        "standard_answer",
-        sa.Column(
-            "match_regex", sa.Boolean(), nullable=False, server_default=sa.false()
-        ),
-    )
-    # ### end Alembic commands ###
-
-
-def downgrade() -> None:
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_column("standard_answer", "match_regex")
-    # ### end Alembic commands ###
--- a/backend/alembic/versions/f32615f71aeb_add_custom_headers_to_tools.py
+++ b/backend/alembic/versions/f32615f71aeb_add_custom_headers_to_tools.py
@@ -1,26 +0,0 @@
-"""add custom headers to tools
-
-Revision ID: f32615f71aeb
-Revises: bd2921608c3a
-Create Date: 2024-09-12 20:26:38.932377
-
-"""
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = "f32615f71aeb"
-down_revision = "bd2921608c3a"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.add_column(
-        "tool", sa.Column("custom_headers", postgresql.JSONB(), nullable=True)
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("tool", "custom_headers")
--- a/backend/alembic/versions/f7e58d357687_add_has_web_column_to_user.py
+++ b/backend/alembic/versions/f7e58d357687_add_has_web_column_to_user.py
@@ -1,7 +1,7 @@
 """add has_web_login column to user

 Revision ID: f7e58d357687
-Revises: ba98eba0f66a
+Revises: bceb1e139447
 Create Date: 2024-09-07 20:20:54.522620

 """
--- a/backend/danswer/access/access.py
+++ b/backend/danswer/access/access.py
@@ -1,7 +1,7 @@
 from sqlalchemy.orm import Session

 from danswer.access.models import DocumentAccess
-from danswer.access.utils import prefix_user_email
+from danswer.access.utils import prefix_user
 from danswer.configs.constants import PUBLIC_DOC_PAT
 from danswer.db.document import get_access_info_for_document
 from danswer.db.document import get_access_info_for_documents
@@ -18,13 +18,10 @@ def _get_access_for_document(
        document_id=document_id,
    )

-    return DocumentAccess.build(
-        user_emails=info[1] if info and info[1] else [],
-        user_groups=[],
-        external_user_emails=[],
-        external_user_group_ids=[],
-        is_public=info[2] if info else False,
-    )
+    if not info:
+        return DocumentAccess.build(user_ids=[], user_groups=[], is_public=False)
+
+    return DocumentAccess.build(user_ids=info[1], user_groups=[], is_public=info[2])


 def get_access_for_document(
@@ -37,16 +34,6 @@ def get_access_for_document(
    return versioned_get_access_for_document_fn(document_id, db_session)  # type: ignore


-def get_null_document_access() -> DocumentAccess:
-    return DocumentAccess(
-        user_emails=set(),
-        user_groups=set(),
-        is_public=False,
-        external_user_emails=set(),
-        external_user_group_ids=set(),
-    )
-
-
 def _get_access_for_documents(
    document_ids: list[str],
    db_session: Session,
@@ -55,27 +42,13 @@ def _get_access_for_documents(
        db_session=db_session,
        document_ids=document_ids,
    )
-    doc_access = {
-        document_id: DocumentAccess(
-            user_emails=set([email for email in user_emails if email]),
-            # MIT version will wipe all groups and external groups on update
-            user_groups=set(),
-            is_public=is_public,
-            external_user_emails=set(),
-            external_user_group_ids=set(),
+    return {
+        document_id: DocumentAccess.build(
+            user_ids=user_ids, user_groups=[], is_public=is_public
        )
-        for document_id, user_emails, is_public in document_access_info
+        for document_id, user_ids, is_public in document_access_info
    }

-    # Sometimes the document has not be indexed by the indexing job yet, in those cases
-    # the document does not exist and so we use least permissive. Specifically the EE version
-    # checks the MIT version permissions and creates a superset. This ensures that this flow
-    # does not fail even if the Document has not yet been indexed.
-    for doc_id in document_ids:
-        if doc_id not in doc_access:
-            doc_access[doc_id] = get_null_document_access()
-    return doc_access
-

 def get_access_for_documents(
    document_ids: list[str],
@@ -97,7 +70,7 @@ def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
    matches one entry in the returned set.
    """
    if user:
-        return {prefix_user_email(user.email), PUBLIC_DOC_PAT}
+        return {prefix_user(str(user.id)), PUBLIC_DOC_PAT}
    return {PUBLIC_DOC_PAT}


--- a/backend/danswer/access/models.py
+++ b/backend/danswer/access/models.py
@@ -1,72 +1,30 @@
 from dataclasses import dataclass
+from uuid import UUID

-from danswer.access.utils import prefix_external_group
-from danswer.access.utils import prefix_user_email
+from danswer.access.utils import prefix_user
 from danswer.access.utils import prefix_user_group
 from danswer.configs.constants import PUBLIC_DOC_PAT


@dataclass(frozen=True)
-class ExternalAccess:
-    # Emails of external users with access to the doc externally
-    external_user_emails: set[str]
-    # Names or external IDs of groups with access to the doc
-    external_user_group_ids: set[str]
-    # Whether the document is public in the external system or Danswer
+class DocumentAccess:
+    user_ids: set[str]  # stringified UUIDs
+    user_groups: set[str]  # names of user groups associated with this document
    is_public: bool

-
-@dataclass(frozen=True)
-class DocumentAccess(ExternalAccess):
-    # User emails for Danswer users, None indicates admin
-    user_emails: set[str | None]
-    # Names of user groups associated with this document
-    user_groups: set[str]
-
-    def to_acl(self) -> set[str]:
-        return set(
-            [
-                prefix_user_email(user_email)
-                for user_email in self.user_emails
-                if user_email
-            ]
+    def to_acl(self) -> list[str]:
+        return (
+            [prefix_user(user_id) for user_id in self.user_ids]
            + [prefix_user_group(group_name) for group_name in self.user_groups]
-            + [
-                prefix_user_email(user_email)
-                for user_email in self.external_user_emails
-            ]
-            + [
-                # The group names are already prefixed by the source type
-                # This adds an additional prefix of "external_group:"
-                prefix_external_group(group_name)
-                for group_name in self.external_user_group_ids
-            ]
            + ([PUBLIC_DOC_PAT] if self.is_public else [])
        )

    @classmethod
    def build(
-        cls,
-        user_emails: list[str | None],
-        user_groups: list[str],
-        external_user_emails: list[str],
-        external_user_group_ids: list[str],
-        is_public: bool,
+        cls, user_ids: list[UUID | None], user_groups: list[str], is_public: bool
    ) -> "DocumentAccess":
        return cls(
-            external_user_emails={
-                prefix_user_email(external_email)
-                for external_email in external_user_emails
-            },
-            external_user_group_ids={
-                prefix_external_group(external_group_id)
-                for external_group_id in external_user_group_ids
-            },
-            user_emails={
-                prefix_user_email(user_email)
-                for user_email in user_emails
-                if user_email
-            },
+            user_ids={str(user_id) for user_id in user_ids if user_id},
            user_groups=set(user_groups),
            is_public=is_public,
        )
--- a/backend/danswer/access/utils.py
+++ b/backend/danswer/access/utils.py
@@ -1,24 +1,10 @@
-from danswer.configs.constants import DocumentSource
-
-
-def prefix_user_email(user_email: str) -> str:
-    """Prefixes a user email to eliminate collision with group names.
-    This applies to both a Danswer user and an External user, this is to make the query time
-    more efficient"""
-    return f"user_email:{user_email}"
+def prefix_user(user_id: str) -> str:
+    """Prefixes a user ID to eliminate collision with group names.
+    This assumes that groups are prefixed with a different prefix."""
+    return f"user_id:{user_id}"


 def prefix_user_group(user_group_name: str) -> str:
-    """Prefixes a user group name to eliminate collision with user emails.
+    """Prefixes a user group name to eliminate collision with user IDs.
    This assumes that user ids are prefixed with a different prefix."""
    return f"group:{user_group_name}"
-
-
-def prefix_external_group(ext_group_name: str) -> str:
-    """Prefixes an external group name to eliminate collision with user emails / Danswer groups."""
-    return f"external_group:{ext_group_name}"
-
-
-def prefix_group_w_source(ext_group_name: str, source: DocumentSource) -> str:
-    """External groups may collide across sources, every source needs its own prefix."""
-    return f"{source.value.upper()}_{ext_group_name}"
--- a/backend/danswer/auth/invited_users.py
+++ b/backend/danswer/auth/invited_users.py
@@ -1,20 +1,20 @@
 from typing import cast

 from danswer.configs.constants import KV_USER_STORE_KEY
-from danswer.key_value_store.factory import get_kv_store
-from danswer.key_value_store.interface import JSON_ro
-from danswer.key_value_store.interface import KvKeyNotFoundError
+from danswer.dynamic_configs.factory import get_dynamic_config_store
+from danswer.dynamic_configs.interface import ConfigNotFoundError
+from danswer.dynamic_configs.interface import JSON_ro


 def get_invited_users() -> list[str]:
    try:
-        store = get_kv_store()
+        store = get_dynamic_config_store()
        return cast(list, store.load(KV_USER_STORE_KEY))
-    except KvKeyNotFoundError:
+    except ConfigNotFoundError:
        return list()


 def write_invited_users(emails: list[str]) -> int:
-    store = get_kv_store()
+    store = get_dynamic_config_store()
    store.store(KV_USER_STORE_KEY, cast(JSON_ro, emails))
    return len(emails)
--- a/backend/danswer/auth/noauth_user.py
+++ b/backend/danswer/auth/noauth_user.py
@@ -4,29 +4,29 @@ from typing import cast

 from danswer.auth.schemas import UserRole
 from danswer.configs.constants import KV_NO_AUTH_USER_PREFERENCES_KEY
-from danswer.key_value_store.store import KeyValueStore
-from danswer.key_value_store.store import KvKeyNotFoundError
+from danswer.dynamic_configs.store import ConfigNotFoundError
+from danswer.dynamic_configs.store import DynamicConfigStore
 from danswer.server.manage.models import UserInfo
 from danswer.server.manage.models import UserPreferences


 def set_no_auth_user_preferences(
-    store: KeyValueStore, preferences: UserPreferences
+    store: DynamicConfigStore, preferences: UserPreferences
 ) -> None:
    store.store(KV_NO_AUTH_USER_PREFERENCES_KEY, preferences.model_dump())


-def load_no_auth_user_preferences(store: KeyValueStore) -> UserPreferences:
+def load_no_auth_user_preferences(store: DynamicConfigStore) -> UserPreferences:
    try:
        preferences_data = cast(
            Mapping[str, Any], store.load(KV_NO_AUTH_USER_PREFERENCES_KEY)
        )
        return UserPreferences(**preferences_data)
-    except KvKeyNotFoundError:
+    except ConfigNotFoundError:
        return UserPreferences(chosen_assistants=None, default_model=None)


-def fetch_no_auth_user(store: KeyValueStore) -> UserInfo:
+def fetch_no_auth_user(store: DynamicConfigStore) -> UserInfo:
    return UserInfo(
        id="__no_auth_user__",
        email="anonymous@danswer.ai",
--- a/backend/danswer/auth/users.py
+++ b/backend/danswer/auth/users.py
@@ -8,7 +8,6 @@ from email.mime.text import MIMEText
 from typing import Optional
 from typing import Tuple

-import jwt
 from email_validator import EmailNotValidError
 from email_validator import validate_email
 from fastapi import APIRouter
@@ -38,10 +37,8 @@ from danswer.auth.schemas import UserCreate
 from danswer.auth.schemas import UserRole
 from danswer.auth.schemas import UserUpdate
 from danswer.configs.app_configs import AUTH_TYPE
-from danswer.configs.app_configs import DATA_PLANE_SECRET
 from danswer.configs.app_configs import DISABLE_AUTH
 from danswer.configs.app_configs import EMAIL_FROM
-from danswer.configs.app_configs import EXPECTED_API_KEY
 from danswer.configs.app_configs import REQUIRE_EMAIL_VERIFICATION
 from danswer.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
 from danswer.configs.app_configs import SMTP_PASS
@@ -271,7 +268,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
            )
            user.is_verified = is_verified_by_default
            user.has_web_login = True
-
        return user

    async def on_after_register(
@@ -303,27 +299,17 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
    async def authenticate(
        self, credentials: OAuth2PasswordRequestForm
    ) -> Optional[User]:
-        try:
-            user = await self.get_by_email(credentials.username)
-        except exceptions.UserNotExists:
-            self.password_helper.hash(credentials.password)
-            return None
-
-        if not user.has_web_login:
-            raise HTTPException(
-                status_code=status.HTTP_403_FORBIDDEN,
-                detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD",
-            )
-
-        verified, updated_password_hash = self.password_helper.verify_and_update(
-            credentials.password, user.hashed_password
-        )
-        if not verified:
-            return None
-
-        if updated_password_hash is not None:
-            await self.user_db.update(user, {"hashed_password": updated_password_hash})
-
+        user = await super().authenticate(credentials)
+        if user is None:
+            try:
+                user = await self.get_by_email(credentials.username)
+                if not user.has_web_login:
+                    raise HTTPException(
+                        status_code=status.HTTP_403_FORBIDDEN,
+                        detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD",
+                    )
+            except exceptions.UserNotExists:
+                pass
        return user


@@ -345,6 +331,7 @@ def get_database_strategy(
    strategy = DatabaseStrategy(
        access_token_db, lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS  # type: ignore
    )
+
    return strategy


@@ -427,7 +414,6 @@ async def optional_user(
 async def double_check_user(
    user: User | None,
    optional: bool = DISABLE_AUTH,
-    include_expired: bool = False,
 ) -> User | None:
    if optional:
        return None
@@ -444,11 +430,7 @@ async def double_check_user(
            detail="Access denied. User is not verified.",
        )

-    if (
-        user.oidc_expiry
-        and user.oidc_expiry < datetime.now(timezone.utc)
-        and not include_expired
-    ):
+    if user.oidc_expiry and user.oidc_expiry < datetime.now(timezone.utc):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied. User's OIDC token has expired.",
@@ -457,12 +439,6 @@ async def double_check_user(
    return user


-async def current_user_with_expired_token(
-    user: User | None = Depends(optional_user),
-) -> User | None:
-    return await double_check_user(user, include_expired=True)
-
-
 async def current_user(
    user: User | None = Depends(optional_user),
 ) -> User | None:
@@ -507,28 +483,3 @@ async def current_admin_user(user: User | None = Depends(current_user)) -> User
 def get_default_admin_user_emails_() -> list[str]:
    # No default seeding available for Danswer MIT
    return []
-
-
-async def control_plane_dep(request: Request) -> None:
-    api_key = request.headers.get("X-API-KEY")
-    if api_key != EXPECTED_API_KEY:
-        logger.warning("Invalid API key")
-        raise HTTPException(status_code=401, detail="Invalid API key")
-
-    auth_header = request.headers.get("Authorization")
-    if not auth_header or not auth_header.startswith("Bearer "):
-        logger.warning("Invalid authorization header")
-        raise HTTPException(status_code=401, detail="Invalid authorization header")
-
-    token = auth_header.split(" ")[1]
-    try:
-        payload = jwt.decode(token, DATA_PLANE_SECRET, algorithms=["HS256"])
-        if payload.get("scope") != "tenant:create":
-            logger.warning("Insufficient permissions")
-            raise HTTPException(status_code=403, detail="Insufficient permissions")
-    except jwt.ExpiredSignatureError:
-        logger.warning("Token has expired")
-        raise HTTPException(status_code=401, detail="Token has expired")
-    except jwt.InvalidTokenError:
-        logger.warning("Invalid token")
-        raise HTTPException(status_code=401, detail="Invalid token")
--- a/backend/danswer/background/celery/celery_app.py
+++ b/backend/danswer/background/celery/celery_app.py
--- a/backend/danswer/background/celery/celery_redis.py
+++ b/backend/danswer/background/celery/celery_redis.py
@@ -15,7 +15,6 @@ from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
 from danswer.configs.constants import DanswerCeleryPriority
 from danswer.configs.constants import DanswerCeleryQueues
 from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
-from danswer.db.document import construct_document_select_for_connector_credential_pair
 from danswer.db.document import (
    construct_document_select_for_connector_credential_pair_by_needs_sync,
 )
@@ -125,7 +124,7 @@ class RedisDocumentSet(RedisObjectHelper):
        last_lock_time = time.monotonic()

        async_results = []
-        stmt = construct_document_select_by_docset(self._id, current_only=False)
+        stmt = construct_document_select_by_docset(self._id)
        for doc in db_session.scalars(stmt).yield_per(1):
            current_time = time.monotonic()
            if current_time - last_lock_time >= (
@@ -135,7 +134,7 @@ class RedisDocumentSet(RedisObjectHelper):
                last_lock_time = current_time

            # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
+            # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # we prefix the task id so it's easier to keep track of who created the task
            # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
            custom_task_id = f"{self.task_id_prefix}_{uuid4()}"
@@ -190,7 +189,7 @@ class RedisUserGroup(RedisObjectHelper):
                last_lock_time = current_time

            # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
+            # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # we prefix the task id so it's easier to keep track of who created the task
            # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
            custom_task_id = f"{self.task_id_prefix}_{uuid4()}"
@@ -212,9 +211,6 @@ class RedisUserGroup(RedisObjectHelper):


 class RedisConnectorCredentialPair(RedisObjectHelper):
-    """This class differs from the default in that the taskset used spans
-    all connectors and is not per connector."""
-
    PREFIX = "connectorsync"
    FENCE_PREFIX = PREFIX + "_fence"
    TASKSET_PREFIX = PREFIX + "_taskset"
@@ -260,7 +256,7 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
                last_lock_time = current_time

            # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
+            # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
            # we prefix the task id so it's easier to keep track of who created the task
            # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
            custom_task_id = f"{self.task_id_prefix}_{uuid4()}"
@@ -285,183 +281,6 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
        return len(async_results)


-class RedisConnectorDeletion(RedisObjectHelper):
-    PREFIX = "connectordeletion"
-    FENCE_PREFIX = PREFIX + "_fence"
-    TASKSET_PREFIX = PREFIX + "_taskset"
-
-    def generate_tasks(
-        self,
-        celery_app: Celery,
-        db_session: Session,
-        redis_client: Redis,
-        lock: redis.lock.Lock,
-    ) -> int | None:
-        last_lock_time = time.monotonic()
-
-        async_results = []
-        cc_pair = get_connector_credential_pair_from_id(self._id, db_session)
-        if not cc_pair:
-            return None
-
-        stmt = construct_document_select_for_connector_credential_pair(
-            cc_pair.connector_id, cc_pair.credential_id
-        )
-        for doc in db_session.scalars(stmt).yield_per(1):
-            current_time = time.monotonic()
-            if current_time - last_lock_time >= (
-                CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
-            ):
-                lock.reacquire()
-                last_lock_time = current_time
-
-            # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            # we prefix the task id so it's easier to keep track of who created the task
-            # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            custom_task_id = f"{self.task_id_prefix}_{uuid4()}"
-
-            # add to the tracking taskset in redis BEFORE creating the celery task.
-            # note that for the moment we are using a single taskset key, not differentiated by cc_pair id
-            redis_client.sadd(self.taskset_key, custom_task_id)
-
-            # Priority on sync's triggered by new indexing should be medium
-            result = celery_app.send_task(
-                "document_by_cc_pair_cleanup_task",
-                kwargs=dict(
-                    document_id=doc.id,
-                    connector_id=cc_pair.connector_id,
-                    credential_id=cc_pair.credential_id,
-                ),
-                queue=DanswerCeleryQueues.CONNECTOR_DELETION,
-                task_id=custom_task_id,
-                priority=DanswerCeleryPriority.MEDIUM,
-            )
-
-            async_results.append(result)
-
-        return len(async_results)
-
-
-class RedisConnectorPruning(RedisObjectHelper):
-    """Celery will kick off a long running generator task to crawl the connector and
-    find any missing docs, which will each then get a new cleanup task. The progress of
-    those tasks will then be monitored to completion.
-
-    Example rough happy path order:
-    Check connectorpruning_fence_1
-    Send generator task with id connectorpruning+generator_1_{uuid}
-
-    generator runs connector with callbacks that increment connectorpruning_generator_progress_1
-    generator creates many subtasks with id connectorpruning+sub_1_{uuid}
-      in taskset connectorpruning_taskset_1
-    on completion, generator sets connectorpruning_generator_complete_1
-
-    celery postrun removes subtasks from taskset
-    monitor beat task cleans up when taskset reaches 0 items
-    """
-
-    PREFIX = "connectorpruning"
-    FENCE_PREFIX = PREFIX + "_fence"  # a fence for the entire pruning process
-    GENERATOR_TASK_PREFIX = PREFIX + "+generator"
-
-    TASKSET_PREFIX = PREFIX + "_taskset"  # stores a list of prune tasks id's
-    SUBTASK_PREFIX = PREFIX + "+sub"
-
-    GENERATOR_PROGRESS_PREFIX = (
-        PREFIX + "_generator_progress"
-    )  # a signal that contains generator progress
-    GENERATOR_COMPLETE_PREFIX = (
-        PREFIX + "_generator_complete"
-    )  # a signal that the generator has finished
-
-    def __init__(self, id: int) -> None:
-        """id: the cc_pair_id of the connector credential pair"""
-
-        super().__init__(id)
-        self.documents_to_prune: set[str] = set()
-
-    @property
-    def generator_task_id_prefix(self) -> str:
-        return f"{self.GENERATOR_TASK_PREFIX}_{self._id}"
-
-    @property
-    def generator_progress_key(self) -> str:
-        # example: connectorpruning_generator_progress_1
-        return f"{self.GENERATOR_PROGRESS_PREFIX}_{self._id}"
-
-    @property
-    def generator_complete_key(self) -> str:
-        # example: connectorpruning_generator_complete_1
-        return f"{self.GENERATOR_COMPLETE_PREFIX}_{self._id}"
-
-    @property
-    def subtask_id_prefix(self) -> str:
-        return f"{self.SUBTASK_PREFIX}_{self._id}"
-
-    def generate_tasks(
-        self,
-        celery_app: Celery,
-        db_session: Session,
-        redis_client: Redis,
-        lock: redis.lock.Lock | None,
-    ) -> int | None:
-        last_lock_time = time.monotonic()
-
-        async_results = []
-        cc_pair = get_connector_credential_pair_from_id(self._id, db_session)
-        if not cc_pair:
-            return None
-
-        for doc_id in self.documents_to_prune:
-            current_time = time.monotonic()
-            if lock and current_time - last_lock_time >= (
-                CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
-            ):
-                lock.reacquire()
-                last_lock_time = current_time
-
-            # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            # we prefix the task id so it's easier to keep track of who created the task
-            # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
-            custom_task_id = f"{self.subtask_id_prefix}_{uuid4()}"
-
-            # add to the tracking taskset in redis BEFORE creating the celery task.
-            # note that for the moment we are using a single taskset key, not differentiated by cc_pair id
-            redis_client.sadd(self.taskset_key, custom_task_id)
-
-            # Priority on sync's triggered by new indexing should be medium
-            result = celery_app.send_task(
-                "document_by_cc_pair_cleanup_task",
-                kwargs=dict(
-                    document_id=doc_id,
-                    connector_id=cc_pair.connector_id,
-                    credential_id=cc_pair.credential_id,
-                ),
-                queue=DanswerCeleryQueues.CONNECTOR_DELETION,
-                task_id=custom_task_id,
-                priority=DanswerCeleryPriority.MEDIUM,
-            )
-
-            async_results.append(result)
-
-        return len(async_results)
-
-    def is_pruning(self, db_session: Session, redis_client: Redis) -> bool:
-        """A single example of a helper method being refactored into the redis helper"""
-        cc_pair = get_connector_credential_pair_from_id(
-            cc_pair_id=self._id, db_session=db_session
-        )
-        if not cc_pair:
-            raise ValueError(f"cc_pair_id {self._id} does not exist.")
-
-        if redis_client.exists(self.fence_key):
-            return True
-
-        return False
-
-
 def celery_get_queue_length(queue: str, r: Redis) -> int:
    """This is a redis specific way to get the length of a celery queue.
    It is priority aware and knows how to count across the multiple redis lists
--- a/backend/danswer/background/celery/celery_utils.py
+++ b/backend/danswer/background/celery/celery_utils.py
@@ -1,11 +1,11 @@
-from collections.abc import Callable
 from datetime import datetime
 from datetime import timezone
-from typing import Any

 from sqlalchemy.orm import Session

-from danswer.background.celery.celery_redis import RedisConnectorDeletion
+from danswer.background.task_utils import name_cc_cleanup_task
+from danswer.background.task_utils import name_cc_prune_task
+from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
 from danswer.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
 from danswer.connectors.cross_connector_utils.rate_limit_wrapper import (
    rate_limit_builder,
@@ -15,38 +15,29 @@ from danswer.connectors.interfaces import IdConnector
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.interfaces import PollConnector
 from danswer.connectors.models import Document
-from danswer.db.connector_credential_pair import get_connector_credential_pair
-from danswer.db.enums import TaskStatus
+from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed
+from danswer.db.engine import get_db_current_time
+from danswer.db.enums import ConnectorCredentialPairStatus
+from danswer.db.models import Connector
+from danswer.db.models import ConnectorCredentialPair
+from danswer.db.models import Credential
 from danswer.db.models import TaskQueueState
-from danswer.redis.redis_pool import get_redis_client
+from danswer.db.tasks import check_task_is_live_and_not_timed_out
+from danswer.db.tasks import get_latest_task
+from danswer.db.tasks import get_latest_task_by_type
 from danswer.server.documents.models import DeletionAttemptSnapshot
 from danswer.utils.logger import setup_logger

-
 logger = setup_logger()


 def _get_deletion_status(
    connector_id: int, credential_id: int, db_session: Session
 ) -> TaskQueueState | None:
-    """We no longer store TaskQueueState in the DB for a deletion attempt.
-    This function populates TaskQueueState by just checking redis.
-    """
-    cc_pair = get_connector_credential_pair(
-        connector_id=connector_id, credential_id=credential_id, db_session=db_session
-    )
-    if not cc_pair:
-        return None
-
-    rcd = RedisConnectorDeletion(cc_pair.id)
-
-    r = get_redis_client()
-    if not r.exists(rcd.fence_key):
-        return None
-
-    return TaskQueueState(
-        task_id="", task_name=rcd.fence_key, status=TaskStatus.STARTED
+    cleanup_task_name = name_cc_cleanup_task(
+        connector_id=connector_id, credential_id=credential_id
    )
+    return get_latest_task(task_name=cleanup_task_name, db_session=db_session)


 def get_deletion_attempt_snapshot(
@@ -63,19 +54,78 @@ def get_deletion_attempt_snapshot(
    )


+def should_kick_off_deletion_of_cc_pair(
+    cc_pair: ConnectorCredentialPair, db_session: Session
+) -> bool:
+    if cc_pair.status != ConnectorCredentialPairStatus.DELETING:
+        return False
+
+    if check_deletion_attempt_is_allowed(cc_pair, db_session):
+        return False
+
+    deletion_task = _get_deletion_status(
+        connector_id=cc_pair.connector_id,
+        credential_id=cc_pair.credential_id,
+        db_session=db_session,
+    )
+    if deletion_task and check_task_is_live_and_not_timed_out(
+        deletion_task,
+        db_session,
+        # 1 hour timeout
+        timeout=60 * 60,
+    ):
+        return False
+
+    return True
+
+
+def should_prune_cc_pair(
+    connector: Connector, credential: Credential, db_session: Session
+) -> bool:
+    if not connector.prune_freq:
+        return False
+
+    pruning_task_name = name_cc_prune_task(
+        connector_id=connector.id, credential_id=credential.id
+    )
+    last_pruning_task = get_latest_task(pruning_task_name, db_session)
+    current_db_time = get_db_current_time(db_session)
+
+    if not last_pruning_task:
+        time_since_initialization = current_db_time - connector.time_created
+        if time_since_initialization.total_seconds() >= connector.prune_freq:
+            return True
+        return False
+
+    if not ALLOW_SIMULTANEOUS_PRUNING:
+        pruning_type_task_name = name_cc_prune_task()
+        last_pruning_type_task = get_latest_task_by_type(
+            pruning_type_task_name, db_session
+        )
+
+        if last_pruning_type_task and check_task_is_live_and_not_timed_out(
+            last_pruning_type_task, db_session
+        ):
+            return False
+
+    if check_task_is_live_and_not_timed_out(last_pruning_task, db_session):
+        return False
+
+    if not last_pruning_task.start_time:
+        return False
+
+    time_since_last_pruning = current_db_time - last_pruning_task.start_time
+    return time_since_last_pruning.total_seconds() >= connector.prune_freq
+
+
 def document_batch_to_ids(doc_batch: list[Document]) -> set[str]:
    return {doc.id for doc in doc_batch}


-def extract_ids_from_runnable_connector(
-    runnable_connector: BaseConnector,
-    progress_callback: Callable[[int], None] | None = None,
-) -> set[str]:
+def extract_ids_from_runnable_connector(runnable_connector: BaseConnector) -> set[str]:
    """
    If the PruneConnector hasnt been implemented for the given connector, just pull
-    all docs using the load_from_state and grab out the IDs.
-
-    Optionally, a callback can be passed to handle the length of each document batch.
+    all docs using the load_from_state and grab out the IDs
    """
    all_connector_doc_ids: set[str] = set()

@@ -98,36 +148,6 @@ def extract_ids_from_runnable_connector(
                max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60
            )(document_batch_to_ids)
        for doc_batch in doc_batch_generator:
-            if progress_callback:
-                progress_callback(len(doc_batch))
            all_connector_doc_ids.update(doc_batch_processing_func(doc_batch))

    return all_connector_doc_ids
-
-
-def celery_is_listening_to_queue(worker: Any, name: str) -> bool:
-    """Checks to see if we're listening to the named queue"""
-
-    # how to get a list of queues this worker is listening to
-    # https://stackoverflow.com/questions/29790523/how-to-determine-which-queues-a-celery-worker-is-consuming-at-runtime
-    queue_names = list(worker.app.amqp.queues.consume_from.keys())
-    for queue_name in queue_names:
-        if queue_name == name:
-            return True
-
-    return False
-
-
-def celery_is_worker_primary(worker: Any) -> bool:
-    """There are multiple approaches that could be taken to determine if a celery worker
-    is 'primary', as defined by us. But the way we do it is to check the hostname set
-    for the celery worker, which can be done either in celeryconfig.py or on the
-    command line with '--hostname'."""
-    hostname = worker.hostname
-    if hostname.startswith("light"):
-        return False
-
-    if hostname.startswith("heavy"):
-        return False
-
-    return True
--- a/backend/danswer/background/celery/celeryconfig.py
+++ b/backend/danswer/background/celery/celeryconfig.py
@@ -1,17 +1,9 @@
 # docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html
-from danswer.configs.app_configs import CELERY_BROKER_POOL_LIMIT
-from danswer.configs.app_configs import CELERY_RESULT_EXPIRES
 from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY
-from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY_RESULT_BACKEND
-from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
 from danswer.configs.app_configs import REDIS_HOST
 from danswer.configs.app_configs import REDIS_PASSWORD
 from danswer.configs.app_configs import REDIS_PORT
-from danswer.configs.app_configs import REDIS_SSL
-from danswer.configs.app_configs import REDIS_SSL_CA_CERTS
-from danswer.configs.app_configs import REDIS_SSL_CERT_REQS
 from danswer.configs.constants import DanswerCeleryPriority
-from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS

 CELERY_SEPARATOR = ":"

@@ -19,79 +11,25 @@ CELERY_PASSWORD_PART = ""
 if REDIS_PASSWORD:
    CELERY_PASSWORD_PART = f":{REDIS_PASSWORD}@"

-REDIS_SCHEME = "redis"
-
-# SSL-specific query parameters for Redis URL
-SSL_QUERY_PARAMS = ""
-if REDIS_SSL:
-    REDIS_SCHEME = "rediss"
-    SSL_QUERY_PARAMS = f"?ssl_cert_reqs={REDIS_SSL_CERT_REQS}"
-    if REDIS_SSL_CA_CERTS:
-        SSL_QUERY_PARAMS += f"&ssl_ca_certs={REDIS_SSL_CA_CERTS}"
-
 # example celery_broker_url: "redis://:password@localhost:6379/15"
-broker_url = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}{SSL_QUERY_PARAMS}"
+broker_url = (
+    f"redis://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}"
+)

-result_backend = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY_RESULT_BACKEND}{SSL_QUERY_PARAMS}"
+result_backend = (
+    f"redis://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}"
+)

-# NOTE: prefetch 4 is significantly faster than prefetch 1 for small tasks
+# NOTE: prefetch 4 is significantly faster than prefetch 1
 # however, prefetching is bad when tasks are lengthy as those tasks
 # can stall other tasks.
 worker_prefetch_multiplier = 4

-broker_connection_retry_on_startup = True
-broker_pool_limit = CELERY_BROKER_POOL_LIMIT
-
-# redis broker settings
-# https://docs.celeryq.dev/projects/kombu/en/stable/reference/kombu.transport.redis.html
 broker_transport_options = {
    "priority_steps": list(range(len(DanswerCeleryPriority))),
    "sep": CELERY_SEPARATOR,
    "queue_order_strategy": "priority",
-    "retry_on_timeout": True,
-    "health_check_interval": REDIS_HEALTH_CHECK_INTERVAL,
-    "socket_keepalive": True,
-    "socket_keepalive_options": REDIS_SOCKET_KEEPALIVE_OPTIONS,
 }

-# redis backend settings
-# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings
-
-# there doesn't appear to be a way to set socket_keepalive_options on the redis result backend
-redis_socket_keepalive = True
-redis_retry_on_timeout = True
-redis_backend_health_check_interval = REDIS_HEALTH_CHECK_INTERVAL
-
-
 task_default_priority = DanswerCeleryPriority.MEDIUM
 task_acks_late = True
-
-# It's possible we don't even need celery's result backend, in which case all of the optimization below
-# might be irrelevant
-result_expires = CELERY_RESULT_EXPIRES  # 86400 seconds is the default
-
-# Option 0: Defaults (json serializer, no compression)
-# about 1.5 KB per queued task. 1KB in queue, 400B for result, 100 as a child entry in generator result
-
-# Option 1: Reduces generator task result sizes by roughly 20%
-# task_compression = "bzip2"
-# task_serializer = "pickle"
-# result_compression = "bzip2"
-# result_serializer = "pickle"
-# accept_content=["pickle"]
-
-# Option 2: this significantly reduces the size of the result for generator tasks since the list of children
-# can be large. small tasks change very little
-# def pickle_bz2_encoder(data):
-#     return bz2.compress(pickle.dumps(data))
-
-# def pickle_bz2_decoder(data):
-#     return pickle.loads(bz2.decompress(data))
-
-# from kombu import serialization  # To register custom serialization with Celery/Kombu
-
-# serialization.register('pickle-bzip2', pickle_bz2_encoder, pickle_bz2_decoder, 'application/x-pickle-bz2', 'binary')
-
-# task_serializer = "pickle-bzip2"
-# result_serializer = "pickle-bzip2"
-# accept_content=["pickle", "pickle-bzip2"]
--- a/backend/danswer/background/celery/tasks/connector_deletion/tasks.py
+++ b/backend/danswer/background/celery/tasks/connector_deletion/tasks.py
@@ -1,110 +0,0 @@
-import redis
-from celery import shared_task
-from celery.exceptions import SoftTimeLimitExceeded
-from redis import Redis
-from sqlalchemy.orm import Session
-from sqlalchemy.orm.exc import ObjectDeletedError
-
-from danswer.background.celery.celery_app import celery_app
-from danswer.background.celery.celery_app import task_logger
-from danswer.background.celery.celery_redis import RedisConnectorDeletion
-from danswer.configs.app_configs import JOB_TIMEOUT
-from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
-from danswer.configs.constants import DanswerRedisLocks
-from danswer.db.connector_credential_pair import get_connector_credential_pairs
-from danswer.db.engine import get_sqlalchemy_engine
-from danswer.db.enums import ConnectorCredentialPairStatus
-from danswer.db.models import ConnectorCredentialPair
-from danswer.redis.redis_pool import get_redis_client
-
-
-@shared_task(
-    name="check_for_connector_deletion_task",
-    soft_time_limit=JOB_TIMEOUT,
-    trail=False,
-)
-def check_for_connector_deletion_task() -> None:
-    r = get_redis_client()
-
-    lock_beat = r.lock(
-        DanswerRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
-        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
-    )
-
-    try:
-        # these tasks should never overlap
-        if not lock_beat.acquire(blocking=False):
-            return
-
-        with Session(get_sqlalchemy_engine()) as db_session:
-            cc_pairs = get_connector_credential_pairs(db_session)
-            for cc_pair in cc_pairs:
-                try_generate_document_cc_pair_cleanup_tasks(
-                    cc_pair, db_session, r, lock_beat
-                )
-    except SoftTimeLimitExceeded:
-        task_logger.info(
-            "Soft time limit exceeded, task is being terminated gracefully."
-        )
-    except Exception:
-        task_logger.exception("Unexpected exception")
-    finally:
-        if lock_beat.owned():
-            lock_beat.release()
-
-
-def try_generate_document_cc_pair_cleanup_tasks(
-    cc_pair: ConnectorCredentialPair,
-    db_session: Session,
-    r: Redis,
-    lock_beat: redis.lock.Lock,
-) -> int | None:
-    """Returns an int if syncing is needed. The int represents the number of sync tasks generated.
-    Note that syncing can still be required even if the number of sync tasks generated is zero.
-    Returns None if no syncing is required.
-    """
-
-    lock_beat.reacquire()
-
-    rcd = RedisConnectorDeletion(cc_pair.id)
-
-    # don't generate sync tasks if tasks are still pending
-    if r.exists(rcd.fence_key):
-        return None
-
-    # we need to refresh the state of the object inside the fence
-    # to avoid a race condition with db.commit/fence deletion
-    # at the end of this taskset
-    try:
-        db_session.refresh(cc_pair)
-    except ObjectDeletedError:
-        return None
-
-    if cc_pair.status != ConnectorCredentialPairStatus.DELETING:
-        return None
-
-    # add tasks to celery and build up the task set to monitor in redis
-    r.delete(rcd.taskset_key)
-
-    # Add all documents that need to be updated into the queue
-    task_logger.info(
-        f"RedisConnectorDeletion.generate_tasks starting. cc_pair_id={cc_pair.id}"
-    )
-    tasks_generated = rcd.generate_tasks(celery_app, db_session, r, lock_beat)
-    if tasks_generated is None:
-        return None
-
-    # Currently we are allowing the sync to proceed with 0 tasks.
-    # It's possible for sets/groups to be generated initially with no entries
-    # and they still need to be marked as up to date.
-    # if tasks_generated == 0:
-    #     return 0
-
-    task_logger.info(
-        f"RedisConnectorDeletion.generate_tasks finished. "
-        f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}"
-    )
-
-    # set this only after all tasks have been added
-    r.set(rcd.fence_key, tasks_generated)
-    return tasks_generated
--- a/backend/danswer/background/celery/tasks/periodic/tasks.py
+++ b/backend/danswer/background/celery/tasks/periodic/tasks.py
@@ -1,137 +0,0 @@
-#####
-# Periodic Tasks
-#####
-import json
-from typing import Any
-
-from celery import shared_task
-from celery.contrib.abortable import AbortableTask  # type: ignore
-from celery.exceptions import TaskRevokedError
-from sqlalchemy import inspect
-from sqlalchemy import text
-from sqlalchemy.orm import Session
-
-from danswer.background.celery.celery_app import task_logger
-from danswer.configs.app_configs import JOB_TIMEOUT
-from danswer.configs.constants import PostgresAdvisoryLocks
-from danswer.db.engine import get_sqlalchemy_engine  # type: ignore
-
-
-@shared_task(
-    name="kombu_message_cleanup_task",
-    soft_time_limit=JOB_TIMEOUT,
-    bind=True,
-    base=AbortableTask,
-)
-def kombu_message_cleanup_task(self: Any) -> int:
-    """Runs periodically to clean up the kombu_message table"""
-
-    # we will select messages older than this amount to clean up
-    KOMBU_MESSAGE_CLEANUP_AGE = 7  # days
-    KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT = 1000
-
-    ctx = {}
-    ctx["last_processed_id"] = 0
-    ctx["deleted"] = 0
-    ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE
-    ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT
-    with Session(get_sqlalchemy_engine()) as db_session:
-        # Exit the task if we can't take the advisory lock
-        result = db_session.execute(
-            text("SELECT pg_try_advisory_lock(:id)"),
-            {"id": PostgresAdvisoryLocks.KOMBU_MESSAGE_CLEANUP_LOCK_ID.value},
-        ).scalar()
-        if not result:
-            return 0
-
-        while True:
-            if self.is_aborted():
-                raise TaskRevokedError("kombu_message_cleanup_task was aborted.")
-
-            b = kombu_message_cleanup_task_helper(ctx, db_session)
-            if not b:
-                break
-
-            db_session.commit()
-
-    if ctx["deleted"] > 0:
-        task_logger.info(
-            f"Deleted {ctx['deleted']} orphaned messages from kombu_message."
-        )
-
-    return ctx["deleted"]
-
-
-def kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool:
-    """
-    Helper function to clean up old messages from the `kombu_message` table that are no longer relevant.
-
-    This function retrieves messages from the `kombu_message` table that are no longer visible and
-    older than a specified interval. It checks if the corresponding task_id exists in the
-    `celery_taskmeta` table. If the task_id does not exist, the message is deleted.
-
-    Args:
-        ctx (dict): A context dictionary containing configuration parameters such as:
-            - 'cleanup_age' (int): The age in days after which messages are considered old.
-            - 'page_limit' (int): The maximum number of messages to process in one batch.
-            - 'last_processed_id' (int): The ID of the last processed message to handle pagination.
-            - 'deleted' (int): A counter to track the number of deleted messages.
-        db_session (Session): The SQLAlchemy database session for executing queries.
-
-    Returns:
-        bool: Returns True if there are more rows to process, False if not.
-    """
-
-    inspector = inspect(db_session.bind)
-    if not inspector:
-        return False
-
-    # With the move to redis as celery's broker and backend, kombu tables may not even exist.
-    # We can fail silently.
-    if not inspector.has_table("kombu_message"):
-        return False
-
-    query = text(
-        """
-    SELECT id, timestamp, payload
-    FROM kombu_message WHERE visible = 'false'
-    AND timestamp < CURRENT_TIMESTAMP - INTERVAL :interval_days
-    AND id > :last_processed_id
-    ORDER BY id
-    LIMIT :page_limit
-"""
-    )
-    kombu_messages = db_session.execute(
-        query,
-        {
-            "interval_days": f"{ctx['cleanup_age']} days",
-            "page_limit": ctx["page_limit"],
-            "last_processed_id": ctx["last_processed_id"],
-        },
-    ).fetchall()
-
-    if len(kombu_messages) == 0:
-        return False
-
-    for msg in kombu_messages:
-        payload = json.loads(msg[2])
-        task_id = payload["headers"]["id"]
-
-        # Check if task_id exists in celery_taskmeta
-        task_exists = db_session.execute(
-            text("SELECT 1 FROM celery_taskmeta WHERE task_id = :task_id"),
-            {"task_id": task_id},
-        ).fetchone()
-
-        # If task_id does not exist, delete the message
-        if not task_exists:
-            result = db_session.execute(
-                text("DELETE FROM kombu_message WHERE id = :message_id"),
-                {"message_id": msg[0]},
-            )
-            if result.rowcount > 0:  # type: ignore
-                ctx["deleted"] += 1
-
-        ctx["last_processed_id"] = msg[0]
-
-    return True
--- a/backend/danswer/background/celery/tasks/pruning/tasks.py
+++ b/backend/danswer/background/celery/tasks/pruning/tasks.py
@@ -1,239 +0,0 @@
-from datetime import datetime
-from datetime import timedelta
-from datetime import timezone
-from uuid import uuid4
-
-import redis
-from celery import shared_task
-from celery.exceptions import SoftTimeLimitExceeded
-from redis import Redis
-from sqlalchemy.orm import Session
-
-from danswer.background.celery.celery_app import celery_app
-from danswer.background.celery.celery_app import task_logger
-from danswer.background.celery.celery_redis import RedisConnectorPruning
-from danswer.background.celery.celery_utils import extract_ids_from_runnable_connector
-from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
-from danswer.configs.app_configs import JOB_TIMEOUT
-from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
-from danswer.configs.constants import DanswerCeleryPriority
-from danswer.configs.constants import DanswerCeleryQueues
-from danswer.configs.constants import DanswerRedisLocks
-from danswer.connectors.factory import instantiate_connector
-from danswer.connectors.models import InputType
-from danswer.db.connector_credential_pair import get_connector_credential_pair
-from danswer.db.connector_credential_pair import get_connector_credential_pairs
-from danswer.db.document import get_documents_for_connector_credential_pair
-from danswer.db.engine import get_sqlalchemy_engine
-from danswer.db.enums import ConnectorCredentialPairStatus
-from danswer.db.models import ConnectorCredentialPair
-from danswer.redis.redis_pool import get_redis_client
-
-
-@shared_task(
-    name="check_for_prune_task_2",
-    soft_time_limit=JOB_TIMEOUT,
-)
-def check_for_prune_task_2() -> None:
-    r = get_redis_client()
-
-    lock_beat = r.lock(
-        DanswerRedisLocks.CHECK_PRUNE_BEAT_LOCK,
-        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
-    )
-
-    try:
-        # these tasks should never overlap
-        if not lock_beat.acquire(blocking=False):
-            return
-
-        with Session(get_sqlalchemy_engine()) as db_session:
-            cc_pairs = get_connector_credential_pairs(db_session)
-            for cc_pair in cc_pairs:
-                tasks_created = ccpair_pruning_generator_task_creation_helper(
-                    cc_pair, db_session, r, lock_beat
-                )
-                if not tasks_created:
-                    continue
-
-                task_logger.info(f"Pruning started: cc_pair_id={cc_pair.id}")
-    except SoftTimeLimitExceeded:
-        task_logger.info(
-            "Soft time limit exceeded, task is being terminated gracefully."
-        )
-    except Exception:
-        task_logger.exception("Unexpected exception")
-    finally:
-        if lock_beat.owned():
-            lock_beat.release()
-
-
-def ccpair_pruning_generator_task_creation_helper(
-    cc_pair: ConnectorCredentialPair,
-    db_session: Session,
-    r: Redis,
-    lock_beat: redis.lock.Lock,
-) -> int | None:
-    """Returns an int if pruning is triggered.
-    The int represents the number of prune tasks generated (in this case, only one
-    because the task is a long running generator task.)
-    Returns None if no pruning is triggered (due to not being needed or
-    other reasons such as simultaneous pruning restrictions.
-
-    Checks for scheduling related conditions, then delegates the rest of the checks to
-    try_creating_prune_generator_task.
-    """
-
-    lock_beat.reacquire()
-
-    # skip pruning if no prune frequency is set
-    # pruning can still be forced via the API which will run a pruning task directly
-    if not cc_pair.connector.prune_freq:
-        return None
-
-    # skip pruning if the next scheduled prune time hasn't been reached yet
-    last_pruned = cc_pair.last_pruned
-    if not last_pruned:
-        # if never pruned, use the connector time created as the last_pruned time
-        last_pruned = cc_pair.connector.time_created
-
-    next_prune = last_pruned + timedelta(seconds=cc_pair.connector.prune_freq)
-    if datetime.now(timezone.utc) < next_prune:
-        return None
-
-    return try_creating_prune_generator_task(cc_pair, db_session, r)
-
-
-def try_creating_prune_generator_task(
-    cc_pair: ConnectorCredentialPair,
-    db_session: Session,
-    r: Redis,
-) -> int | None:
-    """Checks for any conditions that should block the pruning generator task from being
-    created, then creates the task.
-
-    Does not check for scheduling related conditions as this function
-    is used to trigger prunes immediately.
-    """
-
-    if not ALLOW_SIMULTANEOUS_PRUNING:
-        for key in r.scan_iter(RedisConnectorPruning.FENCE_PREFIX + "*"):
-            return None
-
-    rcp = RedisConnectorPruning(cc_pair.id)
-
-    # skip pruning if already pruning
-    if r.exists(rcp.fence_key):
-        return None
-
-    # skip pruning if the cc_pair is deleting
-    db_session.refresh(cc_pair)
-    if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
-        return None
-
-    # add a long running generator task to the queue
-    r.delete(rcp.generator_complete_key)
-    r.delete(rcp.taskset_key)
-
-    custom_task_id = f"{rcp.generator_task_id_prefix}_{uuid4()}"
-
-    celery_app.send_task(
-        "connector_pruning_generator_task",
-        kwargs=dict(
-            connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id
-        ),
-        queue=DanswerCeleryQueues.CONNECTOR_PRUNING,
-        task_id=custom_task_id,
-        priority=DanswerCeleryPriority.LOW,
-    )
-
-    # set this only after all tasks have been added
-    r.set(rcp.fence_key, 1)
-    return 1
-
-
-@shared_task(name="connector_pruning_generator_task", soft_time_limit=JOB_TIMEOUT)
-def connector_pruning_generator_task(connector_id: int, credential_id: int) -> None:
-    """connector pruning task. For a cc pair, this task pulls all document IDs from the source
-    and compares those IDs to locally stored documents and deletes all locally stored IDs missing
-    from the most recently pulled document ID list"""
-
-    r = get_redis_client()
-
-    with Session(get_sqlalchemy_engine()) as db_session:
-        try:
-            cc_pair = get_connector_credential_pair(
-                db_session=db_session,
-                connector_id=connector_id,
-                credential_id=credential_id,
-            )
-
-            if not cc_pair:
-                task_logger.warning(
-                    f"ccpair not found for {connector_id} {credential_id}"
-                )
-                return
-
-            rcp = RedisConnectorPruning(cc_pair.id)
-
-            # Define the callback function
-            def redis_increment_callback(amount: int) -> None:
-                r.incrby(rcp.generator_progress_key, amount)
-
-            runnable_connector = instantiate_connector(
-                db_session,
-                cc_pair.connector.source,
-                InputType.PRUNE,
-                cc_pair.connector.connector_specific_config,
-                cc_pair.credential,
-            )
-
-            # a list of docs in the source
-            all_connector_doc_ids: set[str] = extract_ids_from_runnable_connector(
-                runnable_connector, redis_increment_callback
-            )
-
-            # a list of docs in our local index
-            all_indexed_document_ids = {
-                doc.id
-                for doc in get_documents_for_connector_credential_pair(
-                    db_session=db_session,
-                    connector_id=connector_id,
-                    credential_id=credential_id,
-                )
-            }
-
-            # generate list of docs to remove (no longer in the source)
-            doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
-
-            task_logger.info(
-                f"Pruning set collected: "
-                f"cc_pair_id={cc_pair.id} "
-                f"docs_to_remove={len(doc_ids_to_remove)} "
-                f"doc_source={cc_pair.connector.source}"
-            )
-
-            rcp.documents_to_prune = set(doc_ids_to_remove)
-
-            task_logger.info(
-                f"RedisConnectorPruning.generate_tasks starting. cc_pair_id={cc_pair.id}"
-            )
-            tasks_generated = rcp.generate_tasks(celery_app, db_session, r, None)
-            if tasks_generated is None:
-                return None
-
-            task_logger.info(
-                f"RedisConnectorPruning.generate_tasks finished. "
-                f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}"
-            )
-
-            r.set(rcp.generator_complete_key, tasks_generated)
-        except Exception as e:
-            task_logger.exception(
-                f"Failed to run pruning for connector id {connector_id}."
-            )
-
-            r.delete(rcp.generator_progress_key)
-            r.delete(rcp.taskset_key)
-            r.delete(rcp.fence_key)
-            raise e
--- a/backend/danswer/background/celery/tasks/shared/tasks.py
+++ b/backend/danswer/background/celery/tasks/shared/tasks.py
@@ -1,113 +0,0 @@
-from celery import shared_task
-from celery import Task
-from celery.exceptions import SoftTimeLimitExceeded
-from sqlalchemy.orm import Session
-
-from danswer.access.access import get_access_for_document
-from danswer.background.celery.celery_app import task_logger
-from danswer.db.document import delete_document_by_connector_credential_pair__no_commit
-from danswer.db.document import delete_documents_complete__no_commit
-from danswer.db.document import get_document
-from danswer.db.document import get_document_connector_count
-from danswer.db.document import mark_document_as_synced
-from danswer.db.document_set import fetch_document_sets_for_document
-from danswer.db.engine import get_sqlalchemy_engine
-from danswer.document_index.document_index_utils import get_both_index_names
-from danswer.document_index.factory import get_default_document_index
-from danswer.document_index.interfaces import VespaDocumentFields
-from danswer.server.documents.models import ConnectorCredentialPairIdentifier
-
-
-@shared_task(
-    name="document_by_cc_pair_cleanup_task",
-    bind=True,
-    soft_time_limit=45,
-    time_limit=60,
-    max_retries=3,
-)
-def document_by_cc_pair_cleanup_task(
-    self: Task, document_id: str, connector_id: int, credential_id: int
-) -> bool:
-    """A lightweight subtask used to clean up document to cc pair relationships.
-    Created by connection deletion and connector pruning parent tasks."""
-
-    """
-    To delete a connector / credential pair:
-    (1) find all documents associated with connector / credential pair where there
-    this the is only connector / credential pair that has indexed it
-    (2) delete all documents from document stores
-    (3) delete all entries from postgres
-    (4) find all documents associated with connector / credential pair where there
-    are multiple connector / credential pairs that have indexed it
-    (5) update document store entries to remove access associated with the
-    connector / credential pair from the access list
-    (6) delete all relevant entries from postgres
-    """
-    task_logger.info(f"document_id={document_id}")
-
-    try:
-        with Session(get_sqlalchemy_engine()) as db_session:
-            curr_ind_name, sec_ind_name = get_both_index_names(db_session)
-            document_index = get_default_document_index(
-                primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
-            )
-
-            count = get_document_connector_count(db_session, document_id)
-            if count == 1:
-                # count == 1 means this is the only remaining cc_pair reference to the doc
-                # delete it from vespa and the db
-                document_index.delete(doc_ids=[document_id])
-                delete_documents_complete__no_commit(
-                    db_session=db_session,
-                    document_ids=[document_id],
-                )
-            elif count > 1:
-                # count > 1 means the document still has cc_pair references
-                doc = get_document(document_id, db_session)
-                if not doc:
-                    return False
-
-                # the below functions do not include cc_pairs being deleted.
-                # i.e. they will correctly omit access for the current cc_pair
-                doc_access = get_access_for_document(
-                    document_id=document_id, db_session=db_session
-                )
-
-                doc_sets = fetch_document_sets_for_document(document_id, db_session)
-                update_doc_sets: set[str] = set(doc_sets)
-
-                fields = VespaDocumentFields(
-                    document_sets=update_doc_sets,
-                    access=doc_access,
-                    boost=doc.boost,
-                    hidden=doc.hidden,
-                )
-
-                # update Vespa. OK if doc doesn't exist. Raises exception otherwise.
-                document_index.update_single(document_id, fields=fields)
-
-                # there are still other cc_pair references to the doc, so just resync to Vespa
-                delete_document_by_connector_credential_pair__no_commit(
-                    db_session=db_session,
-                    document_id=document_id,
-                    connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
-                        connector_id=connector_id,
-                        credential_id=credential_id,
-                    ),
-                )
-
-                mark_document_as_synced(document_id, db_session)
-            else:
-                pass
-
-            db_session.commit()
-    except SoftTimeLimitExceeded:
-        task_logger.info(f"SoftTimeLimitExceeded exception. doc_id={document_id}")
-    except Exception as e:
-        task_logger.exception("Unexpected exception")
-
-        # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
-        countdown = 2 ** (self.request.retries + 4)
-        self.retry(exc=e, countdown=countdown)
-
-    return True
--- a/backend/danswer/background/celery/tasks/vespa/tasks.py
+++ b/backend/danswer/background/celery/tasks/vespa/tasks.py
@@ -1,576 +0,0 @@
-import traceback
-from typing import cast
-
-import redis
-from celery import shared_task
-from celery import Task
-from celery.exceptions import SoftTimeLimitExceeded
-from redis import Redis
-from sqlalchemy.orm import Session
-
-from danswer.access.access import get_access_for_document
-from danswer.background.celery.celery_app import celery_app
-from danswer.background.celery.celery_app import task_logger
-from danswer.background.celery.celery_redis import RedisConnectorCredentialPair
-from danswer.background.celery.celery_redis import RedisConnectorDeletion
-from danswer.background.celery.celery_redis import RedisConnectorPruning
-from danswer.background.celery.celery_redis import RedisDocumentSet
-from danswer.background.celery.celery_redis import RedisUserGroup
-from danswer.configs.app_configs import JOB_TIMEOUT
-from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
-from danswer.configs.constants import DanswerRedisLocks
-from danswer.db.connector import fetch_connector_by_id
-from danswer.db.connector import mark_ccpair_as_pruned
-from danswer.db.connector_credential_pair import add_deletion_failure_message
-from danswer.db.connector_credential_pair import (
-    delete_connector_credential_pair__no_commit,
-)
-from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
-from danswer.db.connector_credential_pair import get_connector_credential_pairs
-from danswer.db.document import count_documents_by_needs_sync
-from danswer.db.document import get_document
-from danswer.db.document import mark_document_as_synced
-from danswer.db.document_set import delete_document_set
-from danswer.db.document_set import delete_document_set_cc_pair_relationship__no_commit
-from danswer.db.document_set import fetch_document_sets
-from danswer.db.document_set import fetch_document_sets_for_document
-from danswer.db.document_set import get_document_set_by_id
-from danswer.db.document_set import mark_document_set_as_synced
-from danswer.db.engine import get_sqlalchemy_engine
-from danswer.db.index_attempt import delete_index_attempts
-from danswer.db.models import DocumentSet
-from danswer.db.models import UserGroup
-from danswer.document_index.document_index_utils import get_both_index_names
-from danswer.document_index.factory import get_default_document_index
-from danswer.document_index.interfaces import UpdateRequest
-from danswer.redis.redis_pool import get_redis_client
-from danswer.utils.variable_functionality import fetch_versioned_implementation
-from danswer.utils.variable_functionality import (
-    fetch_versioned_implementation_with_fallback,
-)
-from danswer.utils.variable_functionality import noop_fallback
-
-
-# celery auto associates tasks created inside another task,
-# which bloats the result metadata considerably. trail=False prevents this.
-@shared_task(
-    name="check_for_vespa_sync_task",
-    soft_time_limit=JOB_TIMEOUT,
-    trail=False,
-)
-def check_for_vespa_sync_task() -> None:
-    """Runs periodically to check if any document needs syncing.
-    Generates sets of tasks for Celery if syncing is needed."""
-
-    r = get_redis_client()
-
-    lock_beat = r.lock(
-        DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
-        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
-    )
-
-    try:
-        # these tasks should never overlap
-        if not lock_beat.acquire(blocking=False):
-            return
-
-        with Session(get_sqlalchemy_engine()) as db_session:
-            try_generate_stale_document_sync_tasks(db_session, r, lock_beat)
-
-            # check if any document sets are not synced
-            document_set_info = fetch_document_sets(
-                user_id=None, db_session=db_session, include_outdated=True
-            )
-            for document_set, _ in document_set_info:
-                try_generate_document_set_sync_tasks(
-                    document_set, db_session, r, lock_beat
-                )
-
-            # check if any user groups are not synced
-            try:
-                fetch_user_groups = fetch_versioned_implementation(
-                    "danswer.db.user_group", "fetch_user_groups"
-                )
-
-                user_groups = fetch_user_groups(
-                    db_session=db_session, only_up_to_date=False
-                )
-                for usergroup in user_groups:
-                    try_generate_user_group_sync_tasks(
-                        usergroup, db_session, r, lock_beat
-                    )
-            except ModuleNotFoundError:
-                # Always exceptions on the MIT version, which is expected
-                pass
-    except SoftTimeLimitExceeded:
-        task_logger.info(
-            "Soft time limit exceeded, task is being terminated gracefully."
-        )
-    except Exception:
-        task_logger.exception("Unexpected exception")
-    finally:
-        if lock_beat.owned():
-            lock_beat.release()
-
-
-def try_generate_stale_document_sync_tasks(
-    db_session: Session, r: Redis, lock_beat: redis.lock.Lock
-) -> int | None:
-    # the fence is up, do nothing
-    if r.exists(RedisConnectorCredentialPair.get_fence_key()):
-        return None
-
-    r.delete(RedisConnectorCredentialPair.get_taskset_key())  # delete the taskset
-
-    # add tasks to celery and build up the task set to monitor in redis
-    stale_doc_count = count_documents_by_needs_sync(db_session)
-    if stale_doc_count == 0:
-        return None
-
-    task_logger.info(
-        f"Stale documents found (at least {stale_doc_count}). Generating sync tasks by cc pair."
-    )
-
-    task_logger.info("RedisConnector.generate_tasks starting by cc_pair.")
-
-    # rkuo: we could technically sync all stale docs in one big pass.
-    # but I feel it's more understandable to group the docs by cc_pair
-    total_tasks_generated = 0
-    cc_pairs = get_connector_credential_pairs(db_session)
-    for cc_pair in cc_pairs:
-        rc = RedisConnectorCredentialPair(cc_pair.id)
-        tasks_generated = rc.generate_tasks(celery_app, db_session, r, lock_beat)
-
-        if tasks_generated is None:
-            continue
-
-        if tasks_generated == 0:
-            continue
-
-        task_logger.info(
-            f"RedisConnector.generate_tasks finished for single cc_pair. "
-            f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}"
-        )
-
-        total_tasks_generated += tasks_generated
-
-    task_logger.info(
-        f"RedisConnector.generate_tasks finished for all cc_pairs. total_tasks_generated={total_tasks_generated}"
-    )
-
-    r.set(RedisConnectorCredentialPair.get_fence_key(), total_tasks_generated)
-    return total_tasks_generated
-
-
-def try_generate_document_set_sync_tasks(
-    document_set: DocumentSet, db_session: Session, r: Redis, lock_beat: redis.lock.Lock
-) -> int | None:
-    lock_beat.reacquire()
-
-    rds = RedisDocumentSet(document_set.id)
-
-    # don't generate document set sync tasks if tasks are still pending
-    if r.exists(rds.fence_key):
-        return None
-
-    # don't generate sync tasks if we're up to date
-    # race condition with the monitor/cleanup function if we use a cached result!
-    db_session.refresh(document_set)
-    if document_set.is_up_to_date:
-        return None
-
-    # add tasks to celery and build up the task set to monitor in redis
-    r.delete(rds.taskset_key)
-
-    task_logger.info(
-        f"RedisDocumentSet.generate_tasks starting. document_set_id={document_set.id}"
-    )
-
-    # Add all documents that need to be updated into the queue
-    tasks_generated = rds.generate_tasks(celery_app, db_session, r, lock_beat)
-    if tasks_generated is None:
-        return None
-
-    # Currently we are allowing the sync to proceed with 0 tasks.
-    # It's possible for sets/groups to be generated initially with no entries
-    # and they still need to be marked as up to date.
-    # if tasks_generated == 0:
-    #     return 0
-
-    task_logger.info(
-        f"RedisDocumentSet.generate_tasks finished. "
-        f"document_set_id={document_set.id} tasks_generated={tasks_generated}"
-    )
-
-    # set this only after all tasks have been added
-    r.set(rds.fence_key, tasks_generated)
-    return tasks_generated
-
-
-def try_generate_user_group_sync_tasks(
-    usergroup: UserGroup, db_session: Session, r: Redis, lock_beat: redis.lock.Lock
-) -> int | None:
-    lock_beat.reacquire()
-
-    rug = RedisUserGroup(usergroup.id)
-
-    # don't generate sync tasks if tasks are still pending
-    if r.exists(rug.fence_key):
-        return None
-
-    # race condition with the monitor/cleanup function if we use a cached result!
-    db_session.refresh(usergroup)
-    if usergroup.is_up_to_date:
-        return None
-
-    # add tasks to celery and build up the task set to monitor in redis
-    r.delete(rug.taskset_key)
-
-    # Add all documents that need to be updated into the queue
-    task_logger.info(
-        f"RedisUserGroup.generate_tasks starting. usergroup_id={usergroup.id}"
-    )
-    tasks_generated = rug.generate_tasks(celery_app, db_session, r, lock_beat)
-    if tasks_generated is None:
-        return None
-
-    # Currently we are allowing the sync to proceed with 0 tasks.
-    # It's possible for sets/groups to be generated initially with no entries
-    # and they still need to be marked as up to date.
-    # if tasks_generated == 0:
-    #     return 0
-
-    task_logger.info(
-        f"RedisUserGroup.generate_tasks finished. "
-        f"usergroup_id={usergroup.id} tasks_generated={tasks_generated}"
-    )
-
-    # set this only after all tasks have been added
-    r.set(rug.fence_key, tasks_generated)
-    return tasks_generated
-
-
-def monitor_connector_taskset(r: Redis) -> None:
-    fence_value = r.get(RedisConnectorCredentialPair.get_fence_key())
-    if fence_value is None:
-        return
-
-    try:
-        initial_count = int(cast(int, fence_value))
-    except ValueError:
-        task_logger.error("The value is not an integer.")
-        return
-
-    count = r.scard(RedisConnectorCredentialPair.get_taskset_key())
-    task_logger.info(
-        f"Stale document sync progress: remaining={count} initial={initial_count}"
-    )
-    if count == 0:
-        r.delete(RedisConnectorCredentialPair.get_taskset_key())
-        r.delete(RedisConnectorCredentialPair.get_fence_key())
-        task_logger.info(f"Successfully synced stale documents. count={initial_count}")
-
-
-def monitor_document_set_taskset(
-    key_bytes: bytes, r: Redis, db_session: Session
-) -> None:
-    fence_key = key_bytes.decode("utf-8")
-    document_set_id = RedisDocumentSet.get_id_from_fence_key(fence_key)
-    if document_set_id is None:
-        task_logger.warning(f"could not parse document set id from {fence_key}")
-        return
-
-    rds = RedisDocumentSet(document_set_id)
-
-    fence_value = r.get(rds.fence_key)
-    if fence_value is None:
-        return
-
-    try:
-        initial_count = int(cast(int, fence_value))
-    except ValueError:
-        task_logger.error("The value is not an integer.")
-        return
-
-    count = cast(int, r.scard(rds.taskset_key))
-    task_logger.info(
-        f"Document set sync progress: document_set_id={document_set_id} remaining={count} initial={initial_count}"
-    )
-    if count > 0:
-        return
-
-    document_set = cast(
-        DocumentSet,
-        get_document_set_by_id(db_session=db_session, document_set_id=document_set_id),
-    )  # casting since we "know" a document set with this ID exists
-    if document_set:
-        if not document_set.connector_credential_pairs:
-            # if there are no connectors, then delete the document set.
-            delete_document_set(document_set_row=document_set, db_session=db_session)
-            task_logger.info(
-                f"Successfully deleted document set with ID: '{document_set_id}'!"
-            )
-        else:
-            mark_document_set_as_synced(document_set_id, db_session)
-            task_logger.info(
-                f"Successfully synced document set with ID: '{document_set_id}'!"
-            )
-
-    r.delete(rds.taskset_key)
-    r.delete(rds.fence_key)
-
-
-def monitor_connector_deletion_taskset(key_bytes: bytes, r: Redis) -> None:
-    fence_key = key_bytes.decode("utf-8")
-    cc_pair_id = RedisConnectorDeletion.get_id_from_fence_key(fence_key)
-    if cc_pair_id is None:
-        task_logger.warning(f"could not parse cc_pair_id from {fence_key}")
-        return
-
-    rcd = RedisConnectorDeletion(cc_pair_id)
-
-    fence_value = r.get(rcd.fence_key)
-    if fence_value is None:
-        return
-
-    try:
-        initial_count = int(cast(int, fence_value))
-    except ValueError:
-        task_logger.error("The value is not an integer.")
-        return
-
-    count = cast(int, r.scard(rcd.taskset_key))
-    task_logger.info(
-        f"Connector deletion progress: cc_pair_id={cc_pair_id} remaining={count} initial={initial_count}"
-    )
-    if count > 0:
-        return
-
-    with Session(get_sqlalchemy_engine()) as db_session:
-        cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session)
-        if not cc_pair:
-            task_logger.warning(
-                f"monitor_connector_deletion_taskset - cc_pair_id not found: cc_pair_id={cc_pair_id}"
-            )
-            return
-
-        try:
-            # clean up the rest of the related Postgres entities
-            # index attempts
-            delete_index_attempts(
-                db_session=db_session,
-                cc_pair_id=cc_pair.id,
-            )
-
-            # document sets
-            delete_document_set_cc_pair_relationship__no_commit(
-                db_session=db_session,
-                connector_id=cc_pair.connector_id,
-                credential_id=cc_pair.credential_id,
-            )
-
-            # user groups
-            cleanup_user_groups = fetch_versioned_implementation_with_fallback(
-                "danswer.db.user_group",
-                "delete_user_group_cc_pair_relationship__no_commit",
-                noop_fallback,
-            )
-            cleanup_user_groups(
-                cc_pair_id=cc_pair.id,
-                db_session=db_session,
-            )
-
-            # finally, delete the cc-pair
-            delete_connector_credential_pair__no_commit(
-                db_session=db_session,
-                connector_id=cc_pair.connector_id,
-                credential_id=cc_pair.credential_id,
-            )
-            # if there are no credentials left, delete the connector
-            connector = fetch_connector_by_id(
-                db_session=db_session,
-                connector_id=cc_pair.connector_id,
-            )
-            if not connector or not len(connector.credentials):
-                task_logger.info(
-                    "Found no credentials left for connector, deleting connector"
-                )
-                db_session.delete(connector)
-            db_session.commit()
-        except Exception as e:
-            stack_trace = traceback.format_exc()
-            error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}"
-            add_deletion_failure_message(db_session, cc_pair.id, error_message)
-            task_logger.exception(
-                f"Failed to run connector_deletion. "
-                f"cc_pair_id={cc_pair_id} connector_id={cc_pair.connector_id} credential_id={cc_pair.credential_id}"
-            )
-            raise e
-
-    task_logger.info(
-        f"Successfully deleted cc_pair: "
-        f"cc_pair_id={cc_pair_id} "
-        f"connector_id={cc_pair.connector_id} "
-        f"credential_id={cc_pair.credential_id} "
-        f"docs_deleted={initial_count}"
-    )
-
-    r.delete(rcd.taskset_key)
-    r.delete(rcd.fence_key)
-
-
-def monitor_ccpair_pruning_taskset(
-    key_bytes: bytes, r: Redis, db_session: Session
-) -> None:
-    fence_key = key_bytes.decode("utf-8")
-    cc_pair_id = RedisConnectorPruning.get_id_from_fence_key(fence_key)
-    if cc_pair_id is None:
-        task_logger.warning(
-            f"monitor_connector_pruning_taskset: could not parse cc_pair_id from {fence_key}"
-        )
-        return
-
-    rcp = RedisConnectorPruning(cc_pair_id)
-
-    fence_value = r.get(rcp.fence_key)
-    if fence_value is None:
-        return
-
-    generator_value = r.get(rcp.generator_complete_key)
-    if generator_value is None:
-        return
-
-    try:
-        initial_count = int(cast(int, generator_value))
-    except ValueError:
-        task_logger.error("The value is not an integer.")
-        return
-
-    count = cast(int, r.scard(rcp.taskset_key))
-    task_logger.info(
-        f"Connector pruning progress: cc_pair_id={cc_pair_id} remaining={count} initial={initial_count}"
-    )
-    if count > 0:
-        return
-
-    mark_ccpair_as_pruned(cc_pair_id, db_session)
-    task_logger.info(
-        f"Successfully pruned connector credential pair. cc_pair_id={cc_pair_id}"
-    )
-
-    r.delete(rcp.taskset_key)
-    r.delete(rcp.generator_progress_key)
-    r.delete(rcp.generator_complete_key)
-    r.delete(rcp.fence_key)
-
-
-@shared_task(name="monitor_vespa_sync", soft_time_limit=300)
-def monitor_vespa_sync() -> None:
-    """This is a celery beat task that monitors and finalizes metadata sync tasksets.
-    It scans for fence values and then gets the counts of any associated tasksets.
-    If the count is 0, that means all tasks finished and we should clean up.
-
-    This task lock timeout is CELERY_METADATA_SYNC_BEAT_LOCK_TIMEOUT seconds, so don't
-    do anything too expensive in this function!
-    """
-    r = get_redis_client()
-
-    lock_beat = r.lock(
-        DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK,
-        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
-    )
-
-    try:
-        # prevent overlapping tasks
-        if not lock_beat.acquire(blocking=False):
-            return
-
-        if r.exists(RedisConnectorCredentialPair.get_fence_key()):
-            monitor_connector_taskset(r)
-
-        for key_bytes in r.scan_iter(RedisConnectorDeletion.FENCE_PREFIX + "*"):
-            monitor_connector_deletion_taskset(key_bytes, r)
-
-        with Session(get_sqlalchemy_engine()) as db_session:
-            for key_bytes in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"):
-                monitor_document_set_taskset(key_bytes, r, db_session)
-
-            for key_bytes in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"):
-                monitor_usergroup_taskset = (
-                    fetch_versioned_implementation_with_fallback(
-                        "danswer.background.celery.tasks.vespa.tasks",
-                        "monitor_usergroup_taskset",
-                        noop_fallback,
-                    )
-                )
-                monitor_usergroup_taskset(key_bytes, r, db_session)
-
-            for key_bytes in r.scan_iter(RedisConnectorPruning.FENCE_PREFIX + "*"):
-                monitor_ccpair_pruning_taskset(key_bytes, r, db_session)
-
-        # uncomment for debugging if needed
-        # r_celery = celery_app.broker_connection().channel().client
-        # length = celery_get_queue_length(DanswerCeleryQueues.VESPA_METADATA_SYNC, r_celery)
-        # task_logger.warning(f"queue={DanswerCeleryQueues.VESPA_METADATA_SYNC} length={length}")
-    except SoftTimeLimitExceeded:
-        task_logger.info(
-            "Soft time limit exceeded, task is being terminated gracefully."
-        )
-    finally:
-        if lock_beat.owned():
-            lock_beat.release()
-
-
-@shared_task(
-    name="vespa_metadata_sync_task",
-    bind=True,
-    soft_time_limit=45,
-    time_limit=60,
-    max_retries=3,
-)
-def vespa_metadata_sync_task(self: Task, document_id: str) -> bool:
-    task_logger.info(f"document_id={document_id}")
-
-    try:
-        with Session(get_sqlalchemy_engine()) as db_session:
-            curr_ind_name, sec_ind_name = get_both_index_names(db_session)
-            document_index = get_default_document_index(
-                primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
-            )
-
-            doc = get_document(document_id, db_session)
-            if not doc:
-                return False
-
-            # document set sync
-            doc_sets = fetch_document_sets_for_document(document_id, db_session)
-            update_doc_sets: set[str] = set(doc_sets)
-
-            # User group sync
-            doc_access = get_access_for_document(
-                document_id=document_id, db_session=db_session
-            )
-            update_request = UpdateRequest(
-                document_ids=[document_id],
-                document_sets=update_doc_sets,
-                access=doc_access,
-                boost=doc.boost,
-                hidden=doc.hidden,
-            )
-
-            # update Vespa
-            document_index.update(update_requests=[update_request])
-
-            # update db last. Worst case = we crash right before this and
-            # the sync might repeat again later
-            mark_document_as_synced(document_id, db_session)
-    except SoftTimeLimitExceeded:
-        task_logger.info(f"SoftTimeLimitExceeded exception. doc_id={document_id}")
-    except Exception as e:
-        task_logger.exception("Unexpected exception")
-
-        # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
-        countdown = 2 ** (self.request.retries + 4)
-        self.retry(exc=e, countdown=countdown)
-
-    return True
--- a/backend/danswer/background/connector_deletion.py
+++ b/backend/danswer/background/connector_deletion.py
@@ -0,0 +1,195 @@
+"""
+To delete a connector / credential pair:
+(1) find all documents associated with connector / credential pair where there
+this the is only connector / credential pair that has indexed it
+(2) delete all documents from document stores
+(3) delete all entries from postgres
+(4) find all documents associated with connector / credential pair where there
+are multiple connector / credential pairs that have indexed it
+(5) update document store entries to remove access associated with the
+connector / credential pair from the access list
+(6) delete all relevant entries from postgres
+"""
+from sqlalchemy.orm import Session
+
+from danswer.access.access import get_access_for_documents
+from danswer.db.connector import fetch_connector_by_id
+from danswer.db.connector_credential_pair import (
+    delete_connector_credential_pair__no_commit,
+)
+from danswer.db.document import delete_document_by_connector_credential_pair__no_commit
+from danswer.db.document import delete_documents_complete__no_commit
+from danswer.db.document import get_document_connector_cnts
+from danswer.db.document import get_documents_for_connector_credential_pair
+from danswer.db.document import prepare_to_modify_documents
+from danswer.db.document_set import delete_document_set_cc_pair_relationship__no_commit
+from danswer.db.document_set import fetch_document_sets_for_documents
+from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.index_attempt import delete_index_attempts
+from danswer.db.models import ConnectorCredentialPair
+from danswer.document_index.interfaces import DocumentIndex
+from danswer.document_index.interfaces import UpdateRequest
+from danswer.server.documents.models import ConnectorCredentialPairIdentifier
+from danswer.utils.logger import setup_logger
+from danswer.utils.variable_functionality import (
+    fetch_versioned_implementation_with_fallback,
+)
+from danswer.utils.variable_functionality import noop_fallback
+
+logger = setup_logger()
+
+_DELETION_BATCH_SIZE = 1000
+
+
+def delete_connector_credential_pair_batch(
+    document_ids: list[str],
+    connector_id: int,
+    credential_id: int,
+    document_index: DocumentIndex,
+) -> None:
+    """
+    Removes a batch of documents ids from a cc-pair. If no other cc-pair uses a document anymore
+    it gets permanently deleted.
+    """
+    with Session(get_sqlalchemy_engine()) as db_session:
+        # acquire lock for all documents in this batch so that indexing can't
+        # override the deletion
+        with prepare_to_modify_documents(
+            db_session=db_session, document_ids=document_ids
+        ):
+            document_connector_cnts = get_document_connector_cnts(
+                db_session=db_session, document_ids=document_ids
+            )
+
+            # figure out which docs need to be completely deleted
+            document_ids_to_delete = [
+                document_id for document_id, cnt in document_connector_cnts if cnt == 1
+            ]
+            logger.debug(f"Deleting documents: {document_ids_to_delete}")
+
+            document_index.delete(doc_ids=document_ids_to_delete)
+
+            delete_documents_complete__no_commit(
+                db_session=db_session,
+                document_ids=document_ids_to_delete,
+            )
+
+            # figure out which docs need to be updated
+            document_ids_to_update = [
+                document_id for document_id, cnt in document_connector_cnts if cnt > 1
+            ]
+
+            # maps document id to list of document set names
+            new_doc_sets_for_documents: dict[str, set[str]] = {
+                document_id_and_document_set_names_tuple[0]: set(
+                    document_id_and_document_set_names_tuple[1]
+                )
+                for document_id_and_document_set_names_tuple in fetch_document_sets_for_documents(
+                    db_session=db_session,
+                    document_ids=document_ids_to_update,
+                )
+            }
+
+            # determine future ACLs for documents in batch
+            access_for_documents = get_access_for_documents(
+                document_ids=document_ids_to_update,
+                db_session=db_session,
+            )
+
+            # update Vespa
+            logger.debug(f"Updating documents: {document_ids_to_update}")
+            update_requests = [
+                UpdateRequest(
+                    document_ids=[document_id],
+                    access=access,
+                    document_sets=new_doc_sets_for_documents[document_id],
+                )
+                for document_id, access in access_for_documents.items()
+            ]
+            document_index.update(update_requests=update_requests)
+
+            # clean up Postgres
+            delete_document_by_connector_credential_pair__no_commit(
+                db_session=db_session,
+                document_ids=document_ids_to_update,
+                connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
+                    connector_id=connector_id,
+                    credential_id=credential_id,
+                ),
+            )
+            db_session.commit()
+
+
+def delete_connector_credential_pair(
+    db_session: Session,
+    document_index: DocumentIndex,
+    cc_pair: ConnectorCredentialPair,
+) -> int:
+    connector_id = cc_pair.connector_id
+    credential_id = cc_pair.credential_id
+
+    num_docs_deleted = 0
+    while True:
+        documents = get_documents_for_connector_credential_pair(
+            db_session=db_session,
+            connector_id=connector_id,
+            credential_id=credential_id,
+            limit=_DELETION_BATCH_SIZE,
+        )
+        if not documents:
+            break
+
+        delete_connector_credential_pair_batch(
+            document_ids=[document.id for document in documents],
+            connector_id=connector_id,
+            credential_id=credential_id,
+            document_index=document_index,
+        )
+        num_docs_deleted += len(documents)
+
+    # clean up the rest of the related Postgres entities
+    # index attempts
+    delete_index_attempts(
+        db_session=db_session,
+        cc_pair_id=cc_pair.id,
+    )
+
+    # document sets
+    delete_document_set_cc_pair_relationship__no_commit(
+        db_session=db_session,
+        connector_id=connector_id,
+        credential_id=credential_id,
+    )
+
+    # user groups
+    cleanup_user_groups = fetch_versioned_implementation_with_fallback(
+        "danswer.db.user_group",
+        "delete_user_group_cc_pair_relationship__no_commit",
+        noop_fallback,
+    )
+    cleanup_user_groups(
+        cc_pair_id=cc_pair.id,
+        db_session=db_session,
+    )
+
+    # finally, delete the cc-pair
+    delete_connector_credential_pair__no_commit(
+        db_session=db_session,
+        connector_id=connector_id,
+        credential_id=credential_id,
+    )
+    # if there are no credentials left, delete the connector
+    connector = fetch_connector_by_id(
+        db_session=db_session,
+        connector_id=connector_id,
+    )
+    if not connector or not len(connector.credentials):
+        logger.info("Found no credentials left for connector, deleting connector")
+        db_session.delete(connector)
+    db_session.commit()
+
+    logger.notice(
+        "Successfully deleted connector_credential_pair with connector_id:"
+        f" '{connector_id}' and credential_id: '{credential_id}'. Deleted {num_docs_deleted} docs."
+    )
+    return num_docs_deleted
--- a/backend/danswer/background/indexing/run_indexing.py
+++ b/backend/danswer/background/indexing/run_indexing.py
@@ -14,7 +14,6 @@ from danswer.configs.app_configs import POLL_CONNECTOR_OFFSET
 from danswer.connectors.connector_runner import ConnectorRunner
 from danswer.connectors.factory import instantiate_connector
 from danswer.connectors.models import IndexAttemptMetadata
-from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
 from danswer.db.connector_credential_pair import get_last_successful_attempt_time
 from danswer.db.connector_credential_pair import update_connector_credential_pair
 from danswer.db.engine import get_sqlalchemy_engine
@@ -30,7 +29,6 @@ from danswer.db.models import IndexingStatus
 from danswer.db.models import IndexModelStatus
 from danswer.document_index.factory import get_default_document_index
 from danswer.indexing.embedder import DefaultIndexingEmbedder
-from danswer.indexing.indexing_heartbeat import IndexingHeartbeat
 from danswer.indexing.indexing_pipeline import build_indexing_pipeline
 from danswer.utils.logger import IndexAttemptSingleton
 from danswer.utils.logger import setup_logger
@@ -50,7 +48,7 @@ def _get_connector_runner(
    """
    NOTE: `start_time` and `end_time` are only used for poll connectors

-    Returns an iterator of document batches and whether the returned documents
+    Returns an interator of document batches and whether the returned documents
    are the complete list of existing documents of the connector. If the task
    of type LOAD_STATE, the list will be considered complete and otherwise incomplete.
    """
@@ -58,27 +56,22 @@ def _get_connector_runner(

    try:
        runnable_connector = instantiate_connector(
-            db_session=db_session,
-            source=attempt.connector_credential_pair.connector.source,
-            input_type=task,
-            connector_specific_config=attempt.connector_credential_pair.connector.connector_specific_config,
-            credential=attempt.connector_credential_pair.credential,
+            attempt.connector_credential_pair.connector.source,
+            task,
+            attempt.connector_credential_pair.connector.connector_specific_config,
+            attempt.connector_credential_pair.credential,
+            db_session,
        )
    except Exception as e:
        logger.exception(f"Unable to instantiate connector due to {e}")
        # since we failed to even instantiate the connector, we pause the CCPair since
        # it will never succeed
-
-        cc_pair = get_connector_credential_pair_from_id(
-            attempt.connector_credential_pair.id, db_session
+        update_connector_credential_pair(
+            db_session=db_session,
+            connector_id=attempt.connector_credential_pair.connector.id,
+            credential_id=attempt.connector_credential_pair.credential.id,
+            status=ConnectorCredentialPairStatus.PAUSED,
        )
-        if cc_pair and cc_pair.status == ConnectorCredentialPairStatus.ACTIVE:
-            update_connector_credential_pair(
-                db_session=db_session,
-                connector_id=attempt.connector_credential_pair.connector.id,
-                credential_id=attempt.connector_credential_pair.credential.id,
-                status=ConnectorCredentialPairStatus.PAUSED,
-            )
        raise e

    return ConnectorRunner(
@@ -110,24 +103,15 @@ def _run_indexing(
    )

    embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
-        search_settings=search_settings,
-        heartbeat=IndexingHeartbeat(
-            index_attempt_id=index_attempt.id,
-            db_session=db_session,
-            # let the world know we're still making progress after
-            # every 10 batches
-            freq=10,
-        ),
+        search_settings=search_settings
    )

    indexing_pipeline = build_indexing_pipeline(
        attempt_id=index_attempt.id,
        embedder=embedding_model,
        document_index=document_index,
-        ignore_time_skip=(
-            index_attempt.from_beginning
-            or (search_settings.status == IndexModelStatus.FUTURE)
-        ),
+        ignore_time_skip=index_attempt.from_beginning
+        or (search_settings.status == IndexModelStatus.FUTURE),
        db_session=db_session,
    )

--- a/backend/danswer/background/task_utils.py
+++ b/backend/danswer/background/task_utils.py
@@ -14,6 +14,14 @@ from danswer.db.tasks import mark_task_start
 from danswer.db.tasks import register_task


+def name_cc_cleanup_task(connector_id: int, credential_id: int) -> str:
+    return f"cleanup_connector_credential_pair_{connector_id}_{credential_id}"
+
+
+def name_document_set_sync_task(document_set_id: int) -> str:
+    return f"sync_doc_set_{document_set_id}"
+
+
 def name_cc_prune_task(
    connector_id: int | None = None, credential_id: int | None = None
 ) -> str:
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -23,7 +23,7 @@ from danswer.db.connector import fetch_connectors
 from danswer.db.connector_credential_pair import fetch_connector_credential_pairs
 from danswer.db.engine import get_db_current_time
 from danswer.db.engine import get_sqlalchemy_engine
-from danswer.db.engine import SqlEngine
+from danswer.db.engine import init_sqlalchemy_engine
 from danswer.db.index_attempt import create_index_attempt
 from danswer.db.index_attempt import get_index_attempt
 from danswer.db.index_attempt import get_inprogress_index_attempts
@@ -96,20 +96,14 @@ def _should_create_new_indexing(
            if last_index.status == IndexingStatus.IN_PROGRESS:
                return False
        else:
-            if (
-                connector.id == 0 or connector.source == DocumentSource.INGESTION_API
-            ):  # Ingestion API
+            if connector.id == 0:  # Ingestion API
                return False
        return True

    # If the connector is paused or is the ingestion API, don't index
    # NOTE: during an embedding model switch over, the following logic
    # is bypassed by the above check for a future model
-    if (
-        not cc_pair.status.is_active()
-        or connector.id == 0
-        or connector.source == DocumentSource.INGESTION_API
-    ):
+    if not cc_pair.status.is_active() or connector.id == 0:
        return False

    if not last_index:
@@ -217,6 +211,7 @@ def cleanup_indexing_jobs(
    timeout_hours: int = CLEANUP_INDEXING_JOBS_TIMEOUT,
 ) -> dict[int, Future | SimpleJob]:
    existing_jobs_copy = existing_jobs.copy()
+
    # clean up completed jobs
    with Session(get_sqlalchemy_engine()) as db_session:
        for attempt_id, job in existing_jobs.items():
@@ -317,12 +312,7 @@ def kickoff_indexing_jobs(

    indexing_attempt_count = 0

-    primary_client_full = False
-    secondary_client_full = False
    for attempt, search_settings in new_indexing_attempts:
-        if primary_client_full and secondary_client_full:
-            break
-
        use_secondary_index = (
            search_settings.status == IndexModelStatus.FUTURE
            if search_settings is not None
@@ -347,28 +337,22 @@ def kickoff_indexing_jobs(
                )
            continue

-        if not use_secondary_index:
-            if not primary_client_full:
-                run = client.submit(
-                    run_indexing_entrypoint,
-                    attempt.id,
-                    attempt.connector_credential_pair_id,
-                    global_version.get_is_ee_version(),
-                    pure=False,
-                )
-                if not run:
-                    primary_client_full = True
+        if use_secondary_index:
+            run = secondary_client.submit(
+                run_indexing_entrypoint,
+                attempt.id,
+                attempt.connector_credential_pair_id,
+                global_version.get_is_ee_version(),
+                pure=False,
+            )
        else:
-            if not secondary_client_full:
-                run = secondary_client.submit(
-                    run_indexing_entrypoint,
-                    attempt.id,
-                    attempt.connector_credential_pair_id,
-                    global_version.get_is_ee_version(),
-                    pure=False,
-                )
-                if not run:
-                    secondary_client_full = True
+            run = client.submit(
+                run_indexing_entrypoint,
+                attempt.id,
+                attempt.connector_credential_pair_id,
+                global_version.get_is_ee_version(),
+                pure=False,
+            )

        if run:
            if indexing_attempt_count == 0:
@@ -422,7 +406,6 @@ def update_loop(
            warm_up_bi_encoder(
                embedding_model=embedding_model,
            )
-            logger.notice("First inference complete.")

    client_primary: Client | SimpleJobClient
    client_secondary: Client | SimpleJobClient
@@ -451,7 +434,6 @@ def update_loop(

    existing_jobs: dict[int, Future | SimpleJob] = {}

-    logger.notice("Startup complete. Waiting for indexing jobs...")
    while True:
        start = time.time()
        start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
@@ -483,9 +465,7 @@ def update_loop(

 def update__main() -> None:
    set_is_ee_based_on_env_variable()
-
-    # initialize the Postgres connection pool
-    SqlEngine.set_app_name(POSTGRES_INDEXER_APP_NAME)
+    init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME)

    logger.notice("Starting indexing service")
    update_loop()
--- a/backend/danswer/chat/load_yamls.py
+++ b/backend/danswer/chat/load_yamls.py
@@ -6,6 +6,7 @@ from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from danswer.configs.chat_configs import PERSONAS_YAML
 from danswer.configs.chat_configs import PROMPTS_YAML
 from danswer.db.document_set import get_or_create_document_set_by_name
+from danswer.db.engine import get_sqlalchemy_engine
 from danswer.db.input_prompt import insert_input_prompt_if_not_exists
 from danswer.db.models import DocumentSet as DocumentSetDBModel
 from danswer.db.models import Persona
@@ -17,32 +18,30 @@ from danswer.db.persona import upsert_prompt
 from danswer.search.enums import RecencyBiasSetting


-def load_prompts_from_yaml(
-    db_session: Session, prompts_yaml: str = PROMPTS_YAML
-) -> None:
+def load_prompts_from_yaml(prompts_yaml: str = PROMPTS_YAML) -> None:
    with open(prompts_yaml, "r") as file:
        data = yaml.safe_load(file)

    all_prompts = data.get("prompts", [])
-    for prompt in all_prompts:
-        upsert_prompt(
-            user=None,
-            prompt_id=prompt.get("id"),
-            name=prompt["name"],
-            description=prompt["description"].strip(),
-            system_prompt=prompt["system"].strip(),
-            task_prompt=prompt["task"].strip(),
-            include_citations=prompt["include_citations"],
-            datetime_aware=prompt.get("datetime_aware", True),
-            default_prompt=True,
-            personas=None,
-            db_session=db_session,
-            commit=True,
-        )
+    with Session(get_sqlalchemy_engine()) as db_session:
+        for prompt in all_prompts:
+            upsert_prompt(
+                user=None,
+                prompt_id=prompt.get("id"),
+                name=prompt["name"],
+                description=prompt["description"].strip(),
+                system_prompt=prompt["system"].strip(),
+                task_prompt=prompt["task"].strip(),
+                include_citations=prompt["include_citations"],
+                datetime_aware=prompt.get("datetime_aware", True),
+                default_prompt=True,
+                personas=None,
+                db_session=db_session,
+                commit=True,
+            )


 def load_personas_from_yaml(
-    db_session: Session,
    personas_yaml: str = PERSONAS_YAML,
    default_chunks: float = MAX_CHUNKS_FED_TO_CHAT,
 ) -> None:
@@ -50,117 +49,117 @@ def load_personas_from_yaml(
        data = yaml.safe_load(file)

    all_personas = data.get("personas", [])
-    for persona in all_personas:
-        doc_set_names = persona["document_sets"]
-        doc_sets: list[DocumentSetDBModel] = [
-            get_or_create_document_set_by_name(db_session, name)
-            for name in doc_set_names
-        ]
-
-        # Assume if user hasn't set any document sets for the persona, the user may want
-        # to later attach document sets to the persona manually, therefore, don't overwrite/reset
-        # the document sets for the persona
-        doc_set_ids: list[int] | None = None
-        if doc_sets:
-            doc_set_ids = [doc_set.id for doc_set in doc_sets]
-        else:
-            doc_set_ids = None
-
-        prompt_ids: list[int] | None = None
-        prompt_set_names = persona["prompts"]
-        if prompt_set_names:
-            prompts: list[PromptDBModel | None] = [
-                get_prompt_by_name(prompt_name, user=None, db_session=db_session)
-                for prompt_name in prompt_set_names
+    with Session(get_sqlalchemy_engine()) as db_session:
+        for persona in all_personas:
+            doc_set_names = persona["document_sets"]
+            doc_sets: list[DocumentSetDBModel] = [
+                get_or_create_document_set_by_name(db_session, name)
+                for name in doc_set_names
            ]
-            if any([prompt is None for prompt in prompts]):
-                raise ValueError("Invalid Persona configs, not all prompts exist")

-            if prompts:
-                prompt_ids = [prompt.id for prompt in prompts if prompt is not None]
+            # Assume if user hasn't set any document sets for the persona, the user may want
+            # to later attach document sets to the persona manually, therefore, don't overwrite/reset
+            # the document sets for the persona
+            doc_set_ids: list[int] | None = None
+            if doc_sets:
+                doc_set_ids = [doc_set.id for doc_set in doc_sets]
+            else:
+                doc_set_ids = None

-        p_id = persona.get("id")
-        tool_ids = []
-        if persona.get("image_generation"):
-            image_gen_tool = (
-                db_session.query(ToolDBModel)
-                .filter(ToolDBModel.name == "ImageGenerationTool")
+            prompt_ids: list[int] | None = None
+            prompt_set_names = persona["prompts"]
+            if prompt_set_names:
+                prompts: list[PromptDBModel | None] = [
+                    get_prompt_by_name(prompt_name, user=None, db_session=db_session)
+                    for prompt_name in prompt_set_names
+                ]
+                if any([prompt is None for prompt in prompts]):
+                    raise ValueError("Invalid Persona configs, not all prompts exist")
+
+                if prompts:
+                    prompt_ids = [prompt.id for prompt in prompts if prompt is not None]
+
+            p_id = persona.get("id")
+            tool_ids = []
+            if persona.get("image_generation"):
+                image_gen_tool = (
+                    db_session.query(ToolDBModel)
+                    .filter(ToolDBModel.name == "ImageGenerationTool")
+                    .first()
+                )
+                if image_gen_tool:
+                    tool_ids.append(image_gen_tool.id)
+
+            llm_model_provider_override = persona.get("llm_model_provider_override")
+            llm_model_version_override = persona.get("llm_model_version_override")
+
+            # Set specific overrides for image generation persona
+            if persona.get("image_generation"):
+                llm_model_version_override = "gpt-4o"
+
+            existing_persona = (
+                db_session.query(Persona)
+                .filter(Persona.name == persona["name"])
                .first()
            )
-            if image_gen_tool:
-                tool_ids.append(image_gen_tool.id)

-        llm_model_provider_override = persona.get("llm_model_provider_override")
-        llm_model_version_override = persona.get("llm_model_version_override")
-
-        # Set specific overrides for image generation persona
-        if persona.get("image_generation"):
-            llm_model_version_override = "gpt-4o"
-
-        existing_persona = (
-            db_session.query(Persona).filter(Persona.name == persona["name"]).first()
-        )
-
-        upsert_persona(
-            user=None,
-            persona_id=(-1 * p_id) if p_id is not None else None,
-            name=persona["name"],
-            description=persona["description"],
-            num_chunks=persona.get("num_chunks")
-            if persona.get("num_chunks") is not None
-            else default_chunks,
-            llm_relevance_filter=persona.get("llm_relevance_filter"),
-            starter_messages=persona.get("starter_messages"),
-            llm_filter_extraction=persona.get("llm_filter_extraction"),
-            icon_shape=persona.get("icon_shape"),
-            icon_color=persona.get("icon_color"),
-            llm_model_provider_override=llm_model_provider_override,
-            llm_model_version_override=llm_model_version_override,
-            recency_bias=RecencyBiasSetting(persona["recency_bias"]),
-            prompt_ids=prompt_ids,
-            document_set_ids=doc_set_ids,
-            tool_ids=tool_ids,
-            builtin_persona=True,
-            is_public=True,
-            display_priority=existing_persona.display_priority
-            if existing_persona is not None
-            else persona.get("display_priority"),
-            is_visible=existing_persona.is_visible
-            if existing_persona is not None
-            else persona.get("is_visible"),
-            db_session=db_session,
-        )
+            upsert_persona(
+                user=None,
+                persona_id=(-1 * p_id) if p_id is not None else None,
+                name=persona["name"],
+                description=persona["description"],
+                num_chunks=persona.get("num_chunks")
+                if persona.get("num_chunks") is not None
+                else default_chunks,
+                llm_relevance_filter=persona.get("llm_relevance_filter"),
+                starter_messages=persona.get("starter_messages"),
+                llm_filter_extraction=persona.get("llm_filter_extraction"),
+                icon_shape=persona.get("icon_shape"),
+                icon_color=persona.get("icon_color"),
+                llm_model_provider_override=llm_model_provider_override,
+                llm_model_version_override=llm_model_version_override,
+                recency_bias=RecencyBiasSetting(persona["recency_bias"]),
+                prompt_ids=prompt_ids,
+                document_set_ids=doc_set_ids,
+                tool_ids=tool_ids,
+                default_persona=True,
+                is_public=True,
+                display_priority=existing_persona.display_priority
+                if existing_persona is not None
+                else persona.get("display_priority"),
+                is_visible=existing_persona.is_visible
+                if existing_persona is not None
+                else persona.get("is_visible"),
+                db_session=db_session,
+            )


-def load_input_prompts_from_yaml(
-    db_session: Session, input_prompts_yaml: str = INPUT_PROMPT_YAML
-) -> None:
+def load_input_prompts_from_yaml(input_prompts_yaml: str = INPUT_PROMPT_YAML) -> None:
    with open(input_prompts_yaml, "r") as file:
        data = yaml.safe_load(file)

    all_input_prompts = data.get("input_prompts", [])
-    for input_prompt in all_input_prompts:
-        # If these prompts are deleted (which is a hard delete in the DB), on server startup
-        # they will be recreated, but the user can always just deactivate them, just a light inconvenience
-
-        insert_input_prompt_if_not_exists(
-            user=None,
-            input_prompt_id=input_prompt.get("id"),
-            prompt=input_prompt["prompt"],
-            content=input_prompt["content"],
-            is_public=input_prompt["is_public"],
-            active=input_prompt.get("active", True),
-            db_session=db_session,
-            commit=True,
-        )
+    with Session(get_sqlalchemy_engine()) as db_session:
+        for input_prompt in all_input_prompts:
+            # If these prompts are deleted (which is a hard delete in the DB), on server startup
+            # they will be recreated, but the user can always just deactivate them, just a light inconvenience
+            insert_input_prompt_if_not_exists(
+                user=None,
+                input_prompt_id=input_prompt.get("id"),
+                prompt=input_prompt["prompt"],
+                content=input_prompt["content"],
+                is_public=input_prompt["is_public"],
+                active=input_prompt.get("active", True),
+                db_session=db_session,
+                commit=True,
+            )


 def load_chat_yamls(
-    db_session: Session,
    prompt_yaml: str = PROMPTS_YAML,
    personas_yaml: str = PERSONAS_YAML,
    input_prompts_yaml: str = INPUT_PROMPT_YAML,
 ) -> None:
-    load_prompts_from_yaml(db_session, prompt_yaml)
-    load_personas_from_yaml(db_session, personas_yaml)
-    load_input_prompts_from_yaml(db_session, input_prompts_yaml)
+    load_prompts_from_yaml(prompt_yaml)
+    load_personas_from_yaml(personas_yaml)
+    load_input_prompts_from_yaml(input_prompts_yaml)
--- a/backend/danswer/chat/models.py
+++ b/backend/danswer/chat/models.py
@@ -60,11 +60,7 @@ class StreamStopInfo(BaseModel):


 class LLMRelevanceFilterResponse(BaseModel):
-    llm_selected_doc_indices: list[int]
-
-
-class FinalUsedContextDocsResponse(BaseModel):
-    final_context_docs: list[LlmDoc]
+    relevant_chunk_indices: list[int]


 class RelevanceAnalysis(BaseModel):
@@ -97,16 +93,6 @@ class CitationInfo(BaseModel):
    document_id: str


-class AllCitations(BaseModel):
-    citations: list[CitationInfo]
-
-
-# This is a mapping of the citation number to the document index within
-# the result search doc set
-class MessageSpecificCitations(BaseModel):
-    citation_map: dict[int, int]
-
-
 class MessageResponseIDInfo(BaseModel):
    user_message_id: int | None
    reserved_assistant_message_id: int
@@ -152,7 +138,7 @@ class QAResponse(SearchResponse, DanswerAnswer):
    predicted_flow: QueryFlow
    predicted_search: SearchType
    eval_res_valid: bool | None = None
-    llm_selected_doc_indices: list[int] | None = None
+    llm_chunks_indices: list[int] | None = None
    error_msg: str | None = None


--- a/backend/danswer/chat/process_message.py
+++ b/backend/danswer/chat/process_message.py
@@ -7,15 +7,12 @@ from typing import cast
 from sqlalchemy.orm import Session

 from danswer.chat.chat_utils import create_chat_chain
-from danswer.chat.models import AllCitations
 from danswer.chat.models import CitationInfo
 from danswer.chat.models import CustomToolResponse
 from danswer.chat.models import DanswerAnswerPiece
-from danswer.chat.models import FinalUsedContextDocsResponse
 from danswer.chat.models import ImageGenerationDisplay
 from danswer.chat.models import LLMRelevanceFilterResponse
 from danswer.chat.models import MessageResponseIDInfo
-from danswer.chat.models import MessageSpecificCitations
 from danswer.chat.models import QADocsResponse
 from danswer.chat.models import StreamingError
 from danswer.configs.chat_configs import BING_API_KEY
@@ -73,9 +70,7 @@ from danswer.server.query_and_chat.models import ChatMessageDetail
 from danswer.server.query_and_chat.models import CreateChatMessageRequest
 from danswer.server.utils import get_json_line
 from danswer.tools.built_in_tools import get_built_in_tool_by_id
-from danswer.tools.custom.custom_tool import (
-    build_custom_tools_from_openapi_schema_and_headers,
-)
+from danswer.tools.custom.custom_tool import build_custom_tools_from_openapi_schema
 from danswer.tools.custom.custom_tool import CUSTOM_TOOL_RESPONSE_ID
 from danswer.tools.custom.custom_tool import CustomToolCallSummary
 from danswer.tools.force import ForceUseTool
@@ -90,8 +85,6 @@ from danswer.tools.internet_search.internet_search_tool import (
 )
 from danswer.tools.internet_search.internet_search_tool import InternetSearchResponse
 from danswer.tools.internet_search.internet_search_tool import InternetSearchTool
-from danswer.tools.models import DynamicSchemaInfo
-from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS_ID
 from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID
 from danswer.tools.search.search_tool import SearchResponseSummary
 from danswer.tools.search.search_tool import SearchTool
@@ -107,9 +100,9 @@ from danswer.utils.timing import log_generator_function_time
 logger = setup_logger()


-def _translate_citations(
+def translate_citations(
    citations_list: list[CitationInfo], db_docs: list[DbSearchDoc]
-) -> MessageSpecificCitations:
+) -> dict[int, int]:
    """Always cites the first instance of the document_id, assumes the db_docs
    are sorted in the order displayed in the UI"""
    doc_id_to_saved_doc_id_map: dict[str, int] = {}
@@ -124,7 +117,7 @@ def _translate_citations(
                citation.citation_num
            ] = doc_id_to_saved_doc_id_map[citation.document_id]

-    return MessageSpecificCitations(citation_map=citation_to_saved_doc_id_map)
+    return citation_to_saved_doc_id_map


 def _handle_search_tool_response_summary(
@@ -246,14 +239,11 @@ ChatPacket = (
    StreamingError
    | QADocsResponse
    | LLMRelevanceFilterResponse
-    | FinalUsedContextDocsResponse
    | ChatMessageDetail
    | DanswerAnswerPiece
-    | AllCitations
    | CitationInfo
    | ImageGenerationDisplay
    | CustomToolResponse
-    | MessageSpecificCitations
    | MessageResponseIDInfo
 )
 ChatPacketStream = Iterator[ChatPacket]
@@ -273,7 +263,6 @@ def stream_chat_message_objects(
    use_existing_user_message: bool = False,
    litellm_additional_headers: dict[str, str] | None = None,
    is_connected: Callable[[], bool] | None = None,
-    enforce_chat_session_id_for_search_docs: bool = True,
 ) -> ChatPacketStream:
    """Streams in order:
    1. [conditional] Retrieved documents if a search needs to be run
@@ -445,7 +434,6 @@ def stream_chat_message_objects(
                chat_session=chat_session,
                user_id=user_id,
                db_session=db_session,
-                enforce_chat_session_id_for_search_docs=enforce_chat_session_id_for_search_docs,
            )

            # Generates full documents currently
@@ -609,13 +597,8 @@ def stream_chat_message_objects(
            if db_tool_model.openapi_schema:
                tool_dict[db_tool_model.id] = cast(
                    list[Tool],
-                    build_custom_tools_from_openapi_schema_and_headers(
-                        db_tool_model.openapi_schema,
-                        dynamic_schema_info=DynamicSchemaInfo(
-                            chat_session_id=chat_session_id,
-                            message_id=user_message.id if user_message else None,
-                        ),
-                        custom_headers=db_tool_model.custom_headers,
+                    build_custom_tools_from_openapi_schema(
+                        db_tool_model.openapi_schema
                    ),
                )

@@ -680,11 +663,9 @@ def stream_chat_message_objects(
                        db_session=db_session,
                        selected_search_docs=selected_db_search_docs,
                        # Deduping happens at the last step to avoid harming quality by dropping content early on
-                        dedupe_docs=(
-                            retrieval_options.dedupe_docs
-                            if retrieval_options
-                            else False
-                        ),
+                        dedupe_docs=retrieval_options.dedupe_docs
+                        if retrieval_options
+                        else False,
                    )
                    yield qa_docs_response
                elif packet.id == SECTION_RELEVANCE_LIST_ID:
@@ -707,14 +688,9 @@ def stream_chat_message_objects(
                            )

                        yield LLMRelevanceFilterResponse(
-                            llm_selected_doc_indices=llm_indices
+                            relevant_chunk_indices=llm_indices
                        )

-                elif packet.id == FINAL_CONTEXT_DOCUMENTS_ID:
-                    yield FinalUsedContextDocsResponse(
-                        final_context_docs=packet.response
-                    )
-
                elif packet.id == IMAGE_GENERATION_RESPONSE_ID:
                    img_generation_response = cast(
                        list[ImageGenerationResponse], packet.response
@@ -751,18 +727,10 @@ def stream_chat_message_objects(
                    tool_result = packet
                yield cast(ChatPacket, packet)
        logger.debug("Reached end of stream")
-    except ValueError as e:
-        logger.exception("Failed to process chat message.")
-
-        error_msg = str(e)
-        yield StreamingError(error=error_msg)
-        db_session.rollback()
-        return
-
    except Exception as e:
-        logger.exception("Failed to process chat message.")
-
        error_msg = str(e)
+        logger.exception(f"Failed to process chat message: {error_msg}")
+
        stack_trace = traceback.format_exc()
        client_error_msg = litellm_exception_to_error_msg(e, llm)
        if llm.config.api_key and len(llm.config.api_key) > 2:
@@ -775,13 +743,12 @@ def stream_chat_message_objects(

    # Post-LLM answer processing
    try:
-        message_specific_citations: MessageSpecificCitations | None = None
+        db_citations = None
        if reference_db_search_docs:
-            message_specific_citations = _translate_citations(
+            db_citations = translate_citations(
                citations_list=answer.citations,
                db_docs=reference_db_search_docs,
            )
-            yield AllCitations(citations=answer.citations)

        # Saving Gen AI answer and responding with message info
        tool_name_to_tool_id: dict[str, int] = {}
@@ -798,22 +765,18 @@ def stream_chat_message_objects(
            reference_docs=reference_db_search_docs,
            files=ai_message_files,
            token_count=len(llm_tokenizer_encode_func(answer.llm_answer)),
-            citations=message_specific_citations.citation_map
-            if message_specific_citations
-            else None,
+            citations=db_citations,
            error=None,
-            tool_calls=(
-                [
-                    ToolCall(
-                        tool_id=tool_name_to_tool_id[tool_result.tool_name],
-                        tool_name=tool_result.tool_name,
-                        tool_arguments=tool_result.tool_args,
-                        tool_result=tool_result.tool_result,
-                    )
-                ]
-                if tool_result
-                else []
-            ),
+            tool_calls=[
+                ToolCall(
+                    tool_id=tool_name_to_tool_id[tool_result.tool_name],
+                    tool_name=tool_result.tool_name,
+                    tool_arguments=tool_result.tool_args,
+                    tool_result=tool_result.tool_result,
+                )
+            ]
+            if tool_result
+            else [],
        )

        logger.debug("Committing messages")
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -126,7 +126,6 @@ try:
 except ValueError:
    INDEX_BATCH_SIZE = 16

-
 # Below are intended to match the env variables names used by the official postgres docker image
 # https://hub.docker.com/_/postgres
 POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres"
@@ -138,12 +137,6 @@ POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost"
 POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432"
 POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres"

-POSTGRES_API_SERVER_POOL_SIZE = int(
-    os.environ.get("POSTGRES_API_SERVER_POOL_SIZE") or 40
-)
-POSTGRES_API_SERVER_POOL_OVERFLOW = int(
-    os.environ.get("POSTGRES_API_SERVER_POOL_OVERFLOW") or 10
-)
 # defaults to False
 POSTGRES_POOL_PRE_PING = os.environ.get("POSTGRES_POOL_PRE_PING", "").lower() == "true"

@@ -156,7 +149,6 @@ try:
 except ValueError:
    POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT

-REDIS_SSL = os.getenv("REDIS_SSL", "").lower() == "true"
 REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
 REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
 REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or ""
@@ -165,33 +157,7 @@ REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or ""
 REDIS_DB_NUMBER = int(os.environ.get("REDIS_DB_NUMBER", 0))

 # Used by celery as broker and backend
-REDIS_DB_NUMBER_CELERY_RESULT_BACKEND = int(
-    os.environ.get("REDIS_DB_NUMBER_CELERY_RESULT_BACKEND", 14)
-)
-REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15))  # broker
-
-# will propagate to both our redis client as well as celery's redis client
-REDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get("REDIS_HEALTH_CHECK_INTERVAL", 60))
-
-# our redis client only, not celery's
-REDIS_POOL_MAX_CONNECTIONS = int(os.environ.get("REDIS_POOL_MAX_CONNECTIONS", 128))
-
-# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings
-# should be one of "required", "optional", or "none"
-REDIS_SSL_CERT_REQS = os.getenv("REDIS_SSL_CERT_REQS", "none")
-REDIS_SSL_CA_CERTS = os.getenv("REDIS_SSL_CA_CERTS", None)
-
-CELERY_RESULT_EXPIRES = int(os.environ.get("CELERY_RESULT_EXPIRES", 86400))  # seconds
-
-# https://docs.celeryq.dev/en/stable/userguide/configuration.html#broker-pool-limit
-# Setting to None may help when there is a proxy in the way closing idle connections
-CELERY_BROKER_POOL_LIMIT_DEFAULT = 10
-try:
-    CELERY_BROKER_POOL_LIMIT = int(
-        os.environ.get("CELERY_BROKER_POOL_LIMIT", CELERY_BROKER_POOL_LIMIT_DEFAULT)
-    )
-except ValueError:
-    CELERY_BROKER_POOL_LIMIT = CELERY_BROKER_POOL_LIMIT_DEFAULT
+REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15))

 #####
 # Connector Configs
@@ -269,10 +235,6 @@ JIRA_CONNECTOR_LABELS_TO_SKIP = [
    for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
    if ignored_tag
 ]
-# Maximum size for Jira tickets in bytes (default: 100KB)
-JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
-    os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024)
-)

 GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME")

@@ -296,7 +258,7 @@ ALLOW_SIMULTANEOUS_PRUNING = (
    os.environ.get("ALLOW_SIMULTANEOUS_PRUNING", "").lower() == "true"
 )

-# This is the maximum rate at which documents are queried for a pruning job. 0 disables the limitation.
+# This is the maxiumum rate at which documents are queried for a pruning job. 0 disables the limitation.
 MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE = int(
    os.environ.get("MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE", 0)
 )
@@ -360,10 +322,12 @@ INDEXING_TRACER_INTERVAL = int(os.environ.get("INDEXING_TRACER_INTERVAL", 0))
 # exception without aborting the attempt.
 INDEXING_EXCEPTION_LIMIT = int(os.environ.get("INDEXING_EXCEPTION_LIMIT", 0))

-
 #####
 # Miscellaneous
 #####
+# File based Key Value store no longer used
+DYNAMIC_CONFIG_STORE = "PostgresBackedDynamicConfigStore"
+
 JOB_TIMEOUT = 60 * 60 * 6  # 6 hours default
 # used to allow the background indexing jobs to use a different embedding
 # model server than the API server
@@ -412,11 +376,3 @@ CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
 ENTERPRISE_EDITION_ENABLED = (
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() == "true"
 )
-
-
-MULTI_TENANT = os.environ.get("MULTI_TENANT", "").lower() == "true"
-SECRET_JWT_KEY = os.environ.get("SECRET_JWT_KEY", "")
-
-
-DATA_PLANE_SECRET = os.environ.get("DATA_PLANE_SECRET", "")
-EXPECTED_API_KEY = os.environ.get("EXPECTED_API_KEY", "")
--- a/backend/danswer/configs/chat_configs.py
+++ b/backend/danswer/configs/chat_configs.py
@@ -83,15 +83,11 @@ DISABLE_LLM_DOC_RELEVANCE = (
 # Stops streaming answers back to the UI if this pattern is seen:
 STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None

-# Set this to "true" to hard delete chats
-# This will make chats unviewable by admins after a user deletes them
-# As opposed to soft deleting them, which just hides them from non-admin users
-HARD_DELETE_CHATS = os.environ.get("HARD_DELETE_CHATS", "").lower() == "true"
+# The backend logic for this being True isn't fully supported yet
+HARD_DELETE_CHATS = False

 # Internet Search
 BING_API_KEY = os.environ.get("BING_API_KEY") or None

 # Enable in-house model for detecting connector-based filtering in queries
 ENABLE_CONNECTOR_CLASSIFIER = os.environ.get("ENABLE_CONNECTOR_CLASSIFIER", False)
-
-VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2)
--- a/backend/danswer/configs/constants.py
+++ b/backend/danswer/configs/constants.py
@@ -1,5 +1,3 @@
-import platform
-import socket
 from enum import auto
 from enum import Enum

@@ -36,12 +34,9 @@ POSTGRES_WEB_APP_NAME = "web"
 POSTGRES_INDEXER_APP_NAME = "indexer"
 POSTGRES_CELERY_APP_NAME = "celery"
 POSTGRES_CELERY_BEAT_APP_NAME = "celery_beat"
-POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME = "celery_worker_primary"
-POSTGRES_CELERY_WORKER_LIGHT_APP_NAME = "celery_worker_light"
-POSTGRES_CELERY_WORKER_HEAVY_APP_NAME = "celery_worker_heavy"
+POSTGRES_CELERY_WORKER_APP_NAME = "celery_worker"
 POSTGRES_PERMISSIONS_APP_NAME = "permissions"
 POSTGRES_UNKNOWN_APP_NAME = "unknown"
-POSTGRES_DEFAULT_SCHEMA = "public"

 # API Keys
 DANSWER_API_KEY_PREFIX = "API_KEY__"
@@ -51,7 +46,6 @@ UNNAMED_KEY_PLACEHOLDER = "Unnamed"
 # Key-Value store keys
 KV_REINDEX_KEY = "needs_reindexing"
 KV_SEARCH_SETTINGS = "search_settings"
-KV_UNSTRUCTURED_API_KEY = "unstructured_api_key"
 KV_USER_STORE_KEY = "INVITED_USERS"
 KV_NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences"
 KV_CRED_KEY = "credential_id_{}"
@@ -68,7 +62,6 @@ KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings"
 KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__"

 CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 60
-CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120


 class DocumentSource(str, Enum):
@@ -106,12 +99,10 @@ class DocumentSource(str, Enum):
    CLICKUP = "clickup"
    MEDIAWIKI = "mediawiki"
    WIKIPEDIA = "wikipedia"
-    ASANA = "asana"
    S3 = "s3"
    R2 = "r2"
    GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
    OCI_STORAGE = "oci_storage"
-    XENFORO = "xenforo"
    NOT_APPLICABLE = "not_applicable"


@@ -142,12 +133,6 @@ class AuthType(str, Enum):
    SAML = "saml"


-class SessionType(str, Enum):
-    CHAT = "Chat"
-    SEARCH = "Search"
-    SLACK = "Slack"
-
-
 class QAFeedbackType(str, Enum):
    LIKE = "like"  # User likes the answer, used for metrics
    DISLIKE = "dislike"  # User dislikes the answer, used for metrics
@@ -187,17 +172,15 @@ class PostgresAdvisoryLocks(Enum):


 class DanswerCeleryQueues:
+    VESPA_DOCSET_SYNC_GENERATOR = "vespa_docset_sync_generator"
+    VESPA_USERGROUP_SYNC_GENERATOR = "vespa_usergroup_sync_generator"
    VESPA_METADATA_SYNC = "vespa_metadata_sync"
    CONNECTOR_DELETION = "connector_deletion"
-    CONNECTOR_PRUNING = "connector_pruning"


 class DanswerRedisLocks:
-    PRIMARY_WORKER = "da_lock:primary_worker"
    CHECK_VESPA_SYNC_BEAT_LOCK = "da_lock:check_vespa_sync_beat"
    MONITOR_VESPA_SYNC_BEAT_LOCK = "da_lock:monitor_vespa_sync_beat"
-    CHECK_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:check_connector_deletion_beat"
-    CHECK_PRUNE_BEAT_LOCK = "da_lock:check_prune_beat"


 class DanswerCeleryPriority(int, Enum):
@@ -206,13 +189,3 @@ class DanswerCeleryPriority(int, Enum):
    MEDIUM = auto()
    LOW = auto()
    LOWEST = auto()
-
-
-REDIS_SOCKET_KEEPALIVE_OPTIONS = {}
-REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPINTVL] = 15
-REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPCNT] = 3
-
-if platform.system() == "Darwin":
-    REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPALIVE] = 60  # type: ignore
-else:
-    REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPIDLE] = 60  # type: ignore
--- a/backend/danswer/configs/model_configs.py
+++ b/backend/danswer/configs/model_configs.py
@@ -39,13 +39,9 @@ SIM_SCORE_RANGE_HIGH = float(os.environ.get("SIM_SCORE_RANGE_HIGH") or 1.0)
 ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "search_query: ")
 ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "search_document: ")
 # Purely an optimization, memory limitation consideration
-
-# User's set embedding batch size overrides the default encoding batch sizes
-EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None
-
-BATCH_SIZE_ENCODE_CHUNKS = EMBEDDING_BATCH_SIZE or 8
+BATCH_SIZE_ENCODE_CHUNKS = 8
 # don't send over too many chunks at once, as sending too many could cause timeouts
-BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = EMBEDDING_BATCH_SIZE or 512
+BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = 512
 # For score display purposes, only way is to know the expected ranges
 CROSS_ENCODER_RANGE_MAX = 1
 CROSS_ENCODER_RANGE_MIN = 0
--- a/backend/danswer/connectors/asana/asana_api.py
+++ b/backend/danswer/connectors/asana/asana_api.py
@@ -1,233 +0,0 @@
-import time
-from collections.abc import Iterator
-from datetime import datetime
-from typing import Dict
-
-import asana  # type: ignore
-
-from danswer.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-# https://github.com/Asana/python-asana/tree/master?tab=readme-ov-file#documentation-for-api-endpoints
-class AsanaTask:
-    def __init__(
-        self,
-        id: str,
-        title: str,
-        text: str,
-        link: str,
-        last_modified: datetime,
-        project_gid: str,
-        project_name: str,
-    ) -> None:
-        self.id = id
-        self.title = title
-        self.text = text
-        self.link = link
-        self.last_modified = last_modified
-        self.project_gid = project_gid
-        self.project_name = project_name
-
-    def __str__(self) -> str:
-        return f"ID: {self.id}\nTitle: {self.title}\nLast modified: {self.last_modified}\nText: {self.text}"
-
-
-class AsanaAPI:
-    def __init__(
-        self, api_token: str, workspace_gid: str, team_gid: str | None
-    ) -> None:
-        self._user = None  # type: ignore
-        self.workspace_gid = workspace_gid
-        self.team_gid = team_gid
-
-        self.configuration = asana.Configuration()
-        self.api_client = asana.ApiClient(self.configuration)
-        self.tasks_api = asana.TasksApi(self.api_client)
-        self.stories_api = asana.StoriesApi(self.api_client)
-        self.users_api = asana.UsersApi(self.api_client)
-        self.project_api = asana.ProjectsApi(self.api_client)
-        self.workspaces_api = asana.WorkspacesApi(self.api_client)
-
-        self.api_error_count = 0
-        self.configuration.access_token = api_token
-        self.task_count = 0
-
-    def get_tasks(
-        self, project_gids: list[str] | None, start_date: str
-    ) -> Iterator[AsanaTask]:
-        """Get all tasks from the projects with the given gids that were modified since the given date.
-        If project_gids is None, get all tasks from all projects in the workspace."""
-        logger.info("Starting to fetch Asana projects")
-        projects = self.project_api.get_projects(
-            opts={
-                "workspace": self.workspace_gid,
-                "opt_fields": "gid,name,archived,modified_at",
-            }
-        )
-        start_seconds = int(time.mktime(datetime.now().timetuple()))
-        projects_list = []
-        project_count = 0
-        for project_info in projects:
-            project_gid = project_info["gid"]
-            if project_gids is None or project_gid in project_gids:
-                projects_list.append(project_gid)
-            else:
-                logger.debug(
-                    f"Skipping project: {project_gid} - not in accepted project_gids"
-                )
-            project_count += 1
-            if project_count % 100 == 0:
-                logger.info(f"Processed {project_count} projects")
-
-        logger.info(f"Found {len(projects_list)} projects to process")
-        for project_gid in projects_list:
-            for task in self._get_tasks_for_project(
-                project_gid, start_date, start_seconds
-            ):
-                yield task
-        logger.info(f"Completed fetching {self.task_count} tasks from Asana")
-        if self.api_error_count > 0:
-            logger.warning(
-                f"Encountered {self.api_error_count} API errors during task fetching"
-            )
-
-    def _get_tasks_for_project(
-        self, project_gid: str, start_date: str, start_seconds: int
-    ) -> Iterator[AsanaTask]:
-        project = self.project_api.get_project(project_gid, opts={})
-        if project["archived"]:
-            logger.info(f"Skipping archived project: {project['name']} ({project_gid})")
-            return []
-        if not project["team"] or not project["team"]["gid"]:
-            logger.info(
-                f"Skipping project without a team: {project['name']} ({project_gid})"
-            )
-            return []
-        if project["privacy_setting"] == "private":
-            if self.team_gid and project["team"]["gid"] != self.team_gid:
-                logger.info(
-                    f"Skipping private project not in configured team: {project['name']} ({project_gid})"
-                )
-                return []
-            else:
-                logger.info(
-                    f"Processing private project in configured team: {project['name']} ({project_gid})"
-                )
-
-        simple_start_date = start_date.split(".")[0].split("+")[0]
-        logger.info(
-            f"Fetching tasks modified since {simple_start_date} for project: {project['name']} ({project_gid})"
-        )
-
-        opts = {
-            "opt_fields": "name,memberships,memberships.project,completed_at,completed_by,created_at,"
-            "created_by,custom_fields,dependencies,due_at,due_on,external,html_notes,liked,likes,"
-            "modified_at,notes,num_hearts,parent,projects,resource_subtype,resource_type,start_on,"
-            "workspace,permalink_url",
-            "modified_since": start_date,
-        }
-        tasks_from_api = self.tasks_api.get_tasks_for_project(project_gid, opts)
-        for data in tasks_from_api:
-            self.task_count += 1
-            if self.task_count % 10 == 0:
-                end_seconds = time.mktime(datetime.now().timetuple())
-                runtime_seconds = end_seconds - start_seconds
-                if runtime_seconds > 0:
-                    logger.info(
-                        f"Processed {self.task_count} tasks in {runtime_seconds:.0f} seconds "
-                        f"({self.task_count / runtime_seconds:.2f} tasks/second)"
-                    )
-
-            logger.debug(f"Processing Asana task: {data['name']}")
-
-            text = self._construct_task_text(data)
-
-            try:
-                text += self._fetch_and_add_comments(data["gid"])
-
-                last_modified_date = self.format_date(data["modified_at"])
-                text += f"Last modified: {last_modified_date}\n"
-
-                task = AsanaTask(
-                    id=data["gid"],
-                    title=data["name"],
-                    text=text,
-                    link=data["permalink_url"],
-                    last_modified=datetime.fromisoformat(data["modified_at"]),
-                    project_gid=project_gid,
-                    project_name=project["name"],
-                )
-                yield task
-            except Exception:
-                logger.error(
-                    f"Error processing task {data['gid']} in project {project_gid}",
-                    exc_info=True,
-                )
-                self.api_error_count += 1
-
-    def _construct_task_text(self, data: Dict) -> str:
-        text = f"{data['name']}\n\n"
-
-        if data["notes"]:
-            text += f"{data['notes']}\n\n"
-
-        if data["created_by"] and data["created_by"]["gid"]:
-            creator = self.get_user(data["created_by"]["gid"])["name"]
-            created_date = self.format_date(data["created_at"])
-            text += f"Created by: {creator} on {created_date}\n"
-
-        if data["due_on"]:
-            due_date = self.format_date(data["due_on"])
-            text += f"Due date: {due_date}\n"
-
-        if data["completed_at"]:
-            completed_date = self.format_date(data["completed_at"])
-            text += f"Completed on: {completed_date}\n"
-
-        text += "\n"
-        return text
-
-    def _fetch_and_add_comments(self, task_gid: str) -> str:
-        text = ""
-        stories_opts: Dict[str, str] = {}
-        story_start = time.time()
-        stories = self.stories_api.get_stories_for_task(task_gid, stories_opts)
-
-        story_count = 0
-        comment_count = 0
-
-        for story in stories:
-            story_count += 1
-            if story["resource_subtype"] == "comment_added":
-                comment = self.stories_api.get_story(
-                    story["gid"], opts={"opt_fields": "text,created_by,created_at"}
-                )
-                commenter = self.get_user(comment["created_by"]["gid"])["name"]
-                text += f"Comment by {commenter}: {comment['text']}\n\n"
-                comment_count += 1
-
-        story_duration = time.time() - story_start
-        logger.debug(
-            f"Processed {story_count} stories (including {comment_count} comments) in {story_duration:.2f} seconds"
-        )
-
-        return text
-
-    def get_user(self, user_gid: str) -> Dict:
-        if self._user is not None:
-            return self._user
-        self._user = self.users_api.get_user(user_gid, {"opt_fields": "name,email"})
-
-        if not self._user:
-            logger.warning(f"Unable to fetch user information for user_gid: {user_gid}")
-            return {"name": "Unknown"}
-        return self._user
-
-    def format_date(self, date_str: str) -> str:
-        date = datetime.fromisoformat(date_str)
-        return time.strftime("%Y-%m-%d", date.timetuple())
-
-    def get_time(self) -> str:
-        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
--- a/backend/danswer/connectors/asana/connector.py
+++ b/backend/danswer/connectors/asana/connector.py
@@ -1,120 +0,0 @@
-import datetime
-from typing import Any
-
-from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
-from danswer.configs.app_configs import INDEX_BATCH_SIZE
-from danswer.configs.constants import DocumentSource
-from danswer.connectors.asana import asana_api
-from danswer.connectors.interfaces import GenerateDocumentsOutput
-from danswer.connectors.interfaces import LoadConnector
-from danswer.connectors.interfaces import PollConnector
-from danswer.connectors.interfaces import SecondsSinceUnixEpoch
-from danswer.connectors.models import Document
-from danswer.connectors.models import Section
-from danswer.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-class AsanaConnector(LoadConnector, PollConnector):
-    def __init__(
-        self,
-        asana_workspace_id: str,
-        asana_project_ids: str | None = None,
-        asana_team_id: str | None = None,
-        batch_size: int = INDEX_BATCH_SIZE,
-        continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,
-    ) -> None:
-        self.workspace_id = asana_workspace_id
-        self.project_ids_to_index: list[str] | None = (
-            asana_project_ids.split(",") if asana_project_ids is not None else None
-        )
-        self.asana_team_id = asana_team_id
-        self.batch_size = batch_size
-        self.continue_on_failure = continue_on_failure
-        logger.info(
-            f"AsanaConnector initialized with workspace_id: {asana_workspace_id}"
-        )
-
-    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        self.api_token = credentials["asana_api_token_secret"]
-        self.asana_client = asana_api.AsanaAPI(
-            api_token=self.api_token,
-            workspace_gid=self.workspace_id,
-            team_gid=self.asana_team_id,
-        )
-        logger.info("Asana credentials loaded and API client initialized")
-        return None
-
-    def poll_source(
-        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch | None
-    ) -> GenerateDocumentsOutput:
-        start_time = datetime.datetime.fromtimestamp(start).isoformat()
-        logger.info(f"Starting Asana poll from {start_time}")
-        asana = asana_api.AsanaAPI(
-            api_token=self.api_token,
-            workspace_gid=self.workspace_id,
-            team_gid=self.asana_team_id,
-        )
-        docs_batch: list[Document] = []
-        tasks = asana.get_tasks(self.project_ids_to_index, start_time)
-
-        for task in tasks:
-            doc = self._message_to_doc(task)
-            docs_batch.append(doc)
-
-            if len(docs_batch) >= self.batch_size:
-                logger.info(f"Yielding batch of {len(docs_batch)} documents")
-                yield docs_batch
-                docs_batch = []
-
-        if docs_batch:
-            logger.info(f"Yielding final batch of {len(docs_batch)} documents")
-            yield docs_batch
-
-        logger.info("Asana poll completed")
-
-    def load_from_state(self) -> GenerateDocumentsOutput:
-        logger.notice("Starting full index of all Asana tasks")
-        return self.poll_source(start=0, end=None)
-
-    def _message_to_doc(self, task: asana_api.AsanaTask) -> Document:
-        logger.debug(f"Converting Asana task {task.id} to Document")
-        return Document(
-            id=task.id,
-            sections=[Section(link=task.link, text=task.text)],
-            doc_updated_at=task.last_modified,
-            source=DocumentSource.ASANA,
-            semantic_identifier=task.title,
-            metadata={
-                "group": task.project_gid,
-                "project": task.project_name,
-            },
-        )
-
-
-if __name__ == "__main__":
-    import time
-    import os
-
-    logger.notice("Starting Asana connector test")
-    connector = AsanaConnector(
-        os.environ["WORKSPACE_ID"],
-        os.environ["PROJECT_IDS"],
-        os.environ["TEAM_ID"],
-    )
-    connector.load_credentials(
-        {
-            "asana_api_token_secret": os.environ["API_TOKEN"],
-        }
-    )
-    logger.info("Loading all documents from Asana")
-    all_docs = connector.load_from_state()
-    current = time.time()
-    one_day_ago = current - 24 * 60 * 60  # 1 day
-    logger.info("Polling for documents updated in the last 24 hours")
-    latest_docs = connector.poll_source(one_day_ago, current)
-    for docs in latest_docs:
-        for doc in docs:
-            print(doc.id)
-    logger.notice("Asana connector test completed")
--- a/backend/danswer/connectors/blob/connector.py
+++ b/backend/danswer/connectors/blob/connector.py
@@ -194,8 +194,8 @@ class BlobStorageConnector(LoadConnector, PollConnector):

                try:
                    text = extract_file_text(
+                        name,
                        BytesIO(downloaded_file),
-                        file_name=name,
                        break_on_unprocessable=False,
                    )
                    batch.append(
--- a/backend/danswer/connectors/confluence/confluence_utils.py
+++ b/backend/danswer/connectors/confluence/confluence_utils.py
@@ -1,32 +0,0 @@
-import bs4
-
-
-def build_confluence_document_id(base_url: str, content_url: str) -> str:
-    """For confluence, the document id is the page url for a page based document
-        or the attachment download url for an attachment based document
-
-    Args:
-        base_url (str): The base url of the Confluence instance
-        content_url (str): The url of the page or attachment download url
-
-    Returns:
-        str: The document id
-    """
-    return f"{base_url}{content_url}"
-
-
-def get_used_attachments(text: str) -> list[str]:
-    """Parse a Confluence html page to generate a list of current
-        attachment in used
-
-    Args:
-        text (str): The page content
-
-    Returns:
-        list[str]: List of filenames currently in use by the page text
-    """
-    files_in_used = []
-    soup = bs4.BeautifulSoup(text, "html.parser")
-    for attachment in soup.findAll("ri:attachment"):
-        files_in_used.append(attachment.attrs["ri:filename"])
-    return files_in_used
--- a/backend/danswer/connectors/confluence/connector.py
+++ b/backend/danswer/connectors/confluence/connector.py
@@ -22,10 +22,6 @@ from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING
 from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
 from danswer.configs.constants import DocumentSource
-from danswer.connectors.confluence.confluence_utils import (
-    build_confluence_document_id,
-)
-from danswer.connectors.confluence.confluence_utils import get_used_attachments
 from danswer.connectors.confluence.rate_limit_handler import (
    make_confluence_call_handle_rate_limit,
 )
@@ -109,6 +105,24 @@ def parse_html_page(text: str, confluence_client: Confluence) -> str:
    return format_document_soup(soup)


+def get_used_attachments(text: str, confluence_client: Confluence) -> list[str]:
+    """Parse a Confluence html page to generate a list of current
+        attachment in used
+
+    Args:
+        text (str): The page content
+        confluence_client (Confluence): Confluence client
+
+    Returns:
+        list[str]: List of filename currently in used
+    """
+    files_in_used = []
+    soup = bs4.BeautifulSoup(text, "html.parser")
+    for attachment in soup.findAll("ri:attachment"):
+        files_in_used.append(attachment.attrs["ri:filename"])
+    return files_in_used
+
+
 def _comment_dfs(
    comments_str: str,
    comment_pages: Collection[dict[str, Any]],
@@ -519,9 +533,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
            return None

        extracted_text = extract_file_text(
-            io.BytesIO(response.content),
-            file_name=attachment["title"],
-            break_on_unprocessable=False,
+            attachment["title"], io.BytesIO(response.content), False
        )
        if len(extracted_text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:
            logger.warning(
@@ -612,22 +624,19 @@ class ConfluenceConnector(LoadConnector, PollConnector):
            page_html = (
                page["body"].get("storage", page["body"].get("view", {})).get("value")
            )
-            # The url and the id are the same
-            page_url = build_confluence_document_id(
-                self.wiki_base, page["_links"]["webui"]
-            )
+            page_url = self.wiki_base + page["_links"]["webui"]
            if not page_html:
                logger.debug("Page is empty, skipping: %s", page_url)
                continue
            page_text = parse_html_page(page_html, self.confluence_client)

-            files_in_used = get_used_attachments(page_html)
+            files_in_used = get_used_attachments(page_html, self.confluence_client)
            attachment_text, unused_page_attachments = self._fetch_attachments(
                self.confluence_client, page_id, files_in_used
            )
            unused_attachments.extend(unused_page_attachments)

-            page_text += "\n" + attachment_text if attachment_text else ""
+            page_text += attachment_text
            comments_text = self._fetch_comments(self.confluence_client, page_id)
            page_text += comments_text
            doc_metadata: dict[str, str | list[str]] = {"Wiki Space Name": self.space}
@@ -674,9 +683,8 @@ class ConfluenceConnector(LoadConnector, PollConnector):
            if time_filter and not time_filter(last_updated):
                continue

-            # The url and the id are the same
-            attachment_url = build_confluence_document_id(
-                self.wiki_base, attachment["_links"]["download"]
+            attachment_url = self._attachment_to_download_link(
+                self.confluence_client, attachment
            )
            attachment_content = self._attachment_to_content(
                self.confluence_client, attachment
--- a/backend/danswer/connectors/confluence/rate_limit_handler.py
+++ b/backend/danswer/connectors/confluence/rate_limit_handler.py
@@ -50,12 +50,6 @@ def make_confluence_call_handle_rate_limit(confluence_call: F) -> F:
                            pass

                    if retry_after is not None:
-                        if retry_after > 600:
-                            logger.warning(
-                                f"Clamping retry_after from {retry_after} to {max_delay} seconds..."
-                            )
-                            retry_after = max_delay
-
                        logger.warning(
                            f"Rate limit hit. Retrying after {retry_after} seconds..."
                        )
--- a/backend/danswer/connectors/danswer_jira/connector.py
+++ b/backend/danswer/connectors/danswer_jira/connector.py
@@ -9,7 +9,6 @@ from jira.resources import Issue

 from danswer.configs.app_configs import INDEX_BATCH_SIZE
 from danswer.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
-from danswer.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
 from danswer.configs.constants import DocumentSource
 from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
 from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -135,18 +134,10 @@ def fetch_jira_issues_batch(
            else extract_text_from_adf(jira.raw["fields"]["description"])
        )
        comments = _get_comment_strs(jira, comment_email_blacklist)
-        ticket_content = f"{description}\n" + "\n".join(
+        semantic_rep = f"{description}\n" + "\n".join(
            [f"Comment: {comment}" for comment in comments if comment]
        )

-        # Check ticket size
-        if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
-            logger.info(
-                f"Skipping {jira.key} because it exceeds the maximum size of "
-                f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
-            )
-            continue
-
        page_url = f"{jira_client.client_info()}/browse/{jira.key}"

        people = set()
@@ -189,7 +180,7 @@ def fetch_jira_issues_batch(
        doc_batch.append(
            Document(
                id=page_url,
-                sections=[Section(link=page_url, text=ticket_content)],
+                sections=[Section(link=page_url, text=semantic_rep)],
                source=DocumentSource.JIRA,
                semantic_identifier=jira.fields.summary,
                doc_updated_at=time_str_to_utc(jira.fields.updated),
@@ -245,12 +236,10 @@ class JiraConnector(LoadConnector, PollConnector):
        if self.jira_client is None:
            raise ConnectorMissingCredentialError("Jira")

-        # Quote the project name to handle reserved words
-        quoted_project = f'"{self.jira_project}"'
        start_ind = 0
        while True:
            doc_batch, fetched_batch_size = fetch_jira_issues_batch(
-                jql=f"project = {quoted_project}",
+                jql=f"project = {self.jira_project}",
                start_index=start_ind,
                jira_client=self.jira_client,
                batch_size=self.batch_size,
@@ -278,10 +267,8 @@ class JiraConnector(LoadConnector, PollConnector):
            "%Y-%m-%d %H:%M"
        )

-        # Quote the project name to handle reserved words
-        quoted_project = f'"{self.jira_project}"'
        jql = (
-            f"project = {quoted_project} AND "
+            f"project = {self.jira_project} AND "
            f"updated >= '{start_date_str}' AND "
            f"updated <= '{end_date_str}'"
        )
--- a/backend/danswer/connectors/dropbox/connector.py
+++ b/backend/danswer/connectors/dropbox/connector.py
@@ -97,8 +97,8 @@ class DropboxConnector(LoadConnector, PollConnector):
                    link = self._get_shared_link(entry.path_display)
                    try:
                        text = extract_file_text(
+                            entry.name,
                            BytesIO(downloaded_file),
-                            file_name=entry.name,
                            break_on_unprocessable=False,
                        )
                        batch.append(
--- a/backend/danswer/connectors/factory.py
+++ b/backend/danswer/connectors/factory.py
@@ -4,7 +4,6 @@ from typing import Type
 from sqlalchemy.orm import Session

 from danswer.configs.constants import DocumentSource
-from danswer.connectors.asana.connector import AsanaConnector
 from danswer.connectors.axero.connector import AxeroConnector
 from danswer.connectors.blob.connector import BlobStorageConnector
 from danswer.connectors.bookstack.connector import BookstackConnector
@@ -42,7 +41,6 @@ from danswer.connectors.slack.load_connector import SlackLoadConnector
 from danswer.connectors.teams.connector import TeamsConnector
 from danswer.connectors.web.connector import WebConnector
 from danswer.connectors.wikipedia.connector import WikipediaConnector
-from danswer.connectors.xenforo.connector import XenforoConnector
 from danswer.connectors.zendesk.connector import ZendeskConnector
 from danswer.connectors.zulip.connector import ZulipConnector
 from danswer.db.credentials import backend_update_credential_json
@@ -63,7 +61,6 @@ def identify_connector_class(
        DocumentSource.SLACK: {
            InputType.LOAD_STATE: SlackLoadConnector,
            InputType.POLL: SlackPollConnector,
-            InputType.PRUNE: SlackPollConnector,
        },
        DocumentSource.GITHUB: GithubConnector,
        DocumentSource.GMAIL: GmailConnector,
@@ -94,12 +91,10 @@ def identify_connector_class(
        DocumentSource.CLICKUP: ClickupConnector,
        DocumentSource.MEDIAWIKI: MediaWikiConnector,
        DocumentSource.WIKIPEDIA: WikipediaConnector,
-        DocumentSource.ASANA: AsanaConnector,
        DocumentSource.S3: BlobStorageConnector,
        DocumentSource.R2: BlobStorageConnector,
        DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector,
        DocumentSource.OCI_STORAGE: BlobStorageConnector,
-        DocumentSource.XENFORO: XenforoConnector,
    }
    connector_by_source = connector_map.get(source, {})

@@ -129,11 +124,11 @@ def identify_connector_class(


 def instantiate_connector(
-    db_session: Session,
    source: DocumentSource,
    input_type: InputType,
    connector_specific_config: dict[str, Any],
    credential: Credential,
+    db_session: Session,
 ) -> BaseConnector:
    connector_class = identify_connector_class(source, input_type)
    connector = connector_class(**connector_specific_config)
--- a/backend/danswer/connectors/file/connector.py
+++ b/backend/danswer/connectors/file/connector.py
@@ -74,14 +74,13 @@ def _process_file(
        )

    # Using the PDF reader function directly to pass in password cleanly
-    elif extension == ".pdf" and pdf_pass is not None:
+    elif extension == ".pdf":
        file_content_raw, file_metadata = read_pdf_file(file=file, pdf_pass=pdf_pass)

    else:
        file_content_raw = extract_file_text(
-            file=file,
            file_name=file_name,
-            break_on_unprocessable=True,
+            file=file,
        )

    all_metadata = {**metadata, **file_metadata} if metadata else file_metadata
--- a/backend/danswer/connectors/gmail/connector_auth.py
+++ b/backend/danswer/connectors/gmail/connector_auth.py
@@ -25,7 +25,7 @@ from danswer.connectors.gmail.constants import (
 from danswer.connectors.gmail.constants import SCOPES
 from danswer.db.credentials import update_credential_json
 from danswer.db.models import User
-from danswer.key_value_store.factory import get_kv_store
+from danswer.dynamic_configs.factory import get_dynamic_config_store
 from danswer.server.documents.models import CredentialBase
 from danswer.server.documents.models import GoogleAppCredentials
 from danswer.server.documents.models import GoogleServiceAccountKey
@@ -72,7 +72,7 @@ def get_gmail_creds_for_service_account(


 def verify_csrf(credential_id: int, state: str) -> None:
-    csrf = get_kv_store().load(KV_CRED_KEY.format(str(credential_id)))
+    csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id)))
    if csrf != state:
        raise PermissionError(
            "State from Gmail Connector callback does not match expected"
@@ -80,7 +80,7 @@ def verify_csrf(credential_id: int, state: str) -> None:


 def get_gmail_auth_url(credential_id: int) -> str:
-    creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
+    creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY))
    credential_json = json.loads(creds_str)
    flow = InstalledAppFlow.from_client_config(
        credential_json,
@@ -92,14 +92,14 @@ def get_gmail_auth_url(credential_id: int) -> str:
    parsed_url = cast(ParseResult, urlparse(auth_url))
    params = parse_qs(parsed_url.query)

-    get_kv_store().store(
+    get_dynamic_config_store().store(
        KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
    )  # type: ignore
    return str(auth_url)


 def get_auth_url(credential_id: int) -> str:
-    creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
+    creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY))
    credential_json = json.loads(creds_str)
    flow = InstalledAppFlow.from_client_config(
        credential_json,
@@ -111,7 +111,7 @@ def get_auth_url(credential_id: int) -> str:
    parsed_url = cast(ParseResult, urlparse(auth_url))
    params = parse_qs(parsed_url.query)

-    get_kv_store().store(
+    get_dynamic_config_store().store(
        KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
    )  # type: ignore
    return str(auth_url)
@@ -158,40 +158,42 @@ def build_service_account_creds(


 def get_google_app_gmail_cred() -> GoogleAppCredentials:
-    creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
+    creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY))
    return GoogleAppCredentials(**json.loads(creds_str))


 def upsert_google_app_gmail_cred(app_credentials: GoogleAppCredentials) -> None:
-    get_kv_store().store(KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True)
+    get_dynamic_config_store().store(
+        KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True
+    )


 def delete_google_app_gmail_cred() -> None:
-    get_kv_store().delete(KV_GMAIL_CRED_KEY)
+    get_dynamic_config_store().delete(KV_GMAIL_CRED_KEY)


 def get_gmail_service_account_key() -> GoogleServiceAccountKey:
-    creds_str = str(get_kv_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY))
+    creds_str = str(get_dynamic_config_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY))
    return GoogleServiceAccountKey(**json.loads(creds_str))


 def upsert_gmail_service_account_key(
    service_account_key: GoogleServiceAccountKey,
 ) -> None:
-    get_kv_store().store(
+    get_dynamic_config_store().store(
        KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
    )


 def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None:
-    get_kv_store().store(
+    get_dynamic_config_store().store(
        KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
    )


 def delete_gmail_service_account_key() -> None:
-    get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
+    get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)


 def delete_service_account_key() -> None:
-    get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
+    get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
--- a/backend/danswer/connectors/google_drive/connector.py
+++ b/backend/danswer/connectors/google_drive/connector.py
@@ -6,6 +6,7 @@ from datetime import timezone
 from enum import Enum
 from itertools import chain
 from typing import Any
+from typing import cast

 from google.oauth2.credentials import Credentials as OAuthCredentials  # type: ignore
 from google.oauth2.service_account import Credentials as ServiceAccountCredentials  # type: ignore
@@ -20,13 +21,19 @@ from danswer.configs.app_configs import INDEX_BATCH_SIZE
 from danswer.configs.constants import DocumentSource
 from danswer.configs.constants import IGNORE_FOR_QA
 from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder
-from danswer.connectors.google_drive.connector_auth import get_google_drive_creds
+from danswer.connectors.google_drive.connector_auth import (
+    get_google_drive_creds_for_authorized_user,
+)
+from danswer.connectors.google_drive.connector_auth import (
+    get_google_drive_creds_for_service_account,
+)
 from danswer.connectors.google_drive.constants import (
    DB_CREDENTIALS_DICT_DELEGATED_USER_KEY,
 )
 from danswer.connectors.google_drive.constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
 )
+from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY
 from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.interfaces import PollConnector
@@ -36,8 +43,6 @@ from danswer.connectors.models import Section
 from danswer.file_processing.extract_file_text import docx_to_text
 from danswer.file_processing.extract_file_text import pptx_to_text
 from danswer.file_processing.extract_file_text import read_pdf_file
-from danswer.file_processing.unstructured import get_unstructured_api_key
-from danswer.file_processing.unstructured import unstructured_to_text
 from danswer.utils.batching import batch_generator
 from danswer.utils.logger import setup_logger

@@ -329,24 +334,16 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str:
        GDriveMimeType.MARKDOWN.value,
    ]:
        return service.files().get_media(fileId=file["id"]).execute().decode("utf-8")
-    if mime_type in [
-        GDriveMimeType.WORD_DOC.value,
-        GDriveMimeType.POWERPOINT.value,
-        GDriveMimeType.PDF.value,
-    ]:
+    elif mime_type == GDriveMimeType.WORD_DOC.value:
        response = service.files().get_media(fileId=file["id"]).execute()
-        if get_unstructured_api_key():
-            return unstructured_to_text(
-                file=io.BytesIO(response), file_name=file.get("name", file["id"])
-            )
-
-        if mime_type == GDriveMimeType.WORD_DOC.value:
-            return docx_to_text(file=io.BytesIO(response))
-        elif mime_type == GDriveMimeType.PDF.value:
-            text, _ = read_pdf_file(file=io.BytesIO(response))
-            return text
-        elif mime_type == GDriveMimeType.POWERPOINT.value:
-            return pptx_to_text(file=io.BytesIO(response))
+        return docx_to_text(file=io.BytesIO(response))
+    elif mime_type == GDriveMimeType.PDF.value:
+        response = service.files().get_media(fileId=file["id"]).execute()
+        text, _ = read_pdf_file(file=io.BytesIO(response))
+        return text
+    elif mime_type == GDriveMimeType.POWERPOINT.value:
+        response = service.files().get_media(fileId=file["id"]).execute()
+        return pptx_to_text(file=io.BytesIO(response))

    return UNSUPPORTED_FILE_TYPE_CONTENT

@@ -410,7 +407,42 @@ class GoogleDriveConnector(LoadConnector, PollConnector):
        (2) A credential which holds a service account key JSON file, which
        can then be used to impersonate any user in the workspace.
        """
-        creds, new_creds_dict = get_google_drive_creds(credentials)
+        creds: OAuthCredentials | ServiceAccountCredentials | None = None
+        new_creds_dict = None
+        if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:
+            access_token_json_str = cast(
+                str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY]
+            )
+            creds = get_google_drive_creds_for_authorized_user(
+                token_json_str=access_token_json_str
+            )
+
+            # tell caller to update token stored in DB if it has changed
+            # (e.g. the token has been refreshed)
+            new_creds_json_str = creds.to_json() if creds else ""
+            if new_creds_json_str != access_token_json_str:
+                new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str}
+
+        if DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:
+            service_account_key_json_str = credentials[
+                DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
+            ]
+            creds = get_google_drive_creds_for_service_account(
+                service_account_key_json_str=service_account_key_json_str
+            )
+
+            # "Impersonate" a user if one is specified
+            delegated_user_email = cast(
+                str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY)
+            )
+            if delegated_user_email:
+                creds = creds.with_subject(delegated_user_email) if creds else None  # type: ignore
+
+        if creds is None:
+            raise PermissionError(
+                "Unable to access Google Drive - unknown credential structure."
+            )
+
        self.creds = creds
        return new_creds_dict

@@ -477,7 +509,6 @@ class GoogleDriveConnector(LoadConnector, PollConnector):
                                file["modifiedTime"]
                            ).astimezone(timezone.utc),
                            metadata={} if text_contents else {IGNORE_FOR_QA: "True"},
-                            additional_info=file.get("id"),
                        )
                    )
                except Exception as e:
--- a/backend/danswer/connectors/google_drive/connector_auth.py
+++ b/backend/danswer/connectors/google_drive/connector_auth.py
@@ -10,13 +10,11 @@ from google.oauth2.service_account import Credentials as ServiceAccountCredentia
 from google_auth_oauthlib.flow import InstalledAppFlow  # type: ignore
 from sqlalchemy.orm import Session

-from danswer.configs.app_configs import ENTERPRISE_EDITION_ENABLED
 from danswer.configs.app_configs import WEB_DOMAIN
 from danswer.configs.constants import DocumentSource
 from danswer.configs.constants import KV_CRED_KEY
 from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY
 from danswer.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY
-from danswer.connectors.google_drive.constants import BASE_SCOPES
 from danswer.connectors.google_drive.constants import (
    DB_CREDENTIALS_DICT_DELEGATED_USER_KEY,
 )
@@ -24,11 +22,10 @@ from danswer.connectors.google_drive.constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
 )
 from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY
-from danswer.connectors.google_drive.constants import FETCH_GROUPS_SCOPES
-from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES
+from danswer.connectors.google_drive.constants import SCOPES
 from danswer.db.credentials import update_credential_json
 from danswer.db.models import User
-from danswer.key_value_store.factory import get_kv_store
+from danswer.dynamic_configs.factory import get_dynamic_config_store
 from danswer.server.documents.models import CredentialBase
 from danswer.server.documents.models import GoogleAppCredentials
 from danswer.server.documents.models import GoogleServiceAccountKey
@@ -37,25 +34,15 @@ from danswer.utils.logger import setup_logger
 logger = setup_logger()


-def build_gdrive_scopes() -> list[str]:
-    base_scopes: list[str] = BASE_SCOPES
-    permissions_scopes: list[str] = FETCH_PERMISSIONS_SCOPES
-    groups_scopes: list[str] = FETCH_GROUPS_SCOPES
-
-    if ENTERPRISE_EDITION_ENABLED:
-        return base_scopes + permissions_scopes + groups_scopes
-    return base_scopes + permissions_scopes
-
-
 def _build_frontend_google_drive_redirect() -> str:
    return f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback"


 def get_google_drive_creds_for_authorized_user(
-    token_json_str: str, scopes: list[str] = build_gdrive_scopes()
+    token_json_str: str,
 ) -> OAuthCredentials | None:
    creds_json = json.loads(token_json_str)
-    creds = OAuthCredentials.from_authorized_user_info(creds_json, scopes)
+    creds = OAuthCredentials.from_authorized_user_info(creds_json, SCOPES)
    if creds.valid:
        return creds

@@ -72,69 +59,20 @@ def get_google_drive_creds_for_authorized_user(
    return None


-def _get_google_drive_creds_for_service_account(
-    service_account_key_json_str: str, scopes: list[str] = build_gdrive_scopes()
+def get_google_drive_creds_for_service_account(
+    service_account_key_json_str: str,
 ) -> ServiceAccountCredentials | None:
    service_account_key = json.loads(service_account_key_json_str)
    creds = ServiceAccountCredentials.from_service_account_info(
-        service_account_key, scopes=scopes
+        service_account_key, scopes=SCOPES
    )
    if not creds.valid or not creds.expired:
        creds.refresh(Request())
    return creds if creds.valid else None


-def get_google_drive_creds(
-    credentials: dict[str, str], scopes: list[str] = build_gdrive_scopes()
-) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]:
-    oauth_creds = None
-    service_creds = None
-    new_creds_dict = None
-    if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:
-        access_token_json_str = cast(str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY])
-        oauth_creds = get_google_drive_creds_for_authorized_user(
-            token_json_str=access_token_json_str, scopes=scopes
-        )
-
-        # tell caller to update token stored in DB if it has changed
-        # (e.g. the token has been refreshed)
-        new_creds_json_str = oauth_creds.to_json() if oauth_creds else ""
-        if new_creds_json_str != access_token_json_str:
-            new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str}
-
-    elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:
-        service_account_key_json_str = credentials[
-            DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
-        ]
-        service_creds = _get_google_drive_creds_for_service_account(
-            service_account_key_json_str=service_account_key_json_str,
-            scopes=scopes,
-        )
-
-        # "Impersonate" a user if one is specified
-        delegated_user_email = cast(
-            str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY)
-        )
-        if delegated_user_email:
-            service_creds = (
-                service_creds.with_subject(delegated_user_email)
-                if service_creds
-                else None
-            )
-
-    creds: ServiceAccountCredentials | OAuthCredentials | None = (
-        oauth_creds or service_creds
-    )
-    if creds is None:
-        raise PermissionError(
-            "Unable to access Google Drive - unknown credential structure."
-        )
-
-    return creds, new_creds_dict
-
-
 def verify_csrf(credential_id: int, state: str) -> None:
-    csrf = get_kv_store().load(KV_CRED_KEY.format(str(credential_id)))
+    csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id)))
    if csrf != state:
        raise PermissionError(
            "State from Google Drive Connector callback does not match expected"
@@ -142,11 +80,11 @@ def verify_csrf(credential_id: int, state: str) -> None:


 def get_auth_url(credential_id: int) -> str:
-    creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
+    creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
    credential_json = json.loads(creds_str)
    flow = InstalledAppFlow.from_client_config(
        credential_json,
-        scopes=build_gdrive_scopes(),
+        scopes=SCOPES,
        redirect_uri=_build_frontend_google_drive_redirect(),
    )
    auth_url, _ = flow.authorization_url(prompt="consent")
@@ -154,7 +92,7 @@ def get_auth_url(credential_id: int) -> str:
    parsed_url = cast(ParseResult, urlparse(auth_url))
    params = parse_qs(parsed_url.query)

-    get_kv_store().store(
+    get_dynamic_config_store().store(
        KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
    )  # type: ignore
    return str(auth_url)
@@ -169,7 +107,7 @@ def update_credential_access_tokens(
    app_credentials = get_google_app_cred()
    flow = InstalledAppFlow.from_client_config(
        app_credentials.model_dump(),
-        scopes=build_gdrive_scopes(),
+        scopes=SCOPES,
        redirect_uri=_build_frontend_google_drive_redirect(),
    )
    flow.fetch_token(code=auth_code)
@@ -202,28 +140,32 @@ def build_service_account_creds(


 def get_google_app_cred() -> GoogleAppCredentials:
-    creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
+    creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
    return GoogleAppCredentials(**json.loads(creds_str))


 def upsert_google_app_cred(app_credentials: GoogleAppCredentials) -> None:
-    get_kv_store().store(KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True)
+    get_dynamic_config_store().store(
+        KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True
+    )


 def delete_google_app_cred() -> None:
-    get_kv_store().delete(KV_GOOGLE_DRIVE_CRED_KEY)
+    get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_CRED_KEY)


 def get_service_account_key() -> GoogleServiceAccountKey:
-    creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY))
+    creds_str = str(
+        get_dynamic_config_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)
+    )
    return GoogleServiceAccountKey(**json.loads(creds_str))


 def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None:
-    get_kv_store().store(
+    get_dynamic_config_store().store(
        KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
    )


 def delete_service_account_key() -> None:
-    get_kv_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)
+    get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)
--- a/backend/danswer/connectors/google_drive/constants.py
+++ b/backend/danswer/connectors/google_drive/constants.py
@@ -1,7 +1,7 @@
 DB_CREDENTIALS_DICT_TOKEN_KEY = "google_drive_tokens"
 DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key"
 DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "google_drive_delegated_user"
-
-BASE_SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
-FETCH_PERMISSIONS_SCOPES = ["https://www.googleapis.com/auth/drive.metadata.readonly"]
-FETCH_GROUPS_SCOPES = ["https://www.googleapis.com/auth/cloud-identity.groups.readonly"]
+SCOPES = [
+    "https://www.googleapis.com/auth/drive.readonly",
+    "https://www.googleapis.com/auth/drive.metadata.readonly",
+]
--- a/backend/danswer/connectors/models.py
+++ b/backend/danswer/connectors/models.py
@@ -113,9 +113,6 @@ class DocumentBase(BaseModel):
    # The default title is semantic_identifier though unless otherwise specified
    title: str | None = None
    from_ingestion_api: bool = False
-    # Anything else that may be useful that is specific to this particular connector type that other
-    # parts of the code may need. If you're unsure, this can be left as None
-    additional_info: Any = None

    def get_title_for_document_index(
        self,
--- a/backend/danswer/connectors/sharepoint/connector.py
+++ b/backend/danswer/connectors/sharepoint/connector.py
@@ -40,8 +40,8 @@ def _convert_driveitem_to_document(
    driveitem: DriveItem,
 ) -> Document:
    file_text = extract_file_text(
-        file=io.BytesIO(driveitem.get_content().execute_query().value),
        file_name=driveitem.name,
+        file=io.BytesIO(driveitem.get_content().execute_query().value),
        break_on_unprocessable=False,
    )

--- a/backend/danswer/connectors/slack/connector.py
+++ b/backend/danswer/connectors/slack/connector.py
@@ -8,12 +8,13 @@ from typing import cast

 from slack_sdk import WebClient
 from slack_sdk.errors import SlackApiError
+from slack_sdk.web import SlackResponse

 from danswer.configs.app_configs import ENABLE_EXPENSIVE_EXPERT_CALLS
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
 from danswer.configs.constants import DocumentSource
+from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder
 from danswer.connectors.interfaces import GenerateDocumentsOutput
-from danswer.connectors.interfaces import IdConnector
 from danswer.connectors.interfaces import PollConnector
 from danswer.connectors.interfaces import SecondsSinceUnixEpoch
 from danswer.connectors.models import BasicExpertInfo
@@ -22,8 +23,9 @@ from danswer.connectors.models import Document
 from danswer.connectors.models import Section
 from danswer.connectors.slack.utils import expert_info_from_slack_id
 from danswer.connectors.slack.utils import get_message_link
-from danswer.connectors.slack.utils import make_paginated_slack_api_call_w_retries
-from danswer.connectors.slack.utils import make_slack_api_call_w_retries
+from danswer.connectors.slack.utils import make_slack_api_call_logged
+from danswer.connectors.slack.utils import make_slack_api_call_paginated
+from danswer.connectors.slack.utils import make_slack_api_rate_limited
 from danswer.connectors.slack.utils import SlackTextCleaner
 from danswer.utils.logger import setup_logger

@@ -36,18 +38,47 @@ MessageType = dict[str, Any]
 # list of messages in a thread
 ThreadType = list[MessageType]

+basic_retry_wrapper = retry_builder()

-def _collect_paginated_channels(
+
+def _make_paginated_slack_api_call(
+    call: Callable[..., SlackResponse], **kwargs: Any
+) -> Generator[dict[str, Any], None, None]:
+    return make_slack_api_call_paginated(
+        basic_retry_wrapper(
+            make_slack_api_rate_limited(make_slack_api_call_logged(call))
+        )
+    )(**kwargs)
+
+
+def _make_slack_api_call(
+    call: Callable[..., SlackResponse], **kwargs: Any
+) -> SlackResponse:
+    return basic_retry_wrapper(
+        make_slack_api_rate_limited(make_slack_api_call_logged(call))
+    )(**kwargs)
+
+
+def get_channel_info(client: WebClient, channel_id: str) -> ChannelType:
+    """Get information about a channel. Needed to convert channel ID to channel name"""
+    return _make_slack_api_call(client.conversations_info, channel=channel_id)[0][
+        "channel"
+    ]
+
+
+def _get_channels(
    client: WebClient,
    exclude_archived: bool,
-    channel_types: list[str],
+    get_private: bool,
 ) -> list[ChannelType]:
    channels: list[dict[str, Any]] = []
-    for result in make_paginated_slack_api_call_w_retries(
+    for result in _make_paginated_slack_api_call(
        client.conversations_list,
        exclude_archived=exclude_archived,
        # also get private channels the bot is added to
-        types=channel_types,
+        types=["public_channel", "private_channel"]
+        if get_private
+        else ["public_channel"],
    ):
        channels.extend(result["channels"])

@@ -57,38 +88,19 @@ def _collect_paginated_channels(
 def get_channels(
    client: WebClient,
    exclude_archived: bool = True,
-    get_public: bool = True,
-    get_private: bool = True,
 ) -> list[ChannelType]:
    """Get all channels in the workspace"""
-    channels: list[dict[str, Any]] = []
-    channel_types = []
-    if get_public:
-        channel_types.append("public_channel")
-    if get_private:
-        channel_types.append("private_channel")
    # try getting private channels as well at first
    try:
-        channels = _collect_paginated_channels(
-            client=client,
-            exclude_archived=exclude_archived,
-            channel_types=channel_types,
+        return _get_channels(
+            client=client, exclude_archived=exclude_archived, get_private=True
        )
    except SlackApiError as e:
        logger.info(f"Unable to fetch private channels due to - {e}")
-        logger.info("trying again without private channels")
-        if get_public:
-            channel_types = ["public_channel"]
-        else:
-            logger.warning("No channels to fetch")
-            return []
-        channels = _collect_paginated_channels(
-            client=client,
-            exclude_archived=exclude_archived,
-            channel_types=channel_types,
-        )

-    return channels
+    return _get_channels(
+        client=client, exclude_archived=exclude_archived, get_private=False
+    )


 def get_channel_messages(
@@ -100,14 +112,14 @@ def get_channel_messages(
    """Get all messages in a channel"""
    # join so that the bot can access messages
    if not channel["is_member"]:
-        make_slack_api_call_w_retries(
+        _make_slack_api_call(
            client.conversations_join,
            channel=channel["id"],
            is_private=channel["is_private"],
        )
        logger.info(f"Successfully joined '{channel['name']}'")

-    for result in make_paginated_slack_api_call_w_retries(
+    for result in _make_paginated_slack_api_call(
        client.conversations_history,
        channel=channel["id"],
        oldest=oldest,
@@ -119,7 +131,7 @@ def get_channel_messages(
 def get_thread(client: WebClient, channel_id: str, thread_id: str) -> ThreadType:
    """Get all messages in a thread"""
    threads: list[MessageType] = []
-    for result in make_paginated_slack_api_call_w_retries(
+    for result in _make_paginated_slack_api_call(
        client.conversations_replies, channel=channel_id, ts=thread_id
    ):
        threads.extend(result["messages"])
@@ -205,17 +217,12 @@ _DISALLOWED_MSG_SUBTYPES = {
    "group_leave",
    "group_archive",
    "group_unarchive",
-    "channel_leave",
-    "channel_name",
-    "channel_join",
 }


-def default_msg_filter(message: MessageType) -> bool:
+def _default_msg_filter(message: MessageType) -> bool:
    # Don't keep messages from bots
    if message.get("bot_id") or message.get("app_id"):
-        if message.get("bot_profile", {}).get("name") == "DanswerConnector":
-            return False
        return True

    # Uninformative
@@ -259,14 +266,14 @@ def filter_channels(
    ]


-def _get_all_docs(
+def get_all_docs(
    client: WebClient,
    workspace: str,
    channels: list[str] | None = None,
    channel_name_regex_enabled: bool = False,
    oldest: str | None = None,
    latest: str | None = None,
-    msg_filter_func: Callable[[MessageType], bool] = default_msg_filter,
+    msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter,
 ) -> Generator[Document, None, None]:
    """Get all documents in the workspace, channel by channel"""
    slack_cleaner = SlackTextCleaner(client=client)
@@ -321,44 +328,7 @@ def _get_all_docs(
        )


-def _get_all_doc_ids(
-    client: WebClient,
-    channels: list[str] | None = None,
-    channel_name_regex_enabled: bool = False,
-    msg_filter_func: Callable[[MessageType], bool] = default_msg_filter,
-) -> set[str]:
-    """
-    Get all document ids in the workspace, channel by channel
-    This is pretty identical to get_all_docs, but it returns a set of ids instead of documents
-    This makes it an order of magnitude faster than get_all_docs
-    """
-
-    all_channels = get_channels(client)
-    filtered_channels = filter_channels(
-        all_channels, channels, channel_name_regex_enabled
-    )
-
-    all_doc_ids = set()
-    for channel in filtered_channels:
-        channel_message_batches = get_channel_messages(
-            client=client,
-            channel=channel,
-        )
-
-        for message_batch in channel_message_batches:
-            for message in message_batch:
-                if msg_filter_func(message):
-                    continue
-
-                # The document id is the channel id and the ts of the first message in the thread
-                # Since we already have the first message of the thread, we dont have to
-                # fetch the thread for id retrieval, saving time and API calls
-                all_doc_ids.add(f"{channel['id']}__{message['ts']}")
-
-    return all_doc_ids
-
-
-class SlackPollConnector(PollConnector, IdConnector):
+class SlackPollConnector(PollConnector):
    def __init__(
        self,
        workspace: str,
@@ -379,16 +349,6 @@ class SlackPollConnector(PollConnector, IdConnector):
        self.client = WebClient(token=bot_token)
        return None

-    def retrieve_all_source_ids(self) -> set[str]:
-        if self.client is None:
-            raise ConnectorMissingCredentialError("Slack")
-
-        return _get_all_doc_ids(
-            client=self.client,
-            channels=self.channels,
-            channel_name_regex_enabled=self.channel_regex_enabled,
-        )
-
    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
@@ -396,7 +356,7 @@ class SlackPollConnector(PollConnector, IdConnector):
            raise ConnectorMissingCredentialError("Slack")

        documents: list[Document] = []
-        for document in _get_all_docs(
+        for document in get_all_docs(
            client=self.client,
            workspace=self.workspace,
            channels=self.channels,
--- a/backend/danswer/connectors/slack/utils.py
+++ b/backend/danswer/connectors/slack/utils.py
@@ -10,13 +10,11 @@ from slack_sdk import WebClient
 from slack_sdk.errors import SlackApiError
 from slack_sdk.web import SlackResponse

-from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder
 from danswer.connectors.models import BasicExpertInfo
 from danswer.utils.logger import setup_logger

 logger = setup_logger()

-basic_retry_wrapper = retry_builder()
 # number of messages we request per page when fetching paginated slack messages
 _SLACK_LIMIT = 900

@@ -36,7 +34,7 @@ def get_message_link(
    )


-def _make_slack_api_call_logged(
+def make_slack_api_call_logged(
    call: Callable[..., SlackResponse],
 ) -> Callable[..., SlackResponse]:
    @wraps(call)
@@ -49,7 +47,7 @@ def _make_slack_api_call_logged(
    return logged_call


-def _make_slack_api_call_paginated(
+def make_slack_api_call_paginated(
    call: Callable[..., SlackResponse],
 ) -> Callable[..., Generator[dict[str, Any], None, None]]:
    """Wraps calls to slack API so that they automatically handle pagination"""
@@ -118,24 +116,6 @@ def make_slack_api_rate_limited(
    return rate_limited_call


-def make_slack_api_call_w_retries(
-    call: Callable[..., SlackResponse], **kwargs: Any
-) -> SlackResponse:
-    return basic_retry_wrapper(
-        make_slack_api_rate_limited(_make_slack_api_call_logged(call))
-    )(**kwargs)
-
-
-def make_paginated_slack_api_call_w_retries(
-    call: Callable[..., SlackResponse], **kwargs: Any
-) -> Generator[dict[str, Any], None, None]:
-    return _make_slack_api_call_paginated(
-        basic_retry_wrapper(
-            make_slack_api_rate_limited(_make_slack_api_call_logged(call))
-        )
-    )(**kwargs)
-
-
 def expert_info_from_slack_id(
    user_id: str | None,
    client: WebClient,
--- a/backend/danswer/connectors/xenforo/connector.py
+++ b/backend/danswer/connectors/xenforo/connector.py
@@ -1,244 +0,0 @@
-"""
-This is the XenforoConnector class. It is used to connect to a Xenforo forum and load or update documents from the forum.
-
-To use this class, you need to provide the URL of the Xenforo forum board you want to connect to when creating an instance
-of the class. The URL should be a string that starts with 'http://' or 'https://', followed by the domain name of the
-forum, followed by the board name. For example:
-
-    base_url = 'https://www.example.com/forum/boards/some-topic/'
-
-The `load_from_state` method is used to load documents from the forum. It takes an optional `state` parameter, which
-can be used to specify a state from which to start loading documents.
-"""
-import re
-from datetime import datetime
-from datetime import timedelta
-from datetime import timezone
-from typing import Any
-from urllib.parse import urlparse
-
-import pytz
-import requests
-from bs4 import BeautifulSoup
-from bs4 import Tag
-
-from danswer.configs.constants import DocumentSource
-from danswer.connectors.cross_connector_utils.miscellaneous_utils import datetime_to_utc
-from danswer.connectors.interfaces import GenerateDocumentsOutput
-from danswer.connectors.interfaces import LoadConnector
-from danswer.connectors.models import BasicExpertInfo
-from danswer.connectors.models import Document
-from danswer.connectors.models import Section
-from danswer.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def get_title(soup: BeautifulSoup) -> str:
-    el = soup.find("h1", "p-title-value")
-    if not el:
-        return ""
-    title = el.text
-    for char in (";", ":", "!", "*", "/", "\\", "?", '"', "<", ">", "|"):
-        title = title.replace(char, "_")
-    return title
-
-
-def get_pages(soup: BeautifulSoup, url: str) -> list[str]:
-    page_tags = soup.select("li.pageNav-page")
-    page_numbers = []
-    for button in page_tags:
-        if re.match(r"^\d+$", button.text):
-            page_numbers.append(button.text)
-
-    max_pages = int(max(page_numbers, key=int)) if page_numbers else 1
-
-    all_pages = []
-    for x in range(1, int(max_pages) + 1):
-        all_pages.append(f"{url}page-{x}")
-    return all_pages
-
-
-def parse_post_date(post_element: BeautifulSoup) -> datetime:
-    el = post_element.find("time")
-    if not isinstance(el, Tag) or "datetime" not in el.attrs:
-        return datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc)
-
-    date_value = el["datetime"]
-
-    # Ensure date_value is a string (if it's a list, take the first element)
-    if isinstance(date_value, list):
-        date_value = date_value[0]
-
-    post_date = datetime.strptime(date_value, "%Y-%m-%dT%H:%M:%S%z")
-    return datetime_to_utc(post_date)
-
-
-def scrape_page_posts(
-    soup: BeautifulSoup,
-    page_index: int,
-    url: str,
-    initial_run: bool,
-    start_time: datetime,
-) -> list:
-    title = get_title(soup)
-
-    documents = []
-    for post in soup.find_all("div", class_="message-inner"):
-        post_date = parse_post_date(post)
-        if initial_run or post_date > start_time:
-            el = post.find("div", class_="bbWrapper")
-            if not el:
-                continue
-            post_text = el.get_text(strip=True) + "\n"
-            author_tag = post.find("a", class_="username")
-            if author_tag is None:
-                author_tag = post.find("span", class_="username")
-            author = author_tag.get_text(strip=True) if author_tag else "Deleted author"
-            formatted_time = post_date.strftime("%Y-%m-%d %H:%M:%S")
-
-            # TODO: if a caller calls this for each page of a thread, it may see the
-            # same post multiple times if there is a sticky post
-            # that appears on each page of a thread.
-            # it's important to generate unique doc id's, so page index is part of the
-            # id. We may want to de-dupe this stuff inside the indexing service.
-            document = Document(
-                id=f"{DocumentSource.XENFORO.value}_{title}_{page_index}_{formatted_time}",
-                sections=[Section(link=url, text=post_text)],
-                title=title,
-                source=DocumentSource.XENFORO,
-                semantic_identifier=title,
-                primary_owners=[BasicExpertInfo(display_name=author)],
-                metadata={
-                    "type": "post",
-                    "author": author,
-                    "time": formatted_time,
-                },
-                doc_updated_at=post_date,
-            )
-
-            documents.append(document)
-    return documents
-
-
-class XenforoConnector(LoadConnector):
-    # Class variable to track if the connector has been run before
-    has_been_run_before = False
-
-    def __init__(self, base_url: str) -> None:
-        self.base_url = base_url
-        self.initial_run = not XenforoConnector.has_been_run_before
-        self.start = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(days=1)
-        self.cookies: dict[str, str] = {}
-        # mimic user browser to avoid being blocked by the website (see: https://www.useragents.me/)
-        self.headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-            "AppleWebKit/537.36 (KHTML, like Gecko) "
-            "Chrome/121.0.0.0 Safari/537.36"
-        }
-
-    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        if credentials:
-            logger.warning("Unexpected credentials provided for Xenforo Connector")
-        return None
-
-    def load_from_state(self) -> GenerateDocumentsOutput:
-        # Standardize URL to always end in /.
-        if self.base_url[-1] != "/":
-            self.base_url += "/"
-
-        # Remove all extra parameters from the end such as page, post.
-        matches = ("threads/", "boards/", "forums/")
-        for each in matches:
-            if each in self.base_url:
-                try:
-                    self.base_url = self.base_url[
-                        0 : self.base_url.index(
-                            "/", self.base_url.index(each) + len(each)
-                        )
-                        + 1
-                    ]
-                except ValueError:
-                    pass
-
-        doc_batch: list[Document] = []
-        all_threads = []
-
-        # If the URL contains "boards/" or "forums/", find all threads.
-        if "boards/" in self.base_url or "forums/" in self.base_url:
-            pages = get_pages(self.requestsite(self.base_url), self.base_url)
-
-            # Get all pages on thread_list_page
-            for pre_count, thread_list_page in enumerate(pages, start=1):
-                logger.info(
-                    f"Getting pages from thread_list_page.. Current: {pre_count}/{len(pages)}\r"
-                )
-                all_threads += self.get_threads(thread_list_page)
-        # If the URL contains "threads/", add the thread to the list.
-        elif "threads/" in self.base_url:
-            all_threads.append(self.base_url)
-
-        # Process all threads
-        for thread_count, thread_url in enumerate(all_threads, start=1):
-            soup = self.requestsite(thread_url)
-            if soup is None:
-                logger.error(f"Failed to load page: {self.base_url}")
-                continue
-            pages = get_pages(soup, thread_url)
-            # Getting all pages for all threads
-            for page_index, page in enumerate(pages, start=1):
-                logger.info(
-                    f"Progress: Page {page_index}/{len(pages)} - Thread {thread_count}/{len(all_threads)}\r"
-                )
-                soup_page = self.requestsite(page)
-                doc_batch.extend(
-                    scrape_page_posts(
-                        soup_page, page_index, thread_url, self.initial_run, self.start
-                    )
-                )
-            if doc_batch:
-                yield doc_batch
-
-        # Mark the initial run finished after all threads and pages have been processed
-        XenforoConnector.has_been_run_before = True
-
-    def get_threads(self, url: str) -> list[str]:
-        soup = self.requestsite(url)
-        thread_tags = soup.find_all(class_="structItem-title")
-        base_url = "{uri.scheme}://{uri.netloc}".format(uri=urlparse(url))
-        threads = []
-        for x in thread_tags:
-            y = x.find_all(href=True)
-            for element in y:
-                link = element["href"]
-                if "threads/" in link:
-                    stripped = link[0 : link.rfind("/") + 1]
-                    if base_url + stripped not in threads:
-                        threads.append(base_url + stripped)
-        return threads
-
-    def requestsite(self, url: str) -> BeautifulSoup:
-        try:
-            response = requests.get(
-                url, cookies=self.cookies, headers=self.headers, timeout=10
-            )
-            if response.status_code != 200:
-                logger.error(
-                    f"<{url}> Request Error: {response.status_code} - {response.reason}"
-                )
-            return BeautifulSoup(response.text, "html.parser")
-        except TimeoutError:
-            logger.error("Timed out Error.")
-        except Exception as e:
-            logger.error(f"Error on {url}")
-            logger.exception(e)
-        return BeautifulSoup("", "html.parser")
-
-
-if __name__ == "__main__":
-    connector = XenforoConnector(
-        # base_url="https://cassiopaea.org/forum/threads/how-to-change-your-emotional-state.41381/"
-        base_url="https://xenforo.com/community/threads/whats-new-with-enhanced-search-resource-manager-and-media-gallery-in-xenforo-2-3.220935/"
-    )
-    document_batches = connector.load_from_state()
-    print(next(document_batches))
--- a/backend/danswer/danswerbot/slack/blocks.py
+++ b/backend/danswer/danswerbot/slack/blocks.py
@@ -25,6 +25,7 @@ from danswer.danswerbot.slack.constants import DISLIKE_BLOCK_ACTION_ID
 from danswer.danswerbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID
 from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID
 from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID
+from danswer.danswerbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID
 from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID
 from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID
 from danswer.danswerbot.slack.icons import source_to_github_img_link
@@ -359,6 +360,22 @@ def build_quotes_block(
    return [SectionBlock(text="*Relevant Snippets*\n" + "\n".join(quote_lines))]


+def build_standard_answer_blocks(
+    answer_message: str,
+) -> list[Block]:
+    generate_button_block = ButtonElement(
+        action_id=GENERATE_ANSWER_BUTTON_ACTION_ID,
+        text="Generate Full Answer",
+    )
+    answer_block = SectionBlock(text=answer_message)
+    return [
+        answer_block,
+        ActionsBlock(
+            elements=[generate_button_block],
+        ),
+    ]
+
+
 def build_qa_response_blocks(
    message_id: int | None,
    answer: str | None,
--- a/backend/danswer/danswerbot/slack/handlers/handle_message.py
+++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py
@@ -211,7 +211,7 @@ def handle_message(

    with Session(get_sqlalchemy_engine()) as db_session:
        if message_info.email:
-            add_non_web_user_if_not_exists(db_session, message_info.email)
+            add_non_web_user_if_not_exists(message_info.email, db_session)

        # first check if we need to respond with a standard answer
        used_standard_answer = handle_standard_answers(
--- a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py
+++ b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py
@@ -5,7 +5,6 @@ from typing import cast
 from typing import Optional
 from typing import TypeVar

-from fastapi import HTTPException
 from retry import retry
 from slack_sdk import WebClient
 from slack_sdk.models.blocks import DividerBlock
@@ -136,8 +135,7 @@ def handle_regular_answer(
        else slack_bot_config.response_type == SlackBotResponseType.CITATIONS
    )

-    if not message_ts_to_respond_to and not is_bot_msg:
-        # if the message is not "/danswer" command, then it should have a message ts to respond to
+    if not message_ts_to_respond_to:
        raise RuntimeError(
            "No message timestamp to respond to in `handle_message`. This should never happen."
        )
@@ -154,23 +152,15 @@ def handle_regular_answer(

        with Session(get_sqlalchemy_engine()) as db_session:
            if len(new_message_request.messages) > 1:
-                if new_message_request.persona_config:
-                    raise HTTPException(
-                        status_code=403,
-                        detail="Slack bot does not support persona config",
-                    )
-
-                elif new_message_request.persona_id is not None:
-                    persona = cast(
-                        Persona,
-                        fetch_persona_by_id(
-                            db_session,
-                            new_message_request.persona_id,
-                            user=None,
-                            get_editable=False,
-                        ),
-                    )
-
+                persona = cast(
+                    Persona,
+                    fetch_persona_by_id(
+                        db_session,
+                        new_message_request.persona_id,
+                        user=None,
+                        get_editable=False,
+                    ),
+                )
                llm, _ = get_llms_for_persona(persona)

                # In cases of threads, split the available tokens between docs and thread context
@@ -429,7 +419,7 @@ def handle_regular_answer(
    )

    # Get the chunks fed to the LLM only, then fill with other docs
-    llm_doc_inds = answer.llm_selected_doc_indices or []
+    llm_doc_inds = answer.llm_chunks_indices or []
    llm_docs = [top_docs[i] for i in llm_doc_inds]
    remaining_docs = [
        doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds
@@ -480,9 +470,7 @@ def handle_regular_answer(

        # For DM (ephemeral message), we need to create a thread via a normal message so the user can see
        # the ephemeral message. This also will give the user a notification which ephemeral message does not.
-        # if there is no message_ts_to_respond_to, and we have made it this far, then this is a /danswer message
-        # so we shouldn't send_team_member_message
-        if receiver_ids and message_ts_to_respond_to is not None:
+        if receiver_ids:
            send_team_member_message(
                client=client,
                channel=channel,
--- a/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py
+++ b/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py
@@ -1,16 +1,60 @@
 from slack_sdk import WebClient
 from sqlalchemy.orm import Session

+from danswer.configs.constants import MessageType
+from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI
+from danswer.danswerbot.slack.blocks import build_standard_answer_blocks
+from danswer.danswerbot.slack.blocks import get_restate_blocks
+from danswer.danswerbot.slack.handlers.utils import send_team_member_message
 from danswer.danswerbot.slack.models import SlackMessageInfo
+from danswer.danswerbot.slack.utils import respond_in_thread
+from danswer.danswerbot.slack.utils import update_emote_react
+from danswer.db.chat import create_chat_session
+from danswer.db.chat import create_new_chat_message
+from danswer.db.chat import get_chat_messages_by_sessions
+from danswer.db.chat import get_chat_sessions_by_slack_thread_id
+from danswer.db.chat import get_or_create_root_message
 from danswer.db.models import Prompt
 from danswer.db.models import SlackBotConfig
+from danswer.db.standard_answer import fetch_standard_answer_categories_by_names
+from danswer.db.standard_answer import find_matching_standard_answers
+from danswer.server.manage.models import StandardAnswer
 from danswer.utils.logger import DanswerLoggingAdapter
 from danswer.utils.logger import setup_logger
-from danswer.utils.variable_functionality import fetch_versioned_implementation

 logger = setup_logger()


+def oneoff_standard_answers(
+    message: str,
+    slack_bot_categories: list[str],
+    db_session: Session,
+) -> list[StandardAnswer]:
+    """
+    Respond to the user message if it matches any configured standard answers.
+
+    Returns a list of matching StandardAnswers if found, otherwise None.
+    """
+    configured_standard_answers = {
+        standard_answer
+        for category in fetch_standard_answer_categories_by_names(
+            slack_bot_categories, db_session=db_session
+        )
+        for standard_answer in category.standard_answers
+    }
+
+    matching_standard_answers = find_matching_standard_answers(
+        query=message,
+        id_in=[answer.id for answer in configured_standard_answers],
+        db_session=db_session,
+    )
+
+    server_standard_answers = [
+        StandardAnswer.from_model(db_answer) for db_answer in matching_standard_answers
+    ]
+    return server_standard_answers
+
+
 def handle_standard_answers(
    message_info: SlackMessageInfo,
    receiver_ids: list[str] | None,
@@ -19,38 +63,153 @@ def handle_standard_answers(
    logger: DanswerLoggingAdapter,
    client: WebClient,
    db_session: Session,
-) -> bool:
-    """Returns whether one or more Standard Answer message blocks were
-    emitted by the Slack bot"""
-    versioned_handle_standard_answers = fetch_versioned_implementation(
-        "danswer.danswerbot.slack.handlers.handle_standard_answers",
-        "_handle_standard_answers",
-    )
-    return versioned_handle_standard_answers(
-        message_info=message_info,
-        receiver_ids=receiver_ids,
-        slack_bot_config=slack_bot_config,
-        prompt=prompt,
-        logger=logger,
-        client=client,
-        db_session=db_session,
-    )
-
-
-def _handle_standard_answers(
-    message_info: SlackMessageInfo,
-    receiver_ids: list[str] | None,
-    slack_bot_config: SlackBotConfig | None,
-    prompt: Prompt | None,
-    logger: DanswerLoggingAdapter,
-    client: WebClient,
-    db_session: Session,
 ) -> bool:
    """
-    Standard Answers are a paid Enterprise Edition feature. This is the fallback
-    function handling the case where EE features are not enabled.
+    Potentially respond to the user message depending on whether the user's message matches
+    any of the configured standard answers and also whether those answers have already been
+    provided in the current thread.

-    Always returns false i.e. since EE features are not enabled, we NEVER create any
-    Slack message blocks.
+    Returns True if standard answers are found to match the user's message and therefore,
+    we still need to respond to the users.
    """
-    return False
+    # if no channel config, then no standard answers are configured
+    if not slack_bot_config:
+        return False
+
+    slack_thread_id = message_info.thread_to_respond
+    configured_standard_answer_categories = (
+        slack_bot_config.standard_answer_categories if slack_bot_config else []
+    )
+    configured_standard_answers = set(
+        [
+            standard_answer
+            for standard_answer_category in configured_standard_answer_categories
+            for standard_answer in standard_answer_category.standard_answers
+        ]
+    )
+    query_msg = message_info.thread_messages[-1]
+
+    if slack_thread_id is None:
+        used_standard_answer_ids = set([])
+    else:
+        chat_sessions = get_chat_sessions_by_slack_thread_id(
+            slack_thread_id=slack_thread_id,
+            user_id=None,
+            db_session=db_session,
+        )
+        chat_messages = get_chat_messages_by_sessions(
+            chat_session_ids=[chat_session.id for chat_session in chat_sessions],
+            user_id=None,
+            db_session=db_session,
+            skip_permission_check=True,
+        )
+        used_standard_answer_ids = set(
+            [
+                standard_answer.id
+                for chat_message in chat_messages
+                for standard_answer in chat_message.standard_answers
+            ]
+        )
+
+    usable_standard_answers = configured_standard_answers.difference(
+        used_standard_answer_ids
+    )
+    if usable_standard_answers:
+        matching_standard_answers = find_matching_standard_answers(
+            query=query_msg.message,
+            id_in=[standard_answer.id for standard_answer in usable_standard_answers],
+            db_session=db_session,
+        )
+    else:
+        matching_standard_answers = []
+    if matching_standard_answers:
+        chat_session = create_chat_session(
+            db_session=db_session,
+            description="",
+            user_id=None,
+            persona_id=slack_bot_config.persona.id if slack_bot_config.persona else 0,
+            danswerbot_flow=True,
+            slack_thread_id=slack_thread_id,
+            one_shot=True,
+        )
+
+        root_message = get_or_create_root_message(
+            chat_session_id=chat_session.id, db_session=db_session
+        )
+
+        new_user_message = create_new_chat_message(
+            chat_session_id=chat_session.id,
+            parent_message=root_message,
+            prompt_id=prompt.id if prompt else None,
+            message=query_msg.message,
+            token_count=0,
+            message_type=MessageType.USER,
+            db_session=db_session,
+            commit=True,
+        )
+
+        formatted_answers = []
+        for standard_answer in matching_standard_answers:
+            block_quotified_answer = ">" + standard_answer.answer.replace("\n", "\n> ")
+            formatted_answer = (
+                f'Since you mentioned _"{standard_answer.keyword}"_, '
+                f"I thought this might be useful: \n\n{block_quotified_answer}"
+            )
+            formatted_answers.append(formatted_answer)
+        answer_message = "\n\n".join(formatted_answers)
+
+        _ = create_new_chat_message(
+            chat_session_id=chat_session.id,
+            parent_message=new_user_message,
+            prompt_id=prompt.id if prompt else None,
+            message=answer_message,
+            token_count=0,
+            message_type=MessageType.ASSISTANT,
+            error=None,
+            db_session=db_session,
+            commit=True,
+        )
+
+        update_emote_react(
+            emoji=DANSWER_REACT_EMOJI,
+            channel=message_info.channel_to_respond,
+            message_ts=message_info.msg_to_respond,
+            remove=True,
+            client=client,
+        )
+
+        restate_question_blocks = get_restate_blocks(
+            msg=query_msg.message,
+            is_bot_msg=message_info.is_bot_msg,
+        )
+
+        answer_blocks = build_standard_answer_blocks(
+            answer_message=answer_message,
+        )
+
+        all_blocks = restate_question_blocks + answer_blocks
+
+        try:
+            respond_in_thread(
+                client=client,
+                channel=message_info.channel_to_respond,
+                receiver_ids=receiver_ids,
+                text="Hello! Danswer has some results for you!",
+                blocks=all_blocks,
+                thread_ts=message_info.msg_to_respond,
+                unfurl=False,
+            )
+
+            if receiver_ids and slack_thread_id:
+                send_team_member_message(
+                    client=client,
+                    channel=message_info.channel_to_respond,
+                    thread_ts=slack_thread_id,
+                )
+
+            return True
+        except Exception as e:
+            logger.exception(f"Unable to send standard answer message: {e}")
+            return False
+    else:
+        return False
--- a/backend/danswer/danswerbot/slack/listener.py
+++ b/backend/danswer/danswerbot/slack/listener.py
@@ -49,14 +49,13 @@ from danswer.danswerbot.slack.utils import rephrase_slack_message
 from danswer.danswerbot.slack.utils import respond_in_thread
 from danswer.db.engine import get_sqlalchemy_engine
 from danswer.db.search_settings import get_current_search_settings
-from danswer.key_value_store.interface import KvKeyNotFoundError
+from danswer.dynamic_configs.interface import ConfigNotFoundError
 from danswer.natural_language_processing.search_nlp_models import EmbeddingModel
 from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder
 from danswer.one_shot_answer.models import ThreadMessage
 from danswer.search.retrieval.search_runner import download_nltk_data
 from danswer.server.manage.models import SlackBotTokens
 from danswer.utils.logger import setup_logger
-from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable
 from shared_configs.configs import MODEL_SERVER_HOST
 from shared_configs.configs import MODEL_SERVER_PORT
 from shared_configs.configs import SLACK_CHANNEL_ID
@@ -131,8 +130,9 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool
            )
            return False

-        bot_tag_id = get_danswer_bot_app_id(client.web_client)
        if event_type == "message":
+            bot_tag_id = get_danswer_bot_app_id(client.web_client)
+
            is_dm = event.get("channel_type") == "im"
            is_tagged = bot_tag_id and bot_tag_id in msg
            is_danswer_bot_msg = bot_tag_id and bot_tag_id in event.get("user", "")
@@ -158,10 +158,8 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool
                slack_bot_config = get_slack_bot_config_for_channel(
                    channel_name=channel_name, db_session=db_session
                )
-            # If DanswerBot is not specifically tagged and the channel is not set to respond to bots, ignore the message
-            if (not bot_tag_id or bot_tag_id not in msg) and (
-                not slack_bot_config
-                or not slack_bot_config.channel_config.get("respond_to_bots")
+            if not slack_bot_config or not slack_bot_config.channel_config.get(
+                "respond_to_bots"
            ):
                channel_specific_logger.info("Ignoring message from bot")
                return False
@@ -448,9 +446,8 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non
                return view_routing(req, client)
        elif req.type == "events_api" or req.type == "slash_commands":
            return process_message(req, client)
-    except Exception as e:
-        logger.exception(f"Failed to process slack event. Error: {e}")
-        logger.error(f"Slack request payload: {req.payload}")
+    except Exception:
+        logger.exception("Failed to process slack event")


 def _get_socket_client(slack_bot_tokens: SlackBotTokens) -> SocketModeClient:
@@ -484,8 +481,6 @@ if __name__ == "__main__":
    slack_bot_tokens: SlackBotTokens | None = None
    socket_client: SocketModeClient | None = None

-    set_is_ee_based_on_env_variable()
-
    logger.notice("Verifying query preprocessing (NLTK) data is downloaded")
    download_nltk_data()

@@ -524,7 +519,7 @@ if __name__ == "__main__":

            # Let the handlers run in the background + re-check for token updates every 60 seconds
            Event().wait(timeout=60)
-        except KvKeyNotFoundError:
+        except ConfigNotFoundError:
            # try again every 30 seconds. This is needed since the user may add tokens
            # via the UI at any point in the programs lifecycle - if we just allow it to
            # fail, then the user will need to restart the containers after adding tokens
--- a/backend/danswer/danswerbot/slack/tokens.py
+++ b/backend/danswer/danswerbot/slack/tokens.py
@@ -2,7 +2,7 @@ import os
 from typing import cast

 from danswer.configs.constants import KV_SLACK_BOT_TOKENS_CONFIG_KEY
-from danswer.key_value_store.factory import get_kv_store
+from danswer.dynamic_configs.factory import get_dynamic_config_store
 from danswer.server.manage.models import SlackBotTokens


@@ -13,7 +13,7 @@ def fetch_tokens() -> SlackBotTokens:
    if app_token and bot_token:
        return SlackBotTokens(app_token=app_token, bot_token=bot_token)

-    dynamic_config_store = get_kv_store()
+    dynamic_config_store = get_dynamic_config_store()
    return SlackBotTokens(
        **cast(dict, dynamic_config_store.load(key=KV_SLACK_BOT_TOKENS_CONFIG_KEY))
    )
@@ -22,7 +22,7 @@ def fetch_tokens() -> SlackBotTokens:
 def save_tokens(
    tokens: SlackBotTokens,
 ) -> None:
-    dynamic_config_store = get_kv_store()
+    dynamic_config_store = get_dynamic_config_store()
    dynamic_config_store.store(
        key=KV_SLACK_BOT_TOKENS_CONFIG_KEY, val=dict(tokens), encrypt=True
    )
--- a/backend/danswer/danswerbot/slack/utils.py
+++ b/backend/danswer/danswerbot/slack/utils.py
@@ -430,58 +430,35 @@ def read_slack_thread(
    replies = cast(dict, response.data).get("messages", [])
    for reply in replies:
        if "user" in reply and "bot_id" not in reply:
-            message = reply["text"]
-            user_sem_id = (
-                fetch_user_semantic_id_from_id(reply.get("user"), client)
-                or "Unknown User"
-            )
+            message = remove_danswer_bot_tag(reply["text"], client=client)
+            user_sem_id = fetch_user_semantic_id_from_id(reply["user"], client)
            message_type = MessageType.USER
        else:
            self_app_id = get_danswer_bot_app_id(client)

-            if reply.get("user") == self_app_id:
-                # DanswerBot response
-                message_type = MessageType.ASSISTANT
-                user_sem_id = "Assistant"
-
-                # DanswerBot responses have both text and blocks
-                # The useful content is in the blocks, specifically the first block unless there are
-                # auto-detected filters
-                blocks = reply.get("blocks")
-                if not blocks:
-                    logger.warning(f"DanswerBot response has no blocks: {reply}")
-                    continue
-
-                message = blocks[0].get("text", {}).get("text")
-
-                # If auto-detected filters are on, use the second block for the actual answer
-                # The first block is the auto-detected filters
-                if message.startswith("_Filters"):
-                    if len(blocks) < 2:
-                        logger.warning(f"Only filter blocks found: {reply}")
-                        continue
-                    # This is the DanswerBot answer format, if there is a change to how we respond,
-                    # this will need to be updated to get the correct "answer" portion
-                    message = reply["blocks"][1].get("text", {}).get("text")
-            else:
-                # Other bots are not counted as the LLM response which only comes from Danswer
-                message_type = MessageType.USER
-                bot_user_name = fetch_user_semantic_id_from_id(
-                    reply.get("user"), client
-                )
-                user_sem_id = bot_user_name or "Unknown" + " Bot"
-
-                # For other bots, just use the text as we have no way of knowing that the
-                # useful portion is
-                message = reply.get("text")
-                if not message:
-                    message = blocks[0].get("text", {}).get("text")
-
-            if not message:
-                logger.warning("Skipping Slack thread message, no text found")
+            # Only include bot messages from Danswer, other bots are not taken in as context
+            if self_app_id != reply.get("user"):
                continue

-        message = remove_danswer_bot_tag(message, client=client)
+            blocks = reply["blocks"]
+            if len(blocks) <= 1:
+                continue
+
+            # For the old flow, the useful block is the second one after the header block that says AI Answer
+            if reply["blocks"][0]["text"]["text"] == "AI Answer":
+                message = reply["blocks"][1]["text"]["text"]
+            else:
+                # for the new flow, the answer is the first block
+                message = reply["blocks"][0]["text"]["text"]
+
+            if message.startswith("_Filters"):
+                if len(blocks) <= 2:
+                    continue
+                message = reply["blocks"][2]["text"]["text"]
+
+            user_sem_id = "Assistant"
+            message_type = MessageType.ASSISTANT
+
        thread_messages.append(
            ThreadMessage(message=message, sender=user_sem_id, role=message_type)
        )
--- a/backend/danswer/db/chat.py
+++ b/backend/danswer/db/chat.py
@@ -226,7 +226,7 @@ def create_chat_session(
    db_session: Session,
    description: str,
    user_id: UUID | None,
-    persona_id: int | None,  # Can be none if temporary persona is used
+    persona_id: int,
    llm_override: LLMOverride | None = None,
    prompt_override: PromptOverride | None = None,
    one_shot: bool = False,
@@ -598,7 +598,6 @@ def get_doc_query_identifiers_from_model(
    chat_session: ChatSession,
    user_id: UUID | None,
    db_session: Session,
-    enforce_chat_session_id_for_search_docs: bool,
 ) -> list[tuple[str, int]]:
    """Given a list of search_doc_ids"""
    search_docs = (
@@ -618,8 +617,7 @@ def get_doc_query_identifiers_from_model(
                for doc in search_docs
            ]
        ):
-            if enforce_chat_session_id_for_search_docs:
-                raise ValueError("Invalid reference doc, not from this chat session.")
+            raise ValueError("Invalid reference doc, not from this chat session.")
    except IndexError:
        # This happens when the doc has no chat_messages associated with it.
        # which happens as an edge case where the chat message failed to save
--- a/backend/danswer/db/connector.py
+++ b/backend/danswer/db/connector.py
@@ -1,5 +1,3 @@
-from datetime import datetime
-from datetime import timezone
 from typing import cast

 from sqlalchemy import and_
@@ -270,15 +268,3 @@ def create_initial_default_connector(db_session: Session) -> None:
    )
    db_session.add(connector)
    db_session.commit()
-
-
-def mark_ccpair_as_pruned(cc_pair_id: int, db_session: Session) -> None:
-    stmt = select(ConnectorCredentialPair).where(
-        ConnectorCredentialPair.id == cc_pair_id
-    )
-    cc_pair = db_session.scalar(stmt)
-    if cc_pair is None:
-        raise ValueError(f"No cc_pair with ID: {cc_pair_id}")
-
-    cc_pair.last_pruned = datetime.now(timezone.utc)
-    db_session.commit()
--- a/backend/danswer/db/connector_credential_pair.py
+++ b/backend/danswer/db/connector_credential_pair.py
@@ -12,7 +12,6 @@ from sqlalchemy.orm import Session
 from danswer.configs.constants import DocumentSource
 from danswer.db.connector import fetch_connector_by_id
 from danswer.db.credentials import fetch_credential_by_id
-from danswer.db.enums import AccessType
 from danswer.db.enums import ConnectorCredentialPairStatus
 from danswer.db.models import ConnectorCredentialPair
 from danswer.db.models import IndexAttempt
@@ -25,8 +24,6 @@ from danswer.db.models import UserGroup__ConnectorCredentialPair
 from danswer.db.models import UserRole
 from danswer.server.models import StatusResponse
 from danswer.utils.logger import setup_logger
-from ee.danswer.db.external_perm import delete_user__ext_group_for_cc_pair__no_commit
-from ee.danswer.external_permissions.sync_params import check_if_valid_sync_source

 logger = setup_logger()

@@ -77,7 +74,7 @@ def _add_user_filters(
            .correlate(ConnectorCredentialPair)
        )
    else:
-        where_clause |= ConnectorCredentialPair.access_type == AccessType.PUBLIC
+        where_clause |= ConnectorCredentialPair.is_public == True  # noqa: E712

    return stmt.where(where_clause)

@@ -97,19 +94,8 @@ def get_connector_credential_pairs(
        )  # noqa
    if ids:
        stmt = stmt.where(ConnectorCredentialPair.id.in_(ids))
-    return list(db_session.scalars(stmt).all())
-
-
-def add_deletion_failure_message(
-    db_session: Session,
-    cc_pair_id: int,
-    failure_message: str,
-) -> None:
-    cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session)
-    if not cc_pair:
-        return
-    cc_pair.deletion_failure_message = failure_message
-    db_session.commit()
+    results = db_session.scalars(stmt)
+    return list(results.all())


 def get_cc_pair_groups_for_ids(
@@ -311,9 +297,9 @@ def associate_default_cc_pair(db_session: Session) -> None:
    association = ConnectorCredentialPair(
        connector_id=0,
        credential_id=0,
-        access_type=AccessType.PUBLIC,
        name="DefaultCCPair",
        status=ConnectorCredentialPairStatus.ACTIVE,
+        is_public=True,
    )
    db_session.add(association)
    db_session.commit()
@@ -338,9 +324,8 @@ def add_credential_to_connector(
    connector_id: int,
    credential_id: int,
    cc_pair_name: str | None,
-    access_type: AccessType,
+    is_public: bool,
    groups: list[int] | None,
-    auto_sync_options: dict | None = None,
 ) -> StatusResponse:
    connector = fetch_connector_by_id(connector_id, db_session)
    credential = fetch_credential_by_id(credential_id, user, db_session)
@@ -348,13 +333,6 @@ def add_credential_to_connector(
    if connector is None:
        raise HTTPException(status_code=404, detail="Connector does not exist")

-    if access_type == AccessType.SYNC:
-        if not check_if_valid_sync_source(connector.source):
-            raise HTTPException(
-                status_code=400,
-                detail=f"Connector of type {connector.source} does not support SYNC access type",
-            )
-
    if credential is None:
        error_msg = (
            f"Credential {credential_id} does not exist or does not belong to user"
@@ -385,13 +363,12 @@ def add_credential_to_connector(
        credential_id=credential_id,
        name=cc_pair_name,
        status=ConnectorCredentialPairStatus.ACTIVE,
-        access_type=access_type,
-        auto_sync_options=auto_sync_options,
+        is_public=is_public,
    )
    db_session.add(association)
    db_session.flush()  # make sure the association has an id

-    if groups and access_type != AccessType.SYNC:
+    if groups:
        _relate_groups_to_cc_pair__no_commit(
            db_session=db_session,
            cc_pair_id=association.id,
@@ -434,10 +411,6 @@ def remove_credential_from_connector(
    )

    if association is not None:
-        delete_user__ext_group_for_cc_pair__no_commit(
-            db_session=db_session,
-            cc_pair_id=association.id,
-        )
        db_session.delete(association)
        db_session.commit()
        return StatusResponse(
--- a/backend/danswer/db/document.py
+++ b/backend/danswer/db/document.py
@@ -4,6 +4,7 @@ from collections.abc import Generator
 from collections.abc import Sequence
 from datetime import datetime
 from datetime import timezone
+from uuid import UUID

 from sqlalchemy import and_
 from sqlalchemy import delete
@@ -16,17 +17,14 @@ from sqlalchemy.dialects.postgresql import insert
 from sqlalchemy.engine.util import TransactionalContext
 from sqlalchemy.exc import OperationalError
 from sqlalchemy.orm import Session
-from sqlalchemy.sql.expression import null

 from danswer.configs.constants import DEFAULT_BOOST
-from danswer.db.enums import AccessType
 from danswer.db.enums import ConnectorCredentialPairStatus
 from danswer.db.feedback import delete_document_feedback_for_documents__no_commit
 from danswer.db.models import ConnectorCredentialPair
 from danswer.db.models import Credential
 from danswer.db.models import Document as DbDocument
 from danswer.db.models import DocumentByConnectorCredentialPair
-from danswer.db.models import User
 from danswer.db.tag import delete_document_tags_for_documents__no_commit
 from danswer.db.utils import model_to_dict
 from danswer.document_index.interfaces import DocumentMetadata
@@ -104,18 +102,6 @@ def construct_document_select_for_connector_credential_pair(
    return stmt


-def get_document_ids_for_connector_credential_pair(
-    db_session: Session, connector_id: int, credential_id: int, limit: int | None = None
-) -> list[str]:
-    doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
-        and_(
-            DocumentByConnectorCredentialPair.connector_id == connector_id,
-            DocumentByConnectorCredentialPair.credential_id == credential_id,
-        )
-    )
-    return list(db_session.execute(doc_ids_stmt).scalars().all())
-
-
 def get_documents_for_connector_credential_pair(
    db_session: Session, connector_id: int, credential_id: int, limit: int | None = None
 ) -> Sequence[DbDocument]:
@@ -132,26 +118,15 @@ def get_documents_for_connector_credential_pair(


 def get_documents_by_ids(
-    db_session: Session,
    document_ids: list[str],
+    db_session: Session,
 ) -> list[DbDocument]:
    stmt = select(DbDocument).where(DbDocument.id.in_(document_ids))
    documents = db_session.execute(stmt).scalars().all()
    return list(documents)


-def get_document_connector_count(
-    db_session: Session,
-    document_id: str,
-) -> int:
-    results = get_document_connector_counts(db_session, [document_id])
-    if not results or len(results) == 0:
-        return 0
-
-    return results[0][1]
-
-
-def get_document_connector_counts(
+def get_document_connector_cnts(
    db_session: Session,
    document_ids: list[str],
 ) -> Sequence[tuple[str, int]]:
@@ -166,7 +141,7 @@ def get_document_connector_counts(
    return db_session.execute(stmt).all()  # type: ignore


-def get_document_counts_for_cc_pairs(
+def get_document_cnts_for_cc_pairs(
    db_session: Session, cc_pair_identifiers: list[ConnectorCredentialPairIdentifier]
 ) -> Sequence[tuple[int, int, int]]:
    stmt = (
@@ -200,14 +175,16 @@ def get_document_counts_for_cc_pairs(
 def get_access_info_for_document(
    db_session: Session,
    document_id: str,
-) -> tuple[str, list[str | None], bool] | None:
+) -> tuple[str, list[UUID | None], bool] | None:
    """Gets access info for a single document by calling the get_access_info_for_documents function
    and passing a list with a single document ID.
+
    Args:
        db_session (Session): The database session to use.
        document_id (str): The document ID to fetch access info for.
+
    Returns:
-        Optional[Tuple[str, List[str | None], bool]]: A tuple containing the document ID, a list of user emails,
+        Optional[Tuple[str, List[UUID | None], bool]]: A tuple containing the document ID, a list of user IDs,
        and a boolean indicating if the document is globally public, or None if no results are found.
    """
    results = get_access_info_for_documents(db_session, [document_id])
@@ -220,27 +197,19 @@ def get_access_info_for_document(
 def get_access_info_for_documents(
    db_session: Session,
    document_ids: list[str],
-) -> Sequence[tuple[str, list[str | None], bool]]:
+) -> Sequence[tuple[str, list[UUID | None], bool]]:
    """Gets back all relevant access info for the given documents. This includes
    the user_ids for cc pairs that the document is associated with + whether any
    of the associated cc pairs are intending to make the document globally public.
-    Returns the list where each element contains:
-    - Document ID (which is also the ID of the DocumentByConnectorCredentialPair)
-    - List of emails of Danswer users with direct access to the doc (includes a "None" element if
-      the connector was set up by an admin when auth was off
-    - bool for whether the document is public (the document later can also be marked public by
-      automatic permission sync step)
    """
-    stmt = select(
-        DocumentByConnectorCredentialPair.id,
-        func.array_agg(func.coalesce(User.email, null())).label("user_emails"),
-        func.bool_or(ConnectorCredentialPair.access_type == AccessType.PUBLIC).label(
-            "public_doc"
-        ),
-    ).where(DocumentByConnectorCredentialPair.id.in_(document_ids))
-
    stmt = (
-        stmt.join(
+        select(
+            DocumentByConnectorCredentialPair.id,
+            func.array_agg(Credential.user_id).label("user_ids"),
+            func.bool_or(ConnectorCredentialPair.is_public).label("public_doc"),
+        )
+        .where(DocumentByConnectorCredentialPair.id.in_(document_ids))
+        .join(
            Credential,
            DocumentByConnectorCredentialPair.credential_id == Credential.id,
        )
@@ -253,13 +222,6 @@ def get_access_info_for_documents(
                == ConnectorCredentialPair.credential_id,
            ),
        )
-        .outerjoin(
-            User,
-            and_(
-                Credential.user_id == User.id,
-                ConnectorCredentialPair.access_type != AccessType.SYNC,
-            ),
-        )
        # don't include CC pairs that are being deleted
        # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them
        .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING)
@@ -305,19 +267,9 @@ def upsert_documents(
            for doc in seen_documents.values()
        ]
    )
-
-    on_conflict_stmt = insert_stmt.on_conflict_do_update(
-        index_elements=["id"],  # Conflict target
-        set_={
-            "from_ingestion_api": insert_stmt.excluded.from_ingestion_api,
-            "boost": insert_stmt.excluded.boost,
-            "hidden": insert_stmt.excluded.hidden,
-            "semantic_id": insert_stmt.excluded.semantic_id,
-            "link": insert_stmt.excluded.link,
-            "primary_owners": insert_stmt.excluded.primary_owners,
-            "secondary_owners": insert_stmt.excluded.secondary_owners,
-        },
-    )
+    # for now, there are no columns to update. If more metadata is added, then this
+    # needs to change to an `on_conflict_do_update`
+    on_conflict_stmt = insert_stmt.on_conflict_do_nothing()
    db_session.execute(on_conflict_stmt)
    db_session.commit()

@@ -398,34 +350,11 @@ def upsert_documents_complete(


 def delete_document_by_connector_credential_pair__no_commit(
-    db_session: Session,
-    document_id: str,
-    connector_credential_pair_identifier: ConnectorCredentialPairIdentifier
-    | None = None,
-) -> None:
-    """Deletes a single document by cc pair relationship entry.
-    Foreign key rows are left in place.
-    The implicit assumption is that the document itself still has other cc_pair
-    references and needs to continue existing.
-    """
-    delete_documents_by_connector_credential_pair__no_commit(
-        db_session=db_session,
-        document_ids=[document_id],
-        connector_credential_pair_identifier=connector_credential_pair_identifier,
-    )
-
-
-def delete_documents_by_connector_credential_pair__no_commit(
    db_session: Session,
    document_ids: list[str],
    connector_credential_pair_identifier: ConnectorCredentialPairIdentifier
    | None = None,
 ) -> None:
-    """This deletes just the document by cc pair entries for a particular cc pair.
-    Foreign key rows are left in place.
-    The implicit assumption is that the document itself still has other cc_pair
-    references and needs to continue existing.
-    """
    stmt = delete(DocumentByConnectorCredentialPair).where(
        DocumentByConnectorCredentialPair.id.in_(document_ids)
    )
@@ -448,9 +377,8 @@ def delete_documents__no_commit(db_session: Session, document_ids: list[str]) ->
 def delete_documents_complete__no_commit(
    db_session: Session, document_ids: list[str]
 ) -> None:
-    """This completely deletes the documents from the db, including all foreign key relationships"""
    logger.info(f"Deleting {len(document_ids)} documents from the DB")
-    delete_documents_by_connector_credential_pair__no_commit(db_session, document_ids)
+    delete_document_by_connector_credential_pair__no_commit(db_session, document_ids)
    delete_document_feedback_for_documents__no_commit(
        document_ids=document_ids, db_session=db_session
    )
--- a/backend/danswer/db/document_set.py
+++ b/backend/danswer/db/document_set.py
@@ -14,7 +14,6 @@ from sqlalchemy.orm import Session

 from danswer.db.connector_credential_pair import get_cc_pair_groups_for_ids
 from danswer.db.connector_credential_pair import get_connector_credential_pairs
-from danswer.db.enums import AccessType
 from danswer.db.enums import ConnectorCredentialPairStatus
 from danswer.db.models import ConnectorCredentialPair
 from danswer.db.models import Document
@@ -181,7 +180,7 @@ def _check_if_cc_pairs_are_owned_by_groups(
            ids=missing_cc_pair_ids,
        )
        for cc_pair in cc_pairs:
-            if cc_pair.access_type != AccessType.PUBLIC:
+            if not cc_pair.is_public:
                raise ValueError(
                    f"Connector Credential Pair with ID: '{cc_pair.id}'"
                    " is not owned by the specified groups"
@@ -570,7 +569,7 @@ def construct_document_select_by_docset(
    return stmt


-def fetch_document_sets_for_document(
+def fetch_document_set_for_document(
    document_id: str,
    db_session: Session,
 ) -> list[str]:
@@ -705,7 +704,7 @@ def check_document_sets_are_public(
            ConnectorCredentialPair.id.in_(
                connector_credential_pair_ids  # type:ignore
            ),
-            ConnectorCredentialPair.access_type != AccessType.PUBLIC,
+            ConnectorCredentialPair.is_public.is_(False),
        )
        .limit(1)
        .first()
--- a/backend/danswer/db/engine.py
+++ b/backend/danswer/db/engine.py
@@ -1,18 +1,10 @@
 import contextlib
-import contextvars
-import re
-import threading
 import time
 from collections.abc import AsyncGenerator
 from collections.abc import Generator
 from datetime import datetime
-from typing import Any
 from typing import ContextManager

-import jwt
-from fastapi import Depends
-from fastapi import HTTPException
-from fastapi import Request
 from sqlalchemy import event
 from sqlalchemy import text
 from sqlalchemy.engine import create_engine
@@ -25,9 +17,6 @@ from sqlalchemy.orm import sessionmaker

 from danswer.configs.app_configs import LOG_POSTGRES_CONN_COUNTS
 from danswer.configs.app_configs import LOG_POSTGRES_LATENCY
-from danswer.configs.app_configs import MULTI_TENANT
-from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
-from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
 from danswer.configs.app_configs import POSTGRES_DB
 from danswer.configs.app_configs import POSTGRES_HOST
 from danswer.configs.app_configs import POSTGRES_PASSWORD
@@ -35,24 +24,27 @@ from danswer.configs.app_configs import POSTGRES_POOL_PRE_PING
 from danswer.configs.app_configs import POSTGRES_POOL_RECYCLE
 from danswer.configs.app_configs import POSTGRES_PORT
 from danswer.configs.app_configs import POSTGRES_USER
-from danswer.configs.app_configs import SECRET_JWT_KEY
-from danswer.configs.constants import POSTGRES_DEFAULT_SCHEMA
 from danswer.configs.constants import POSTGRES_UNKNOWN_APP_NAME
 from danswer.utils.logger import setup_logger

-
 logger = setup_logger()

 SYNC_DB_API = "psycopg2"
 ASYNC_DB_API = "asyncpg"

+POSTGRES_APP_NAME = (
+    POSTGRES_UNKNOWN_APP_NAME  # helps to diagnose open connections in postgres
+)
+
 # global so we don't create more than one engine per process
 # outside of being best practice, this is needed so we can properly pool
 # connections and not create a new pool on every request
-
+_SYNC_ENGINE: Engine | None = None
 _ASYNC_ENGINE: AsyncEngine | None = None
+
 SessionFactory: sessionmaker[Session] | None = None

+
 if LOG_POSTGRES_LATENCY:
    # Function to log before query execution
    @event.listens_for(Engine, "before_cursor_execute")
@@ -116,78 +108,6 @@ def get_db_current_time(db_session: Session) -> datetime:
    return result


-# Regular expression to validate schema names to prevent SQL injection
-SCHEMA_NAME_REGEX = re.compile(r"^[a-zA-Z0-9_-]+$")
-
-
-def is_valid_schema_name(name: str) -> bool:
-    return SCHEMA_NAME_REGEX.match(name) is not None
-
-
-class SqlEngine:
-    """Class to manage a global SQLAlchemy engine (needed for proper resource control).
-    Will eventually subsume most of the standalone functions in this file.
-    Sync only for now.
-    """
-
-    _engine: Engine | None = None
-    _lock: threading.Lock = threading.Lock()
-    _app_name: str = POSTGRES_UNKNOWN_APP_NAME
-
-    # Default parameters for engine creation
-    DEFAULT_ENGINE_KWARGS = {
-        "pool_size": 20,
-        "max_overflow": 5,
-        "pool_pre_ping": POSTGRES_POOL_PRE_PING,
-        "pool_recycle": POSTGRES_POOL_RECYCLE,
-    }
-
-    def __init__(self) -> None:
-        pass
-
-    @classmethod
-    def _init_engine(cls, **engine_kwargs: Any) -> Engine:
-        """Private helper method to create and return an Engine."""
-        connection_string = build_connection_string(
-            db_api=SYNC_DB_API, app_name=cls._app_name + "_sync"
-        )
-        merged_kwargs = {**cls.DEFAULT_ENGINE_KWARGS, **engine_kwargs}
-        return create_engine(connection_string, **merged_kwargs)
-
-    @classmethod
-    def init_engine(cls, **engine_kwargs: Any) -> None:
-        """Allow the caller to init the engine with extra params. Different clients
-        such as the API server and different Celery workers and tasks
-        need different settings.
-        """
-        with cls._lock:
-            if not cls._engine:
-                cls._engine = cls._init_engine(**engine_kwargs)
-
-    @classmethod
-    def get_engine(cls) -> Engine:
-        """Gets the SQLAlchemy engine. Will init a default engine if init hasn't
-        already been called. You probably want to init first!
-        """
-        if not cls._engine:
-            with cls._lock:
-                if not cls._engine:
-                    cls._engine = cls._init_engine()
-        return cls._engine
-
-    @classmethod
-    def set_app_name(cls, app_name: str) -> None:
-        """Class method to set the app name."""
-        cls._app_name = app_name
-
-    @classmethod
-    def get_app_name(cls) -> str:
-        """Class method to get current app name."""
-        if not cls._app_name:
-            return ""
-        return cls._app_name
-
-
 def build_connection_string(
    *,
    db_api: str = ASYNC_DB_API,
@@ -200,139 +120,67 @@ def build_connection_string(
 ) -> str:
    if app_name:
        return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}?application_name={app_name}"
+
    return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"


+def init_sqlalchemy_engine(app_name: str) -> None:
+    global POSTGRES_APP_NAME
+    POSTGRES_APP_NAME = app_name
+
+
 def get_sqlalchemy_engine() -> Engine:
-    return SqlEngine.get_engine()
+    global _SYNC_ENGINE
+    if _SYNC_ENGINE is None:
+        connection_string = build_connection_string(
+            db_api=SYNC_DB_API, app_name=POSTGRES_APP_NAME + "_sync"
+        )
+        _SYNC_ENGINE = create_engine(
+            connection_string,
+            pool_size=40,
+            max_overflow=10,
+            pool_pre_ping=POSTGRES_POOL_PRE_PING,
+            pool_recycle=POSTGRES_POOL_RECYCLE,
+        )
+    return _SYNC_ENGINE


 def get_sqlalchemy_async_engine() -> AsyncEngine:
    global _ASYNC_ENGINE
    if _ASYNC_ENGINE is None:
-        # Underlying asyncpg cannot accept application_name directly in the connection string
+        # underlying asyncpg cannot accept application_name directly in the connection string
        # https://github.com/MagicStack/asyncpg/issues/798
        connection_string = build_connection_string()
        _ASYNC_ENGINE = create_async_engine(
            connection_string,
            connect_args={
-                "server_settings": {
-                    "application_name": SqlEngine.get_app_name() + "_async"
-                }
+                "server_settings": {"application_name": POSTGRES_APP_NAME + "_async"}
            },
-            # async engine is only used by API server, so we can use those values
-            # here as well
-            pool_size=POSTGRES_API_SERVER_POOL_SIZE,
-            max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,
+            pool_size=40,
+            max_overflow=10,
            pool_pre_ping=POSTGRES_POOL_PRE_PING,
            pool_recycle=POSTGRES_POOL_RECYCLE,
        )
    return _ASYNC_ENGINE


-# Context variable to store the current tenant ID
-# This allows us to maintain tenant-specific context throughout the request lifecycle
-# The default value is set to POSTGRES_DEFAULT_SCHEMA for non-multi-tenant setups
-# This context variable works in both synchronous and asynchronous contexts
-# In async code, it's automatically carried across coroutines
-# In sync code, it's managed per thread
-current_tenant_id = contextvars.ContextVar(
-    "current_tenant_id", default=POSTGRES_DEFAULT_SCHEMA
-)
-
-
-# Dependency to get the current tenant ID and set the context variable
-def get_current_tenant_id(request: Request) -> str:
-    """Dependency that extracts the tenant ID from the JWT token in the request and sets the context variable."""
-    if not MULTI_TENANT:
-        tenant_id = POSTGRES_DEFAULT_SCHEMA
-        current_tenant_id.set(tenant_id)
-        return tenant_id
-
-    token = request.cookies.get("tenant_details")
-    if not token:
-        # If no token is present, use the default schema or handle accordingly
-        tenant_id = POSTGRES_DEFAULT_SCHEMA
-        current_tenant_id.set(tenant_id)
-        return tenant_id
-
-    try:
-        payload = jwt.decode(token, SECRET_JWT_KEY, algorithms=["HS256"])
-        tenant_id = payload.get("tenant_id")
-        if not tenant_id:
-            raise HTTPException(
-                status_code=400, detail="Invalid token: tenant_id missing"
-            )
-        if not is_valid_schema_name(tenant_id):
-            raise ValueError("Invalid tenant ID format")
-        current_tenant_id.set(tenant_id)
-        return tenant_id
-    except jwt.InvalidTokenError:
-        raise HTTPException(status_code=401, detail="Invalid token format")
-    except ValueError as e:
-        # Let the 400 error bubble up
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception:
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-
-def get_session_with_tenant(tenant_id: str | None = None) -> Session:
-    if tenant_id is None:
-        tenant_id = current_tenant_id.get()
-
-    if not is_valid_schema_name(tenant_id):
-        logger.error(f"Invalid tenant ID: {tenant_id}")
-        raise Exception("Invalid tenant ID")
-
-    engine = SqlEngine.get_engine()
-    session = Session(engine, expire_on_commit=False)
-
-    @event.listens_for(session, "after_begin")
-    def set_search_path(session: Session, transaction: Any, connection: Any) -> None:
-        connection.execute(text("SET search_path TO :schema"), {"schema": tenant_id})
-
-    return session
-
-
-def get_session(
-    tenant_id: str = Depends(get_current_tenant_id),
-) -> Generator[Session, None, None]:
-    """Generate a database session with the appropriate tenant schema set."""
-    engine = get_sqlalchemy_engine()
-    with Session(engine, expire_on_commit=False) as session:
-        if MULTI_TENANT:
-            if not is_valid_schema_name(tenant_id):
-                raise HTTPException(status_code=400, detail="Invalid tenant ID")
-            # Set the search_path to the tenant's schema
-            session.execute(text(f'SET search_path = "{tenant_id}"'))
-        yield session
-
-
-async def get_async_session(
-    tenant_id: str = Depends(get_current_tenant_id),
-) -> AsyncGenerator[AsyncSession, None]:
-    """Generate an async database session with the appropriate tenant schema set."""
-    engine = get_sqlalchemy_async_engine()
-    async with AsyncSession(engine, expire_on_commit=False) as async_session:
-        if MULTI_TENANT:
-            if not is_valid_schema_name(tenant_id):
-                raise HTTPException(status_code=400, detail="Invalid tenant ID")
-            # Set the search_path to the tenant's schema
-            await async_session.execute(text(f'SET search_path = "{tenant_id}"'))
-        yield async_session
-
-
 def get_session_context_manager() -> ContextManager[Session]:
-    """Context manager for database sessions."""
    return contextlib.contextmanager(get_session)()


-def get_session_factory() -> sessionmaker[Session]:
-    """Get a session factory."""
-    global SessionFactory
-    if SessionFactory is None:
-        SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
-    return SessionFactory
+def get_session() -> Generator[Session, None, None]:
+    # The line below was added to monitor the latency caused by Postgres connections
+    # during API calls.
+    # with tracer.trace("db.get_session"):
+    with Session(get_sqlalchemy_engine(), expire_on_commit=False) as session:
+        yield session
+
+
+async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
+    async with AsyncSession(
+        get_sqlalchemy_async_engine(), expire_on_commit=False
+    ) as async_session:
+        yield async_session


 async def warm_up_connections(
@@ -356,3 +204,10 @@ async def warm_up_connections(
        await async_conn.execute(text("SELECT 1"))
    for async_conn in async_connections:
        await async_conn.close()
+
+
+def get_session_factory() -> sessionmaker[Session]:
+    global SessionFactory
+    if SessionFactory is None:
+        SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
+    return SessionFactory
--- a/backend/danswer/db/enums.py
+++ b/backend/danswer/db/enums.py
@@ -51,9 +51,3 @@ class ConnectorCredentialPairStatus(str, PyEnum):

    def is_active(self) -> bool:
        return self == ConnectorCredentialPairStatus.ACTIVE
-
-
-class AccessType(str, PyEnum):
-    PUBLIC = "public"
-    PRIVATE = "private"
-    SYNC = "sync"
--- a/backend/danswer/db/feedback.py
+++ b/backend/danswer/db/feedback.py
@@ -16,7 +16,6 @@ from sqlalchemy.orm import Session
 from danswer.configs.constants import MessageType
 from danswer.configs.constants import SearchFeedbackType
 from danswer.db.chat import get_chat_message
-from danswer.db.enums import AccessType
 from danswer.db.models import ChatMessageFeedback
 from danswer.db.models import ConnectorCredentialPair
 from danswer.db.models import Document as DbDocument
@@ -95,7 +94,7 @@ def _add_user_filters(
            .correlate(CCPair)
        )
    else:
-        where_clause |= CCPair.access_type == AccessType.PUBLIC
+        where_clause |= CCPair.is_public == True  # noqa: E712

    return stmt.where(where_clause)

--- a/backend/danswer/db/index_attempt.py
+++ b/backend/danswer/db/index_attempt.py
@@ -181,45 +181,6 @@ def get_last_attempt(
    return db_session.execute(stmt).scalars().first()


-def get_latest_index_attempts_by_status(
-    secondary_index: bool,
-    db_session: Session,
-    status: IndexingStatus,
-) -> Sequence[IndexAttempt]:
-    """
-    Retrieves the most recent index attempt with the specified status for each connector_credential_pair.
-    Filters attempts based on the secondary_index flag to get either future or present index attempts.
-    Returns a sequence of IndexAttempt objects, one for each unique connector_credential_pair.
-    """
-    latest_failed_attempts = (
-        select(
-            IndexAttempt.connector_credential_pair_id,
-            func.max(IndexAttempt.id).label("max_failed_id"),
-        )
-        .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
-        .where(
-            SearchSettings.status
-            == (
-                IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
-            ),
-            IndexAttempt.status == status,
-        )
-        .group_by(IndexAttempt.connector_credential_pair_id)
-        .subquery()
-    )
-
-    stmt = select(IndexAttempt).join(
-        latest_failed_attempts,
-        (
-            IndexAttempt.connector_credential_pair_id
-            == latest_failed_attempts.c.connector_credential_pair_id
-        )
-        & (IndexAttempt.id == latest_failed_attempts.c.max_failed_id),
-    )
-
-    return db_session.execute(stmt).scalars().all()
-
-
 def get_latest_index_attempts(
    secondary_index: bool,
    db_session: Session,
@@ -374,16 +335,6 @@ def delete_index_attempts(
    cc_pair_id: int,
    db_session: Session,
 ) -> None:
-    # First, delete related entries in IndexAttemptErrors
-    stmt_errors = delete(IndexAttemptError).where(
-        IndexAttemptError.index_attempt_id.in_(
-            select(IndexAttempt.id).where(
-                IndexAttempt.connector_credential_pair_id == cc_pair_id
-            )
-        )
-    )
-    db_session.execute(stmt_errors)
-
    stmt = delete(IndexAttempt).where(
        IndexAttempt.connector_credential_pair_id == cc_pair_id,
    )
--- a/backend/danswer/db/llm.py
+++ b/backend/danswer/db/llm.py
@@ -4,11 +4,9 @@ from sqlalchemy import select
 from sqlalchemy.orm import Session

 from danswer.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel
-from danswer.db.models import DocumentSet
 from danswer.db.models import LLMProvider as LLMProviderModel
 from danswer.db.models import LLMProvider__UserGroup
 from danswer.db.models import SearchSettings
-from danswer.db.models import Tool as ToolModel
 from danswer.db.models import User
 from danswer.db.models import User__UserGroup
 from danswer.server.manage.embedding.models import CloudEmbeddingProvider
@@ -62,8 +60,7 @@ def upsert_cloud_embedding_provider(


 def upsert_llm_provider(
-    llm_provider: LLMProviderUpsertRequest,
-    db_session: Session,
+    llm_provider: LLMProviderUpsertRequest, db_session: Session
 ) -> FullLLMProvider:
    existing_llm_provider = db_session.scalar(
        select(LLMProviderModel).where(LLMProviderModel.name == llm_provider.name)
@@ -106,20 +103,6 @@ def fetch_existing_embedding_providers(
    return list(db_session.scalars(select(CloudEmbeddingProviderModel)).all())


-def fetch_existing_doc_sets(
-    db_session: Session, doc_ids: list[int]
-) -> list[DocumentSet]:
-    return list(
-        db_session.scalars(select(DocumentSet).where(DocumentSet.id.in_(doc_ids))).all()
-    )
-
-
-def fetch_existing_tools(db_session: Session, tool_ids: list[int]) -> list[ToolModel]:
-    return list(
-        db_session.scalars(select(ToolModel).where(ToolModel.id.in_(tool_ids))).all()
-    )
-
-
 def fetch_existing_llm_providers(
    db_session: Session,
    user: User | None = None,
--- a/backend/danswer/db/models.py
+++ b/backend/danswer/db/models.py
@@ -39,7 +39,6 @@ from danswer.configs.constants import DEFAULT_BOOST
 from danswer.configs.constants import DocumentSource
 from danswer.configs.constants import FileOrigin
 from danswer.configs.constants import MessageType
-from danswer.db.enums import AccessType
 from danswer.configs.constants import NotificationType
 from danswer.configs.constants import SearchFeedbackType
 from danswer.configs.constants import TokenRateLimitScope
@@ -50,7 +49,7 @@ from danswer.db.enums import IndexingStatus
 from danswer.db.enums import IndexModelStatus
 from danswer.db.enums import TaskStatus
 from danswer.db.pydantic_type import PydanticType
-from danswer.key_value_store.interface import JSON_ro
+from danswer.dynamic_configs.interface import JSON_ro
 from danswer.file_store.models import FileDescriptor
 from danswer.llm.override_models import LLMOverride
 from danswer.llm.override_models import PromptOverride
@@ -109,7 +108,7 @@ class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):

 class User(SQLAlchemyBaseUserTableUUID, Base):
    oauth_accounts: Mapped[list[OAuthAccount]] = relationship(
-        "OAuthAccount", lazy="joined", cascade="all, delete-orphan"
+        "OAuthAccount", lazy="joined"
    )
    role: Mapped[UserRole] = mapped_column(
        Enum(UserRole, native_enum=False, default=UserRole.BASIC)
@@ -123,13 +122,7 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
    # if specified, controls the assistants that are shown to the user + their order
    # if not specified, all assistants are shown
    chosen_assistants: Mapped[list[int]] = mapped_column(
-        postgresql.JSONB(), nullable=False, default=[-2, -1, 0]
-    )
-    visible_assistants: Mapped[list[int]] = mapped_column(
-        postgresql.JSONB(), nullable=False, default=[]
-    )
-    hidden_assistants: Mapped[list[int]] = mapped_column(
-        postgresql.JSONB(), nullable=False, default=[]
+        postgresql.JSONB(), nullable=True
    )

    oidc_expiry: Mapped[datetime.datetime] = mapped_column(
@@ -177,9 +170,7 @@ class InputPrompt(Base):
    active: Mapped[bool] = mapped_column(Boolean)
    user: Mapped[User | None] = relationship("User", back_populates="input_prompts")
    is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)


 class InputPrompt__User(Base):
@@ -223,9 +214,7 @@ class Notification(Base):
    notif_type: Mapped[NotificationType] = mapped_column(
        Enum(NotificationType, native_enum=False)
    )
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
    dismissed: Mapped[bool] = mapped_column(Boolean, default=False)
    last_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
    first_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
@@ -260,7 +249,7 @@ class Persona__User(Base):

    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True)
    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True
+        ForeignKey("user.id"), primary_key=True, nullable=True
    )


@@ -271,7 +260,7 @@ class DocumentSet__User(Base):
        ForeignKey("document_set.id"), primary_key=True
    )
    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True
+        ForeignKey("user.id"), primary_key=True, nullable=True
    )


@@ -386,40 +375,21 @@ class ConnectorCredentialPair(Base):
    connector_id: Mapped[int] = mapped_column(
        ForeignKey("connector.id"), primary_key=True
    )
-
-    deletion_failure_message: Mapped[str | None] = mapped_column(String, nullable=True)
-
    credential_id: Mapped[int] = mapped_column(
        ForeignKey("credential.id"), primary_key=True
    )
    # controls whether the documents indexed by this CC pair are visible to all
    # or if they are only visible to those with that are given explicit access
    # (e.g. via owning the credential or being a part of a group that is given access)
-    access_type: Mapped[AccessType] = mapped_column(
-        Enum(AccessType, native_enum=False), nullable=False
-    )
-
-    # special info needed for the auto-sync feature. The exact structure depends on the
-
-    # source type (defined in the connector's `source` field)
-    # E.g. for google_drive perm sync:
-    # {"customer_id": "123567", "company_domain": "@danswer.ai"}
-    auto_sync_options: Mapped[dict[str, Any] | None] = mapped_column(
-        postgresql.JSONB(), nullable=True
-    )
-    last_time_perm_sync: Mapped[datetime.datetime | None] = mapped_column(
-        DateTime(timezone=True), nullable=True
+    is_public: Mapped[bool] = mapped_column(
+        Boolean,
+        default=True,
+        nullable=False,
    )
    # Time finished, not used for calculating backend jobs which uses time started (created)
    last_successful_index_time: Mapped[datetime.datetime | None] = mapped_column(
        DateTime(timezone=True), default=None
    )
-
-    # last successful prune
-    last_pruned: Mapped[datetime.datetime | None] = mapped_column(
-        DateTime(timezone=True), nullable=True, index=True
-    )
-
    total_docs_indexed: Mapped[int] = mapped_column(Integer, default=0)

    connector: Mapped["Connector"] = relationship(
@@ -445,7 +415,6 @@ class ConnectorCredentialPair(Base):

 class Document(Base):
    __tablename__ = "document"
-    # NOTE: if more sensitive data is added here for display, make sure to add user/group permission

    # this should correspond to the ID of the document
    # (as is passed around in Danswer)
@@ -489,18 +458,7 @@ class Document(Base):
    secondary_owners: Mapped[list[str] | None] = mapped_column(
        postgresql.ARRAY(String), nullable=True
    )
-    # Permission sync columns
-    # Email addresses are saved at the document level for externally synced permissions
-    # This is becuase the normal flow of assigning permissions is through the cc_pair
-    # doesn't apply here
-    external_user_emails: Mapped[list[str] | None] = mapped_column(
-        postgresql.ARRAY(String), nullable=True
-    )
-    # These group ids have been prefixed by the source type
-    external_user_group_ids: Mapped[list[str] | None] = mapped_column(
-        postgresql.ARRAY(String), nullable=True
-    )
-    is_public: Mapped[bool] = mapped_column(Boolean, default=False)
+    # TODO if more sensitive data is added here for display, make sure to add user/group permission

    retrieval_feedbacks: Mapped[list["DocumentRetrievalFeedback"]] = relationship(
        "DocumentRetrievalFeedback", back_populates="document"
@@ -580,9 +538,7 @@ class Credential(Base):

    id: Mapped[int] = mapped_column(primary_key=True)
    credential_json: Mapped[dict[str, Any]] = mapped_column(EncryptedJson())
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
    # if `true`, then all Admins will have access to the credential
    admin_public: Mapped[bool] = mapped_column(Boolean, default=True)
    time_created: Mapped[datetime.datetime] = mapped_column(
@@ -906,12 +862,8 @@ class ChatSession(Base):
    __tablename__ = "chat_session"

    id: Mapped[int] = mapped_column(primary_key=True)
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
-    persona_id: Mapped[int | None] = mapped_column(
-        ForeignKey("persona.id"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
+    persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"))
    description: Mapped[str] = mapped_column(Text)
    # One-shot direct answering, currently the two types of chats are not mixed
    one_shot: Mapped[bool] = mapped_column(Boolean, default=False)
@@ -945,6 +897,7 @@ class ChatSession(Base):
    prompt_override: Mapped[PromptOverride | None] = mapped_column(
        PydanticType(PromptOverride), nullable=True
    )
+
    time_updated: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
@@ -953,6 +906,7 @@ class ChatSession(Base):
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )
+
    user: Mapped[User] = relationship("User", back_populates="chat_sessions")
    folder: Mapped["ChatFolder"] = relationship(
        "ChatFolder", back_populates="chat_sessions"
@@ -1045,9 +999,7 @@ class ChatFolder(Base):

    id: Mapped[int] = mapped_column(primary_key=True)
    # Only null if auth is off
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
    name: Mapped[str | None] = mapped_column(String, nullable=True)
    display_priority: Mapped[int] = mapped_column(Integer, nullable=True, default=0)

@@ -1178,9 +1130,7 @@ class DocumentSet(Base):
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String, unique=True)
    description: Mapped[str] = mapped_column(String)
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
    # Whether changes to the document set have been propagated
    is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
    # If `False`, then the document set is not visible to users who are not explicitly
@@ -1224,9 +1174,7 @@ class Prompt(Base):
    __tablename__ = "prompt"

    id: Mapped[int] = mapped_column(primary_key=True)
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
    name: Mapped[str] = mapped_column(String)
    description: Mapped[str] = mapped_column(String)
    system_prompt: Mapped[str] = mapped_column(Text)
@@ -1261,13 +1209,9 @@ class Tool(Base):
    openapi_schema: Mapped[dict[str, Any] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
-    custom_headers: Mapped[list[dict[str, str]] | None] = mapped_column(
-        postgresql.JSONB(), nullable=True
-    )
+
    # user who created / owns the tool. Will be None for built-in tools.
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)

    user: Mapped[User | None] = relationship("User", back_populates="custom_tools")
    # Relationship to Persona through the association table
@@ -1291,9 +1235,7 @@ class Persona(Base):
    __tablename__ = "persona"

    id: Mapped[int] = mapped_column(primary_key=True)
-    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
-    )
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
    name: Mapped[str] = mapped_column(String)
    description: Mapped[str] = mapped_column(String)
    # Number of chunks to pass to the LLM for generation.
@@ -1322,18 +1264,9 @@ class Persona(Base):
    starter_messages: Mapped[list[StarterMessage] | None] = mapped_column(
        postgresql.JSONB(), nullable=True
    )
-    search_start_date: Mapped[datetime.datetime | None] = mapped_column(
-        DateTime(timezone=True), default=None
-    )
-    # Built-in personas are configured via backend during deployment
+    # Default personas are configured via backend during deployment
    # Treated specially (cannot be user edited etc.)
-    builtin_persona: Mapped[bool] = mapped_column(Boolean, default=False)
-
-    # Default personas are personas created by admins and are automatically added
-    # to all users' assistants list.
-    is_default_persona: Mapped[bool] = mapped_column(
-        Boolean, default=False, nullable=False
-    )
+    default_persona: Mapped[bool] = mapped_column(Boolean, default=False)
    # controls whether the persona is available to be selected by users
    is_visible: Mapped[bool] = mapped_column(Boolean, default=True)
    # controls the ordering of personas in the UI
@@ -1384,10 +1317,10 @@ class Persona(Base):
    # Default personas loaded via yaml cannot have the same name
    __table_args__ = (
        Index(
-            "_builtin_persona_name_idx",
+            "_default_persona_name_idx",
            "name",
            unique=True,
-            postgresql_where=(builtin_persona == True),  # noqa: E712
+            postgresql_where=(default_persona == True),  # noqa: E712
        ),
    )

@@ -1411,6 +1344,53 @@ class ChannelConfig(TypedDict):
    follow_up_tags: NotRequired[list[str]]


+class StandardAnswerCategory(Base):
+    __tablename__ = "standard_answer_category"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    name: Mapped[str] = mapped_column(String, unique=True)
+    standard_answers: Mapped[list["StandardAnswer"]] = relationship(
+        "StandardAnswer",
+        secondary=StandardAnswer__StandardAnswerCategory.__table__,
+        back_populates="categories",
+    )
+    slack_bot_configs: Mapped[list["SlackBotConfig"]] = relationship(
+        "SlackBotConfig",
+        secondary=SlackBotConfig__StandardAnswerCategory.__table__,
+        back_populates="standard_answer_categories",
+    )
+
+
+class StandardAnswer(Base):
+    __tablename__ = "standard_answer"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    keyword: Mapped[str] = mapped_column(String)
+    answer: Mapped[str] = mapped_column(String)
+    active: Mapped[bool] = mapped_column(Boolean)
+
+    __table_args__ = (
+        Index(
+            "unique_keyword_active",
+            keyword,
+            active,
+            unique=True,
+            postgresql_where=(active == True),  # noqa: E712
+        ),
+    )
+
+    categories: Mapped[list[StandardAnswerCategory]] = relationship(
+        "StandardAnswerCategory",
+        secondary=StandardAnswer__StandardAnswerCategory.__table__,
+        back_populates="standard_answers",
+    )
+    chat_messages: Mapped[list[ChatMessage]] = relationship(
+        "ChatMessage",
+        secondary=ChatMessage__StandardAnswer.__table__,
+        back_populates="standard_answers",
+    )
+
+
 class SlackBotResponseType(str, PyEnum):
    QUOTES = "quotes"
    CITATIONS = "citations"
@@ -1436,7 +1416,7 @@ class SlackBotConfig(Base):
    )

    persona: Mapped[Persona | None] = relationship("Persona")
-    standard_answer_categories: Mapped[list["StandardAnswerCategory"]] = relationship(
+    standard_answer_categories: Mapped[list[StandardAnswerCategory]] = relationship(
        "StandardAnswerCategory",
        secondary=SlackBotConfig__StandardAnswerCategory.__table__,
        back_populates="slack_bot_configs",
@@ -1498,9 +1478,7 @@ class SamlAccount(Base):
    __tablename__ = "saml"

    id: Mapped[int] = mapped_column(primary_key=True)
-    user_id: Mapped[int] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), unique=True
-    )
+    user_id: Mapped[int] = mapped_column(ForeignKey("user.id"), unique=True)
    encrypted_cookie: Mapped[str] = mapped_column(Text, unique=True)
    expires_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
    updated_at: Mapped[datetime.datetime] = mapped_column(
@@ -1519,7 +1497,7 @@ class User__UserGroup(Base):
        ForeignKey("user_group.id"), primary_key=True
    )
    user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True
+        ForeignKey("user.id"), primary_key=True, nullable=True
    )


@@ -1668,72 +1646,94 @@ class TokenRateLimit__UserGroup(Base):
    )


-class StandardAnswerCategory(Base):
-    __tablename__ = "standard_answer_category"
-
-    id: Mapped[int] = mapped_column(primary_key=True)
-    name: Mapped[str] = mapped_column(String, unique=True)
-    standard_answers: Mapped[list["StandardAnswer"]] = relationship(
-        "StandardAnswer",
-        secondary=StandardAnswer__StandardAnswerCategory.__table__,
-        back_populates="categories",
-    )
-    slack_bot_configs: Mapped[list["SlackBotConfig"]] = relationship(
-        "SlackBotConfig",
-        secondary=SlackBotConfig__StandardAnswerCategory.__table__,
-        back_populates="standard_answer_categories",
-    )
-
-
-class StandardAnswer(Base):
-    __tablename__ = "standard_answer"
-
-    id: Mapped[int] = mapped_column(primary_key=True)
-    keyword: Mapped[str] = mapped_column(String)
-    answer: Mapped[str] = mapped_column(String)
-    active: Mapped[bool] = mapped_column(Boolean)
-    match_regex: Mapped[bool] = mapped_column(Boolean)
-    match_any_keywords: Mapped[bool] = mapped_column(Boolean)
-
-    __table_args__ = (
-        Index(
-            "unique_keyword_active",
-            keyword,
-            active,
-            unique=True,
-            postgresql_where=(active == True),  # noqa: E712
-        ),
-    )
-
-    categories: Mapped[list[StandardAnswerCategory]] = relationship(
-        "StandardAnswerCategory",
-        secondary=StandardAnswer__StandardAnswerCategory.__table__,
-        back_populates="standard_answers",
-    )
-    chat_messages: Mapped[list[ChatMessage]] = relationship(
-        "ChatMessage",
-        secondary=ChatMessage__StandardAnswer.__table__,
-        back_populates="standard_answers",
-    )
-
-
 """Tables related to Permission Sync"""


-class User__ExternalUserGroupId(Base):
+class PermissionSyncStatus(str, PyEnum):
+    IN_PROGRESS = "in_progress"
+    SUCCESS = "success"
+    FAILED = "failed"
+
+
+class PermissionSyncJobType(str, PyEnum):
+    USER_LEVEL = "user_level"
+    GROUP_LEVEL = "group_level"
+
+
+class PermissionSyncRun(Base):
+    """Represents one run of a permission sync job. For some given cc_pair, it is either sync-ing
+    the users or it is sync-ing the groups"""
+
+    __tablename__ = "permission_sync_run"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    # Not strictly needed but makes it easy to use without fetching from cc_pair
+    source_type: Mapped[DocumentSource] = mapped_column(
+        Enum(DocumentSource, native_enum=False)
+    )
+    # Currently all sync jobs are handled as a group permission sync or a user permission sync
+    update_type: Mapped[PermissionSyncJobType] = mapped_column(
+        Enum(PermissionSyncJobType)
+    )
+    cc_pair_id: Mapped[int | None] = mapped_column(
+        ForeignKey("connector_credential_pair.id"), nullable=True
+    )
+    status: Mapped[PermissionSyncStatus] = mapped_column(Enum(PermissionSyncStatus))
+    error_msg: Mapped[str | None] = mapped_column(Text, default=None)
+    updated_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
+    )
+
+    cc_pair: Mapped[ConnectorCredentialPair] = relationship("ConnectorCredentialPair")
+
+
+class ExternalPermission(Base):
    """Maps user info both internal and external to the name of the external group
    This maps the user to all of their external groups so that the external group name can be
    attached to the ACL list matching during query time. User level permissions can be handled by
    directly adding the Danswer user to the doc ACL list"""

-    __tablename__ = "user__external_user_group_id"
+    __tablename__ = "external_permission"

-    user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True)
-    # These group ids have been prefixed by the source type
-    external_user_group_id: Mapped[str] = mapped_column(String, primary_key=True)
-    cc_pair_id: Mapped[int] = mapped_column(
-        ForeignKey("connector_credential_pair.id"), primary_key=True
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
+    # Email is needed because we want to keep track of users not in Danswer to simplify process
+    # when the user joins
+    user_email: Mapped[str] = mapped_column(String)
+    source_type: Mapped[DocumentSource] = mapped_column(
+        Enum(DocumentSource, native_enum=False)
    )
+    external_permission_group: Mapped[str] = mapped_column(String)
+    user = relationship("User")
+
+
+class EmailToExternalUserCache(Base):
+    """A way to map users IDs in the external tool to a user in Danswer or at least an email for
+    when the user joins. Used as a cache for when fetching external groups which have their own
+    user ids, this can easily be mapped back to users already known in Danswer without needing
+    to call external APIs to get the user emails.
+
+    This way when groups are updated in the external tool and we need to update the mapping of
+    internal users to the groups, we can sync the internal users to the external groups they are
+    part of using this.
+
+    Ie. User Chris is part of groups alpha, beta, and we can update this if Chris is no longer
+    part of alpha in some external tool.
+    """
+
+    __tablename__ = "email_to_external_user_cache"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    external_user_id: Mapped[str] = mapped_column(String)
+    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
+    # Email is needed because we want to keep track of users not in Danswer to simplify process
+    # when the user joins
+    user_email: Mapped[str] = mapped_column(String)
+    source_type: Mapped[DocumentSource] = mapped_column(
+        Enum(DocumentSource, native_enum=False)
+    )
+
+    user = relationship("User")


 class UsageReport(Base):
@@ -1749,7 +1749,7 @@ class UsageReport(Base):

    # if None, report was auto-generated
    requestor_user_id: Mapped[UUID | None] = mapped_column(
-        ForeignKey("user.id", ondelete="CASCADE"), nullable=True
+        ForeignKey("user.id"), nullable=True
    )
    time_created: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
--- a/backend/danswer/db/persona.py
+++ b/backend/danswer/db/persona.py
@@ -1,5 +1,4 @@
 from collections.abc import Sequence
-from datetime import datetime
 from functools import lru_cache
 from uuid import UUID

@@ -179,7 +178,6 @@ def create_update_persona(
    except ValueError as e:
        logger.exception("Failed to create persona")
        raise HTTPException(status_code=400, detail=str(e))
-
    return PersonaSnapshot.from_model(persona)


@@ -212,22 +210,6 @@ def update_persona_shared_users(
    )


-def update_persona_public_status(
-    persona_id: int,
-    is_public: bool,
-    db_session: Session,
-    user: User | None,
-) -> None:
-    persona = fetch_persona_by_id(
-        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
-    )
-    if user and user.role != UserRole.ADMIN and persona.user_id != user.id:
-        raise ValueError("You don't have permission to modify this persona")
-
-    persona.is_public = is_public
-    db_session.commit()
-
-
 def get_prompts(
    user_id: UUID | None,
    db_session: Session,
@@ -260,7 +242,7 @@ def get_personas(
    stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable)

    if not include_default:
-        stmt = stmt.where(Persona.builtin_persona.is_(False))
+        stmt = stmt.where(Persona.default_persona.is_(False))
    if not include_slack_bot_personas:
        stmt = stmt.where(not_(Persona.name.startswith(SLACK_BOT_PERSONA_PREFIX)))
    if not include_deleted:
@@ -308,7 +290,7 @@ def mark_delete_persona_by_name(
 ) -> None:
    stmt = (
        update(Persona)
-        .where(Persona.name == persona_name, Persona.builtin_persona == is_default)
+        .where(Persona.name == persona_name, Persona.default_persona == is_default)
        .values(deleted=True)
    )

@@ -408,6 +390,7 @@ def upsert_persona(
    document_set_ids: list[int] | None = None,
    tool_ids: list[int] | None = None,
    persona_id: int | None = None,
+    default_persona: bool = False,
    commit: bool = True,
    icon_color: str | None = None,
    icon_shape: int | None = None,
@@ -415,9 +398,6 @@ def upsert_persona(
    display_priority: int | None = None,
    is_visible: bool = True,
    remove_image: bool | None = None,
-    search_start_date: datetime | None = None,
-    builtin_persona: bool = False,
-    is_default_persona: bool = False,
    chunks_above: int = CONTEXT_CHUNKS_ABOVE,
    chunks_below: int = CONTEXT_CHUNKS_BELOW,
 ) -> Persona:
@@ -458,8 +438,8 @@ def upsert_persona(
        validate_persona_tools(tools)

    if persona:
-        if not builtin_persona and persona.builtin_persona:
-            raise ValueError("Cannot update builtin persona with non-builtin.")
+        if not default_persona and persona.default_persona:
+            raise ValueError("Cannot update default persona with non-default.")

        # this checks if the user has permission to edit the persona
        persona = fetch_persona_by_id(
@@ -474,7 +454,7 @@ def upsert_persona(
        persona.llm_relevance_filter = llm_relevance_filter
        persona.llm_filter_extraction = llm_filter_extraction
        persona.recency_bias = recency_bias
-        persona.builtin_persona = builtin_persona
+        persona.default_persona = default_persona
        persona.llm_model_provider_override = llm_model_provider_override
        persona.llm_model_version_override = llm_model_version_override
        persona.starter_messages = starter_messages
@@ -486,8 +466,6 @@ def upsert_persona(
            persona.uploaded_image_id = uploaded_image_id
        persona.display_priority = display_priority
        persona.is_visible = is_visible
-        persona.search_start_date = search_start_date
-        persona.is_default_persona = is_default_persona

        # Do not delete any associations manually added unless
        # a new updated list is provided
@@ -515,7 +493,7 @@ def upsert_persona(
            llm_relevance_filter=llm_relevance_filter,
            llm_filter_extraction=llm_filter_extraction,
            recency_bias=recency_bias,
-            builtin_persona=builtin_persona,
+            default_persona=default_persona,
            prompts=prompts or [],
            document_sets=document_sets or [],
            llm_model_provider_override=llm_model_provider_override,
@@ -527,8 +505,6 @@ def upsert_persona(
            uploaded_image_id=uploaded_image_id,
            display_priority=display_priority,
            is_visible=is_visible,
-            search_start_date=search_start_date,
-            is_default_persona=is_default_persona,
        )
        db_session.add(persona)

@@ -558,7 +534,7 @@ def delete_old_default_personas(
    Need a more graceful fix later or those need to never have IDs"""
    stmt = (
        update(Persona)
-        .where(Persona.builtin_persona, Persona.id > 0)
+        .where(Persona.default_persona, Persona.id > 0)
        .values(deleted=True, name=func.concat(Persona.name, "_old"))
    )

@@ -575,7 +551,6 @@ def update_persona_visibility(
    persona = fetch_persona_by_id(
        db_session=db_session, persona_id=persona_id, user=user, get_editable=True
    )
-
    persona.is_visible = is_visible
    db_session.commit()

@@ -588,15 +563,13 @@ def validate_persona_tools(tools: list[Tool]) -> None:
            )


-def get_prompts_by_ids(prompt_ids: list[int], db_session: Session) -> list[Prompt]:
+def get_prompts_by_ids(prompt_ids: list[int], db_session: Session) -> Sequence[Prompt]:
    """Unsafe, can fetch prompts from all users"""
    if not prompt_ids:
        return []
-    prompts = db_session.scalars(
-        select(Prompt).where(Prompt.id.in_(prompt_ids)).where(Prompt.deleted.is_(False))
-    ).all()
+    prompts = db_session.scalars(select(Prompt).where(Prompt.id.in_(prompt_ids))).all()

-    return list(prompts)
+    return prompts


 def get_prompt_by_id(
@@ -677,7 +650,9 @@ def get_persona_by_id(
        result = db_session.execute(persona_stmt)
        persona = result.scalar_one_or_none()
        if persona is None:
-            raise ValueError(f"Persona with ID {persona_id} does not exist")
+            raise ValueError(
+                f"Persona with ID {persona_id} does not exist or does not belong to user"
+            )
        return persona

    # or check if user owns persona
@@ -740,7 +715,7 @@ def delete_persona_by_name(
    persona_name: str, db_session: Session, is_default: bool = True
 ) -> None:
    stmt = delete(Persona).where(
-        Persona.name == persona_name, Persona.builtin_persona == is_default
+        Persona.name == persona_name, Persona.default_persona == is_default
    )

    db_session.execute(stmt)
--- a/backend/danswer/db/search_settings.py
+++ b/backend/danswer/db/search_settings.py
@@ -20,7 +20,6 @@ from danswer.db.models import IndexModelStatus
 from danswer.db.models import SearchSettings
 from danswer.indexing.models import IndexingSetting
 from danswer.natural_language_processing.search_nlp_models import clean_model_name
-from danswer.natural_language_processing.search_nlp_models import warm_up_cross_encoder
 from danswer.search.models import SavedSearchSettings
 from danswer.server.manage.embedding.models import (
    CloudEmbeddingProvider as ServerCloudEmbeddingProvider,
@@ -181,14 +180,6 @@ def update_current_search_settings(
        logger.warning("No current search settings found to update")
        return

-    # Whenever we update the current search settings, we should ensure that the local reranking model is warmed up.
-    if (
-        search_settings.rerank_provider_type is None
-        and search_settings.rerank_model_name is not None
-        and current_settings.rerank_model_name != search_settings.rerank_model_name
-    ):
-        warm_up_cross_encoder(search_settings.rerank_model_name)
-
    update_search_settings(current_settings, search_settings, preserved_fields)
    db_session.commit()
    logger.info("Current search settings updated successfully")
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
hagen-danswer	d304e3fd0f	gotem	2024-09-11 11:41:40 -07:00
hagen-danswer	475e528697	perhaps now this will work	2024-09-11 11:39:26 -07:00
hagen-danswer	2392751051	wb this?	2024-09-11 11:37:21 -07:00
hagen-danswer	82b5934597	trya again	2024-09-11 11:35:55 -07:00
hagen-danswer	bfe695dd1e	refactor	2024-09-11 11:28:03 -07:00
hagen-danswer	9090391275	fixed no cap	2024-09-11 11:22:50 -07:00
hagen-danswer	6e85807640	fixed fr	2024-09-11 11:21:20 -07:00
hagen-danswer	183bdcaf5d	fixed	2024-09-11 11:18:21 -07:00
hagen-danswer	b228675b1b	Made PR checks only run when relevant files are changed	2024-09-11 11:07:56 -07:00
				`@@ -1 +0,0 @@`
				`backend/tests/integration/tests/pruning/website`