Compare commits


1 Commit

Author: Weves
SHA1: 5f82de7c45
Message: Debug test
Date: 2024-09-23 11:05:27 -07:00
475 changed files with 5013 additions and 43425 deletions


@@ -32,20 +32,16 @@ inputs:
description: 'Cache destinations'
required: false
retry-wait-time:
description: 'Time to wait before attempt 2 in seconds'
description: 'Time to wait before retry in seconds'
required: false
default: '60'
retry-wait-time-2:
description: 'Time to wait before attempt 3 in seconds'
required: false
default: '120'
default: '5'
runs:
using: "composite"
steps:
- name: Build and push Docker image (Attempt 1 of 3)
- name: Build and push Docker image (First Attempt)
id: buildx1
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
continue-on-error: true
with:
context: ${{ inputs.context }}
@@ -58,17 +54,16 @@ runs:
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
- name: Wait before attempt 2
- name: Wait to retry
if: steps.buildx1.outcome != 'success'
run: |
echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..."
sleep ${{ inputs.retry-wait-time }}
shell: bash
- name: Build and push Docker image (Attempt 2 of 3)
id: buildx2
- name: Build and push Docker image (Retry Attempt)
if: steps.buildx1.outcome != 'success'
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
@@ -79,31 +74,3 @@ runs:
tags: ${{ inputs.tags }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
- name: Wait before attempt 3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
run: |
echo "Second attempt failed. Waiting ${{ inputs.retry-wait-time-2 }} seconds before retry..."
sleep ${{ inputs.retry-wait-time-2 }}
shell: bash
- name: Build and push Docker image (Attempt 3 of 3)
id: buildx3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
uses: docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
- name: Report failure
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
run: |
echo "All attempts failed. Possible transient infrastucture issues? Try again later or inspect logs for details."
shell: bash
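
For context, this composite action is consumed elsewhere in this diff via uses: ./.github/actions/custom-build-and-push. A minimal calling step might look like the sketch below; the input values are illustrative, and only the input names are taken from the action definition above.

- name: Build Backend Docker image
  uses: ./.github/actions/custom-build-and-push
  with:
    context: ./backend
    file: ./backend/Dockerfile
    platforms: linux/amd64
    tags: danswer/danswer-backend:test
    push: false
    load: true
    retry-wait-time: '60'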


@@ -7,17 +7,16 @@ on:
env:
REGISTRY_IMAGE: danswer/danswer-backend
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build-and-push:
# TODO: investigate a matrix build like the web container
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
# TODO: make this a matrix build like the web containers
runs-on:
group: amd64-image-builders
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -32,7 +31,7 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y build-essential
- name: Backend Image Docker Build and Push
uses: docker/build-push-action@v5
with:
@@ -42,20 +41,12 @@ jobs:
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
${{ env.REGISTRY_IMAGE }}:latest
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
# To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
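
To illustrate the conditional tagging expression in this hunk: LATEST_TAG is set from contains(github.ref_name, 'latest'), so the format() branch only emits a :latest tag when the pushed ref name contains "latest". The ref names below are hypothetical examples, not taken from the diff.

# ref_name "v0.6.2-latest" -> tags: danswer/danswer-backend:v0.6.2-latest and danswer/danswer-backend:latest
# ref_name "v0.6.2"        -> tags: danswer/danswer-backend:v0.6.2 only (the expression expands to an empty string)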


@@ -5,18 +5,14 @@ on:
tags:
- '*'
env:
REGISTRY_IMAGE: danswer/danswer-model-server
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build-and-push:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on:
group: amd64-image-builders
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -35,21 +31,13 @@ jobs:
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
danswer/danswer-model-server:${{ github.ref_name }}
danswer/danswer-model-server:latest
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'


@@ -7,15 +7,11 @@ on:
env:
REGISTRY_IMAGE: danswer/danswer-web-server
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build:
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
runs-on:
group: ${{ matrix.platform == 'linux/amd64' && 'amd64-image-builders' || 'arm64-image-builders' }}
strategy:
fail-fast: false
matrix:
@@ -39,7 +35,7 @@ jobs:
images: ${{ env.REGISTRY_IMAGE }}
tags: |
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
type=raw,value=${{ env.REGISTRY_IMAGE }}:latest
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -116,16 +112,8 @@ jobs:
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
# https://github.com/aquasecurity/trivy/discussions/7538
# https://github.com/aquasecurity/trivy-action/issues/389
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'


@@ -1,6 +1,3 @@
# This workflow is set up to be manually triggered via the GitHub Action tab.
# Given a version, it will tag those backend and webserver images as "latest".
name: Tag Latest Version
on:
@@ -12,9 +9,7 @@ on:
jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: ubuntu-latest
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
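
The remaining steps of this workflow are not shown in the hunk. As a rough sketch only (the step name, the input name "version", and the exact command are assumptions, not taken from the diff), retagging an already-pushed multi-arch image as latest can be done with buildx imagetools:

- name: Tag backend image as latest
  run: |
    docker buildx imagetools create \
      --tag danswer/danswer-backend:latest \
      danswer/danswer-backend:${{ github.event.inputs.version }}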


@@ -1,172 +0,0 @@
# This workflow is intended to be manually triggered via the GitHub Action tab.
# Given a hotfix branch, it will attempt to open a PR to all release branches and
# by default auto merge them
name: Hotfix release branches
on:
workflow_dispatch:
inputs:
hotfix_commit:
description: 'Hotfix commit hash'
required: true
hotfix_suffix:
description: 'Hotfix branch suffix (e.g. hotfix/v0.8-{suffix})'
required: true
release_branch_pattern:
description: 'Release branch pattern (regex)'
required: true
default: 'release/.*'
auto_merge:
description: 'Automatically merge the hotfix PRs'
required: true
type: choice
default: 'true'
options:
- true
- false
jobs:
hotfix_release_branches:
permissions: write-all
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
steps:
# needs RKUO_DEPLOY_KEY for write access to merge PR's
- name: Checkout Repository
uses: actions/checkout@v4
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
fetch-depth: 0
- name: Set up Git user
run: |
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@danswer.ai"
- name: Fetch All Branches
run: |
git fetch --all --prune
- name: Verify Hotfix Commit Exists
run: |
git rev-parse --verify "${{ github.event.inputs.hotfix_commit }}" || { echo "Commit not found: ${{ github.event.inputs.hotfix_commit }}"; exit 1; }
- name: Get Release Branches
id: get_release_branches
run: |
BRANCHES=$(git branch -r | grep -E "${{ github.event.inputs.release_branch_pattern }}" | sed 's|origin/||' | tr -d ' ')
if [ -z "$BRANCHES" ]; then
echo "No release branches found matching pattern '${{ github.event.inputs.release_branch_pattern }}'."
exit 1
fi
echo "Found release branches:"
echo "$BRANCHES"
# Join the branches into a single line separated by commas
BRANCHES_JOINED=$(echo "$BRANCHES" | tr '\n' ',' | sed 's/,$//')
# Set the branches as an output
echo "branches=$BRANCHES_JOINED" >> $GITHUB_OUTPUT
# notes on all the vagaries of wiring up automated PR's
# https://github.com/peter-evans/create-pull-request/blob/main/docs/concepts-guidelines.md#triggering-further-workflow-runs
# we must use a custom token for GH_TOKEN to trigger the subsequent PR checks
- name: Create and Merge Pull Requests to Matching Release Branches
env:
HOTFIX_COMMIT: ${{ github.event.inputs.hotfix_commit }}
HOTFIX_SUFFIX: ${{ github.event.inputs.hotfix_suffix }}
AUTO_MERGE: ${{ github.event.inputs.auto_merge }}
GH_TOKEN: ${{ secrets.RKUO_PERSONAL_ACCESS_TOKEN }}
run: |
# Get the branches from the previous step
BRANCHES="${{ steps.get_release_branches.outputs.branches }}"
# Convert BRANCHES to an array
IFS=$',' read -ra BRANCH_ARRAY <<< "$BRANCHES"
# Loop through each release branch and create and merge a PR
for RELEASE_BRANCH in "${BRANCH_ARRAY[@]}"; do
echo "Processing $RELEASE_BRANCH..."
# Parse out the release version by removing "release/" from the branch name
RELEASE_VERSION=${RELEASE_BRANCH#release/}
echo "Release version parsed: $RELEASE_VERSION"
HOTFIX_BRANCH="hotfix/${RELEASE_VERSION}-${HOTFIX_SUFFIX}"
echo "Creating PR from $HOTFIX_BRANCH to $RELEASE_BRANCH"
# Checkout the release branch
echo "Checking out $RELEASE_BRANCH"
git checkout "$RELEASE_BRANCH"
# Create the new hotfix branch
if git rev-parse --verify "$HOTFIX_BRANCH" >/dev/null 2>&1; then
echo "Hotfix branch $HOTFIX_BRANCH already exists. Skipping branch creation."
else
echo "Branching $RELEASE_BRANCH to $HOTFIX_BRANCH"
git checkout -b "$HOTFIX_BRANCH"
fi
# Check if the hotfix commit is a merge commit
if git rev-list --merges -n 1 "$HOTFIX_COMMIT" >/dev/null 2>&1; then
# -m 1 uses the target branch as the base (which is what we want)
echo "Hotfix commit $HOTFIX_COMMIT is a merge commit, using -m 1 for cherry-pick"
CHERRY_PICK_CMD="git cherry-pick -m 1 $HOTFIX_COMMIT"
else
CHERRY_PICK_CMD="git cherry-pick $HOTFIX_COMMIT"
fi
# Perform the cherry-pick
echo "Executing: $CHERRY_PICK_CMD"
eval "$CHERRY_PICK_CMD"
if [ $? -ne 0 ]; then
echo "Cherry-pick failed for $HOTFIX_COMMIT on $HOTFIX_BRANCH. Aborting..."
git cherry-pick --abort
continue
fi
# Push the hotfix branch to the remote
echo "Pushing $HOTFIX_BRANCH..."
git push origin "$HOTFIX_BRANCH"
echo "Hotfix branch $HOTFIX_BRANCH created and pushed."
# Check if PR already exists
EXISTING_PR=$(gh pr list --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH" --state open --json number --jq '.[0].number')
if [ -n "$EXISTING_PR" ]; then
echo "An open PR already exists: #$EXISTING_PR. Skipping..."
continue
fi
# Create a new PR and capture the output
PR_OUTPUT=$(gh pr create --title "Merge $HOTFIX_BRANCH into $RELEASE_BRANCH" \
--body "Automated PR to merge \`$HOTFIX_BRANCH\` into \`$RELEASE_BRANCH\`." \
--head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH")
# Extract the URL from the output
PR_URL=$(echo "$PR_OUTPUT" | grep -Eo 'https://github.com/[^ ]+')
echo "Pull request created: $PR_URL"
# Extract PR number from URL
PR_NUMBER=$(basename "$PR_URL")
echo "Pull request created: $PR_NUMBER"
if [ "$AUTO_MERGE" == "true" ]; then
echo "Attempting to merge pull request #$PR_NUMBER"
# Attempt to merge the PR
gh pr merge "$PR_NUMBER" --merge --auto --delete-branch
if [ $? -eq 0 ]; then
echo "Pull request #$PR_NUMBER merged successfully."
else
# Optionally, handle the error or continue
echo "Failed to merge pull request #$PR_NUMBER."
fi
fi
done
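
Since this workflow is dispatch-only, a hedged example of triggering it from the command line with the inputs defined above (the commit hash placeholder and the "jira-1234" suffix are illustrative values):

gh workflow run "Hotfix release branches" \
  -f hotfix_commit=<commit-sha> \
  -f hotfix_suffix=jira-1234 \
  -f release_branch_pattern='release/.*' \
  -f auto_merge=true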


@@ -12,8 +12,7 @@ on:
jobs:
lint-test:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
runs-on: Amd64
# fetch-depth 0 is required for helm/chart-testing-action
steps:


@@ -3,14 +3,11 @@ name: Python Checks
on:
merge_group:
pull_request:
branches:
- main
- 'release/**'
branches: [ main ]
jobs:
mypy-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: ubuntu-latest
steps:
- name: Checkout code


@@ -15,14 +15,10 @@ env:
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
# Jira
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: ubuntu-latest
env:
PYTHONPATH: ./backend


@@ -1,58 +0,0 @@
name: Connector Tests
on:
schedule:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
env:
# Bedrock
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
# OpenAI
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
env:
PYTHONPATH: ./backend
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
$SLACK_WEBHOOK


@@ -3,14 +3,11 @@ name: Python Unit Tests
on:
merge_group:
pull_request:
branches:
- main
- 'release/**'
branches: [ main ]
jobs:
backend-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: ubuntu-latest
env:
PYTHONPATH: ./backend


@@ -1,6 +1,6 @@
name: Quality Checks PR
concurrency:
group: Quality-Checks-PR-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
group: Quality-Checks-PR-${{ github.head_ref }}
cancel-in-progress: true
on:
@@ -9,8 +9,7 @@ on:
jobs:
quality-checks:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:


@@ -1,23 +1,19 @@
name: Run Integration Tests v2
name: Run Integration Tests
concurrency:
group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
group: Run-Integration-Tests-${{ github.head_ref }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- 'release/**'
branches: [ main ]
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
jobs:
integration-tests:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
runs-on: Amd64
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -31,35 +27,25 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# successfully.
# NOTE: we don't need to build the Web Docker image since it's not used
# during the IT for now. We have a separate action to verify it builds
# successfully
- name: Pull Web Docker image
run: |
docker pull danswer/danswer-web-server:latest
docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:test
docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:it
# we use the runs-on cache for docker builds
# in conjunction with runs-on runners, it has better speed and unlimited caching
# https://runs-on.com/caching/s3-cache-for-github-actions/
# https://runs-on.com/caching/docker/
# https://github.com/moby/buildkit#s3-cache-experimental
# images are built and run locally for testing purposes. Not pushed.
- name: Build Backend Docker image
uses: ./.github/actions/custom-build-and-push
with:
context: ./backend
file: ./backend/Dockerfile
platforms: linux/amd64
tags: danswer/danswer-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
tags: danswer/danswer-backend:it
cache-from: type=registry,ref=danswer/danswer-backend:it
cache-to: |
type=registry,ref=danswer/danswer-backend:it,mode=max
type=inline
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
@@ -67,11 +53,11 @@ jobs:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64
tags: danswer/danswer-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
tags: danswer/danswer-model-server:it
cache-from: type=registry,ref=danswer/danswer-model-server:it
cache-to: |
type=registry,ref=danswer/danswer-model-server:it,mode=max
type=inline
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
@@ -79,11 +65,11 @@ jobs:
context: ./backend
file: ./backend/tests/integration/Dockerfile
platforms: linux/amd64
tags: danswer/danswer-integration:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
tags: danswer/integration-test-runner:it
cache-from: type=registry,ref=danswer/integration-test-runner:it
cache-to: |
type=registry,ref=danswer/integration-test-runner:it,mode=max
type=inline
- name: Start Docker containers
run: |
@@ -92,7 +78,7 @@ jobs:
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
IMAGE_TAG=it \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
id: start_docker
@@ -134,7 +120,6 @@ jobs:
run: |
echo "Running integration tests..."
docker run --rm --network danswer-stack_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
@@ -143,9 +128,7 @@ jobs:
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
danswer/danswer-integration:test
danswer/integration-test-runner:it
continue-on-error: true
id: run_tests


@@ -1,54 +0,0 @@
name: Nightly Tag Push
on:
schedule:
- cron: '0 10 * * *' # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
permissions:
contents: write # Allows pushing tags to the repository
jobs:
create-and-push-tag:
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
steps:
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
# implement here which needs an actual user's deploy key
- name: Checkout code
uses: actions/checkout@v4
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
- name: Set up Git user
run: |
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@danswer.ai"
- name: Check for existing nightly tag
id: check_tag
run: |
if git tag --points-at HEAD --list "nightly-latest*" | grep -q .; then
echo "A tag starting with 'nightly-latest' already exists on HEAD."
echo "tag_exists=true" >> $GITHUB_OUTPUT
else
echo "No tag starting with 'nightly-latest' exists on HEAD."
echo "tag_exists=false" >> $GITHUB_OUTPUT
fi
# don't tag again if HEAD already has a nightly-latest tag on it
- name: Create Nightly Tag
if: steps.check_tag.outputs.tag_exists == 'false'
env:
DATE: ${{ github.run_id }}
run: |
TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
echo "Creating tag: $TAG_NAME"
git tag $TAG_NAME
- name: Push Tag
if: steps.check_tag.outputs.tag_exists == 'false'
run: |
TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
git push origin $TAG_NAME
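
A quick worked example of the tag format above, using the date of the commit in this compare:

TAG_NAME="nightly-latest-$(date +'%Y%m%d')"   # on 2024-09-23 this yields nightly-latest-20240923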


@@ -1 +0,0 @@
backend/tests/integration/tests/pruning/website


@@ -22,7 +22,7 @@ Your input is vital to making sure that Danswer moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
And always feel free to message us (Chris Weaver / Yuhong Sun) on
[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2lcmqw703-071hBuZBfNEOGUsLa5PXvQ) /
[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2afut44lv-Rw3kSWu6_OmdAXRpCv80DQ) /
[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.


@@ -101,7 +101,7 @@ COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connect
# Put logo in assets
COPY ./assets /app/assets
ENV PYTHONPATH=/app
ENV PYTHONPATH /app
# Default command which does nothing
# This container is used by api server and background which specify their own CMD


@@ -1,109 +0,0 @@
FROM python:3.11.7-slim-bookworm
LABEL com.danswer.maintainer="founders@danswer.ai"
LABEL com.danswer.description="This image is the web/frontend container of Danswer which \
contains code for both the Community and Enterprise editions of Danswer. If you do not \
have a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \
Edition features outside of personal development or testing purposes. Please reach out to \
founders@danswer.ai for more information. Please visit https://github.com/danswer-ai/danswer"
# Default DANSWER_VERSION, typically overridden during builds by GitHub Actions.
ARG DANSWER_VERSION=0.3-dev
ENV DANSWER_VERSION=${DANSWER_VERSION} \
DANSWER_RUNNING_IN_DOCKER="true"
RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
# Install system dependencies
# cmake needed for psycopg (postgres)
# libpq-dev needed for psycopg (postgres)
# curl included just for users' convenience
# zip for Vespa step further down
# ca-certificates for HTTPS
RUN apt-get update && \
apt-get install -y \
cmake \
curl \
zip \
ca-certificates \
libgnutls30=3.7.9-2+deb12u3 \
libblkid1=2.38.1-5+deb12u1 \
libmount1=2.38.1-5+deb12u1 \
libsmartcols1=2.38.1-5+deb12u1 \
libuuid1=2.38.1-5+deb12u1 \
libxmlsec1-dev \
pkg-config \
gcc && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
# Install Python dependencies
# Remove py which is pulled in by retry, py is not needed and is a CVE
COPY ./requirements/default.txt /tmp/requirements.txt
COPY ./requirements/ee.txt /tmp/ee-requirements.txt
RUN pip install --no-cache-dir --upgrade \
--retries 5 \
--timeout 30 \
-r /tmp/requirements.txt \
-r /tmp/ee-requirements.txt && \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord
# Cleanup for CVEs and size reduction
# https://github.com/tornadoweb/tornado/issues/3107
# xserver-common and xvfb included by playwright installation but not needed after
# perl-base is part of the base Python Debian image but not needed for Danswer functionality
# perl-base could only be removed with --allow-remove-essential
RUN apt-get update && \
apt-get remove -y --allow-remove-essential \
perl-base \
xserver-common \
xvfb \
cmake \
libldap-2.5-0 \
libxmlsec1-dev \
pkg-config \
gcc && \
apt-get install -y libxmlsec1-openssl && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* && \
rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Pre-downloading models for setups with limited egress
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Set up application files
WORKDIR /app
# Enterprise Version Files
COPY ./ee /app/ee
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Set up application files
COPY ./danswer /app/danswer
COPY ./shared_configs /app/shared_configs
COPY ./alembic /app/alembic
COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
# Escape hatch
COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
# Put logo in assets
COPY ./assets /app/assets
ENV PYTHONPATH=/app
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]


@@ -55,6 +55,6 @@ COPY ./shared_configs /app/shared_configs
# Model Server main code
COPY ./model_server /app/model_server
ENV PYTHONPATH=/app
ENV PYTHONPATH /app
CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]


@@ -1,6 +1,6 @@
# A generic, single database configuration.
[DEFAULT]
[alembic]
# path to migration scripts
script_location = alembic
@@ -47,8 +47,7 @@ prepend_sys_path = .
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os
# Use os.pathsep. Default configuration used for new projects.
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
# set to 'true' to search source files recursively
# in each "version_locations" directory
@@ -107,12 +106,3 @@ formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
[alembic]
script_location = alembic
version_locations = %(script_location)s/versions
[schema_private]
script_location = alembic_tenants
version_locations = %(script_location)s/versions


@@ -1,52 +1,46 @@
from typing import Any
import asyncio
from logging.config import fileConfig
from alembic import context
from danswer.db.engine import build_connection_string
from danswer.db.models import Base
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.sql import text
from danswer.configs.app_configs import MULTI_TENANT
from danswer.db.engine import build_connection_string
from danswer.db.models import Base
from celery.backends.database.session import ResultModelBase # type: ignore
from sqlalchemy.schema import SchemaItem
# Alembic Config object
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
fileConfig(config.config_file_name)
# Add your model's MetaData object here
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = [Base.metadata, ResultModelBase.metadata]
def get_schema_options() -> tuple[str, bool]:
x_args_raw = context.get_x_argument()
x_args = {}
for arg in x_args_raw:
for pair in arg.split(","):
if "=" in pair:
key, value = pair.split("=", 1)
x_args[key.strip()] = value.strip()
schema_name = x_args.get("schema", "public")
create_schema = x_args.get("create_schema", "true").lower() == "true"
return schema_name, create_schema
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
def include_object(
object: Any, name: str, type_: str, reflected: bool, compare_to: Any
object: SchemaItem,
name: str,
type_: str,
reflected: bool,
compare_to: SchemaItem | None,
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False
@@ -55,24 +49,21 @@ def include_object(
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
schema_name, _ = get_schema_options()
url = build_connection_string()
"""
url = build_connection_string()
context.configure(
url=url,
target_metadata=target_metadata, # type: ignore
literal_binds=True,
include_object=include_object,
version_table_schema=schema_name,
include_schemas=True,
script_location=config.get_main_option("script_location"),
dialect_opts={"paramstyle": "named"},
)
@@ -81,37 +72,22 @@ def run_migrations_offline() -> None:
def do_run_migrations(connection: Connection) -> None:
schema_name, create_schema = get_schema_options()
if MULTI_TENANT and schema_name == "public":
raise ValueError(
"Cannot run default migrations in public schema when multi-tenancy is enabled. "
"Please specify a tenant-specific schema."
)
if create_schema:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
# Set search_path to the target schema
connection.execute(text(f'SET search_path TO "{schema_name}"'))
context.configure(
connection=connection,
target_metadata=target_metadata, # type: ignore
include_object=include_object,
version_table_schema=schema_name,
include_schemas=True,
compare_type=True,
compare_server_default=True,
script_location=config.get_main_option("script_location"),
)
) # type: ignore
with context.begin_transaction():
context.run_migrations()
async def run_async_migrations() -> None:
"""In this scenario we need to create an Engine
and associate a connection with the context.
"""
connectable = create_async_engine(
build_connection_string(),
poolclass=pool.NullPool,
@@ -124,6 +100,8 @@ async def run_async_migrations() -> None:
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
asyncio.run(run_async_migrations())
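
Given get_schema_options above, which pulls schema and create_schema out of Alembic's -x arguments, a tenant-scoped migration run can be invoked roughly as follows (the schema name is a made-up example):

alembic -x schema=tenant_acme -x create_schema=true upgrade head
# the helper also splits each -x value on commas, so this is equivalent:
alembic -x schema=tenant_acme,create_schema=true upgrade head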


@@ -1,46 +0,0 @@
"""fix_user__external_user_group_id_fk
Revision ID: 46b7a812670f
Revises: f32615f71aeb
Create Date: 2024-09-23 12:58:03.894038
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "46b7a812670f"
down_revision = "f32615f71aeb"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Drop the existing primary key
op.drop_constraint(
"user__external_user_group_id_pkey",
"user__external_user_group_id",
type_="primary",
)
# Add the new composite primary key
op.create_primary_key(
"user__external_user_group_id_pkey",
"user__external_user_group_id",
["user_id", "external_user_group_id", "cc_pair_id"],
)
def downgrade() -> None:
# Drop the composite primary key
op.drop_constraint(
"user__external_user_group_id_pkey",
"user__external_user_group_id",
type_="primary",
)
# Delete all entries from the table
op.execute("DELETE FROM user__external_user_group_id")
# Recreate the original primary key on user_id
op.create_primary_key(
"user__external_user_group_id_pkey", "user__external_user_group_id", ["user_id"]
)


@@ -9,7 +9,7 @@ import json
from typing import cast
from alembic import op
import sqlalchemy as sa
from danswer.key_value_store.factory import get_kv_store
from danswer.dynamic_configs.factory import get_dynamic_config_store
# revision identifiers, used by Alembic.
revision = "703313b75876"
@@ -54,7 +54,9 @@ def upgrade() -> None:
)
try:
settings_json = cast(str, get_kv_store().load("token_budget_settings"))
settings_json = cast(
str, get_dynamic_config_store().load("token_budget_settings")
)
settings = json.loads(settings_json)
is_enabled = settings.get("enable_token_budget", False)
@@ -69,7 +71,7 @@ def upgrade() -> None:
)
# Delete the dynamic config
get_kv_store().delete("token_budget_settings")
get_dynamic_config_store().delete("token_budget_settings")
except Exception:
# Ignore if the dynamic config is not found


@@ -1,27 +0,0 @@
"""add last_pruned to the connector_credential_pair table
Revision ID: ac5eaac849f9
Revises: 52a219fb5233
Create Date: 2024-09-10 15:04:26.437118
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "ac5eaac849f9"
down_revision = "46b7a812670f"
branch_labels = None
depends_on = None
def upgrade() -> None:
# last pruned represents the last time the connector was pruned
op.add_column(
"connector_credential_pair",
sa.Column("last_pruned", sa.DateTime(timezone=True), nullable=True),
)
def downgrade() -> None:
op.drop_column("connector_credential_pair", "last_pruned")


@@ -20,7 +20,7 @@ depends_on: None = None
def upgrade() -> None:
conn = op.get_bind()
existing_ids_and_chosen_assistants = conn.execute(
sa.text('select id, chosen_assistants from "user"')
sa.text("select id, chosen_assistants from public.user")
)
op.drop_column(
"user",
@@ -37,7 +37,7 @@ def upgrade() -> None:
for id, chosen_assistants in existing_ids_and_chosen_assistants:
conn.execute(
sa.text(
'update "user" set chosen_assistants = :chosen_assistants where id = :id'
"update public.user set chosen_assistants = :chosen_assistants where id = :id"
),
{"chosen_assistants": json.dumps(chosen_assistants), "id": id},
)
@@ -46,7 +46,7 @@ def upgrade() -> None:
def downgrade() -> None:
conn = op.get_bind()
existing_ids_and_chosen_assistants = conn.execute(
sa.text('select id, chosen_assistants from "user"')
sa.text("select id, chosen_assistants from public.user")
)
op.drop_column(
"user",
@@ -59,7 +59,7 @@ def downgrade() -> None:
for id, chosen_assistants in existing_ids_and_chosen_assistants:
conn.execute(
sa.text(
'update "user" set chosen_assistants = :chosen_assistants where id = :id'
"update public.user set chosen_assistants = :chosen_assistants where id = :id"
),
{"chosen_assistants": chosen_assistants, "id": id},
)


@@ -1,26 +0,0 @@
"""add_deployment_name_to_llmprovider
Revision ID: e4334d5b33ba
Revises: ac5eaac849f9
Create Date: 2024-10-04 09:52:34.896867
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "e4334d5b33ba"
down_revision = "ac5eaac849f9"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"llm_provider", sa.Column("deployment_name", sa.String(), nullable=True)
)
def downgrade() -> None:
op.drop_column("llm_provider", "deployment_name")


@@ -1,3 +0,0 @@
These files are for public table migrations when operating with multi tenancy.
If you are not a Danswer developer, you can ignore this directory entirely.


@@ -1,111 +0,0 @@
import asyncio
from logging.config import fileConfig
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.schema import SchemaItem
from alembic import context
from danswer.db.engine import build_connection_string
from danswer.db.models import PublicBase
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = [PublicBase.metadata]
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
def include_object(
object: SchemaItem,
name: str,
type_: str,
reflected: bool,
compare_to: SchemaItem | None,
) -> bool:
if type_ == "table" and name in EXCLUDE_TABLES:
return False
return True
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = build_connection_string()
context.configure(
url=url,
target_metadata=target_metadata, # type: ignore
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def do_run_migrations(connection: Connection) -> None:
context.configure(
connection=connection,
target_metadata=target_metadata, # type: ignore
include_object=include_object,
) # type: ignore
with context.begin_transaction():
context.run_migrations()
async def run_async_migrations() -> None:
"""In this scenario we need to create an Engine
and associate a connection with the context.
"""
connectable = create_async_engine(
build_connection_string(),
poolclass=pool.NullPool,
)
async with connectable.connect() as connection:
await connection.run_sync(do_run_migrations)
await connectable.dispose()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
asyncio.run(run_async_migrations())
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()


@@ -1,24 +0,0 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}


@@ -1,24 +0,0 @@
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "14a83a331951"
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"user_tenant_mapping",
sa.Column("email", sa.String(), nullable=False),
sa.Column("tenant_id", sa.String(), nullable=False),
sa.UniqueConstraint("email", "tenant_id", name="uq_user_tenant"),
sa.UniqueConstraint("email", name="uq_email"),
schema="public",
)
def downgrade() -> None:
op.drop_table("user_tenant_mapping", schema="public")


@@ -1,20 +1,20 @@
from typing import cast
from danswer.configs.constants import KV_USER_STORE_KEY
from danswer.key_value_store.factory import get_kv_store
from danswer.key_value_store.interface import JSON_ro
from danswer.key_value_store.interface import KvKeyNotFoundError
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.dynamic_configs.interface import JSON_ro
def get_invited_users() -> list[str]:
try:
store = get_kv_store()
store = get_dynamic_config_store()
return cast(list, store.load(KV_USER_STORE_KEY))
except KvKeyNotFoundError:
except ConfigNotFoundError:
return list()
def write_invited_users(emails: list[str]) -> int:
store = get_kv_store()
store = get_dynamic_config_store()
store.store(KV_USER_STORE_KEY, cast(JSON_ro, emails))
return len(emails)
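
For reference, a minimal sketch of the renamed key-value interface as it is used in this file; the key name below is hypothetical, and only get_kv_store, load, and KvKeyNotFoundError come from the diff.

from typing import cast

from danswer.key_value_store.factory import get_kv_store
from danswer.key_value_store.interface import KvKeyNotFoundError


def load_example_list() -> list[str]:
    try:
        # "example_key" is a hypothetical key used only for illustration
        return cast(list, get_kv_store().load("example_key"))
    except KvKeyNotFoundError:
        return []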


@@ -4,29 +4,29 @@ from typing import cast
from danswer.auth.schemas import UserRole
from danswer.configs.constants import KV_NO_AUTH_USER_PREFERENCES_KEY
from danswer.key_value_store.store import KeyValueStore
from danswer.key_value_store.store import KvKeyNotFoundError
from danswer.dynamic_configs.store import ConfigNotFoundError
from danswer.dynamic_configs.store import DynamicConfigStore
from danswer.server.manage.models import UserInfo
from danswer.server.manage.models import UserPreferences
def set_no_auth_user_preferences(
store: KeyValueStore, preferences: UserPreferences
store: DynamicConfigStore, preferences: UserPreferences
) -> None:
store.store(KV_NO_AUTH_USER_PREFERENCES_KEY, preferences.model_dump())
def load_no_auth_user_preferences(store: KeyValueStore) -> UserPreferences:
def load_no_auth_user_preferences(store: DynamicConfigStore) -> UserPreferences:
try:
preferences_data = cast(
Mapping[str, Any], store.load(KV_NO_AUTH_USER_PREFERENCES_KEY)
)
return UserPreferences(**preferences_data)
except KvKeyNotFoundError:
except ConfigNotFoundError:
return UserPreferences(chosen_assistants=None, default_model=None)
def fetch_no_auth_user(store: KeyValueStore) -> UserInfo:
def fetch_no_auth_user(store: DynamicConfigStore) -> UserInfo:
return UserInfo(
id="__no_auth_user__",
email="anonymous@danswer.ai",


@@ -34,7 +34,6 @@ class UserRead(schemas.BaseUser[uuid.UUID]):
class UserCreate(schemas.BaseUserCreate):
role: UserRole = UserRole.BASIC
has_web_login: bool | None = True
tenant_id: str | None = None
class UserUpdate(schemas.BaseUserUpdate):


@@ -8,7 +8,6 @@ from email.mime.text import MIMEText
from typing import Optional
from typing import Tuple
import jwt
from email_validator import EmailNotValidError
from email_validator import validate_email
from fastapi import APIRouter
@@ -26,14 +25,11 @@ from fastapi_users import schemas
from fastapi_users import UUIDIDMixin
from fastapi_users.authentication import AuthenticationBackend
from fastapi_users.authentication import CookieTransport
from fastapi_users.authentication import JWTStrategy
from fastapi_users.authentication import Strategy
from fastapi_users.authentication.strategy.db import AccessTokenDatabase
from fastapi_users.authentication.strategy.db import DatabaseStrategy
from fastapi_users.openapi import OpenAPIResponseType
from fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase
from sqlalchemy import select
from sqlalchemy.orm import attributes
from sqlalchemy.orm import Session
from danswer.auth.invited_users import get_invited_users
@@ -41,13 +37,9 @@ from danswer.auth.schemas import UserCreate
from danswer.auth.schemas import UserRole
from danswer.auth.schemas import UserUpdate
from danswer.configs.app_configs import AUTH_TYPE
from danswer.configs.app_configs import DATA_PLANE_SECRET
from danswer.configs.app_configs import DISABLE_AUTH
from danswer.configs.app_configs import EMAIL_FROM
from danswer.configs.app_configs import EXPECTED_API_KEY
from danswer.configs.app_configs import MULTI_TENANT
from danswer.configs.app_configs import REQUIRE_EMAIL_VERIFICATION
from danswer.configs.app_configs import SECRET_JWT_KEY
from danswer.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
from danswer.configs.app_configs import SMTP_PASS
from danswer.configs.app_configs import SMTP_PORT
@@ -65,21 +57,15 @@ from danswer.db.auth import get_access_token_db
from danswer.db.auth import get_default_admin_user_emails
from danswer.db.auth import get_user_count
from danswer.db.auth import get_user_db
from danswer.db.auth import SQLAlchemyUserAdminDB
from danswer.db.engine import get_async_session_with_tenant
from danswer.db.engine import get_session
from danswer.db.engine import get_session_with_tenant
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import AccessToken
from danswer.db.models import OAuthAccount
from danswer.db.models import User
from danswer.db.models import UserTenantMapping
from danswer.db.users import get_user_by_email
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
from danswer.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import current_tenant_id
logger = setup_logger()
@@ -147,8 +133,8 @@ def verify_email_is_invited(email: str) -> None:
raise PermissionError("User not on allowed user whitelist")
def verify_email_in_whitelist(email: str, tenant_id: str | None = None) -> None:
with get_session_with_tenant(tenant_id) as db_session:
def verify_email_in_whitelist(email: str) -> None:
with Session(get_sqlalchemy_engine()) as db_session:
if not get_user_by_email(email, db_session):
verify_email_is_invited(email)
@@ -168,20 +154,6 @@ def verify_email_domain(email: str) -> None:
)
def get_tenant_id_for_email(email: str) -> str:
if not MULTI_TENANT:
return "public"
# Implement logic to get tenant_id from the mapping table
with Session(get_sqlalchemy_engine()) as db_session:
result = db_session.execute(
select(UserTenantMapping.tenant_id).where(UserTenantMapping.email == email)
)
tenant_id = result.scalar_one_or_none()
if tenant_id is None:
raise exceptions.UserNotExists()
return tenant_id
def send_user_verification_email(
user_email: str,
token: str,
@@ -246,29 +218,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
raise exceptions.UserAlreadyExists()
return user
async def on_after_login(
self,
user: User,
request: Request | None = None,
response: Response | None = None,
) -> None:
if response is None or not MULTI_TENANT:
return
tenant_id = get_tenant_id_for_email(user.email)
tenant_token = jwt.encode(
{"tenant_id": tenant_id}, SECRET_JWT_KEY, algorithm="HS256"
)
response.set_cookie(
key="tenant_details",
value=tenant_token,
httponly=True,
secure=WEB_DOMAIN.startswith("https"),
samesite="lax",
)
async def oauth_callback(
self: "BaseUserManager[models.UOAP, models.ID]",
oauth_name: str,
@@ -282,111 +231,45 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
associate_by_email: bool = False,
is_verified_by_default: bool = False,
) -> models.UOAP:
# Get tenant_id from mapping table
try:
tenant_id = (
get_tenant_id_for_email(account_email) if MULTI_TENANT else "public"
verify_email_in_whitelist(account_email)
verify_email_domain(account_email)
user = await super().oauth_callback( # type: ignore
oauth_name=oauth_name,
access_token=access_token,
account_id=account_id,
account_email=account_email,
expires_at=expires_at,
refresh_token=refresh_token,
request=request,
associate_by_email=associate_by_email,
is_verified_by_default=is_verified_by_default,
)
# NOTE: Most IdPs have very short expiry times, and we don't want to force the user to
# re-authenticate that frequently, so by default this is disabled
if expires_at and TRACK_EXTERNAL_IDP_EXPIRY:
oidc_expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc)
await self.user_db.update(user, update_dict={"oidc_expiry": oidc_expiry})
# this is needed if an organization goes from `TRACK_EXTERNAL_IDP_EXPIRY=true` to `false`
# otherwise, the oidc expiry will always be old, and the user will never be able to login
if user.oidc_expiry and not TRACK_EXTERNAL_IDP_EXPIRY:
await self.user_db.update(user, update_dict={"oidc_expiry": None})
# Handle case where user has used product outside of web and is now creating an account through web
if not user.has_web_login:
await self.user_db.update(
user,
update_dict={
"is_verified": is_verified_by_default,
"has_web_login": True,
},
)
except exceptions.UserNotExists:
raise HTTPException(status_code=401, detail="User not found")
user.is_verified = is_verified_by_default
user.has_web_login = True
if not tenant_id:
raise HTTPException(status_code=401, detail="User not found")
token = None
async with get_async_session_with_tenant(tenant_id) as db_session:
token = current_tenant_id.set(tenant_id)
# Print a list of tables in the current database session
verify_email_in_whitelist(account_email, tenant_id)
verify_email_domain(account_email)
if MULTI_TENANT:
tenant_user_db = SQLAlchemyUserAdminDB(db_session, User, OAuthAccount)
self.user_db = tenant_user_db
self.database = tenant_user_db
oauth_account_dict = {
"oauth_name": oauth_name,
"access_token": access_token,
"account_id": account_id,
"account_email": account_email,
"expires_at": expires_at,
"refresh_token": refresh_token,
}
try:
# Attempt to get user by OAuth account
user = await self.get_by_oauth_account(oauth_name, account_id)
except exceptions.UserNotExists:
try:
# Attempt to get user by email
user = await self.get_by_email(account_email)
if not associate_by_email:
raise exceptions.UserAlreadyExists()
user = await self.user_db.add_oauth_account(
user, oauth_account_dict
)
# If user not found by OAuth account or email, create a new user
except exceptions.UserNotExists:
password = self.password_helper.generate()
user_dict = {
"email": account_email,
"hashed_password": self.password_helper.hash(password),
"is_verified": is_verified_by_default,
}
user = await self.user_db.create(user_dict)
user = await self.user_db.add_oauth_account(
user, oauth_account_dict
)
await self.on_after_register(user, request)
else:
for existing_oauth_account in user.oauth_accounts:
if (
existing_oauth_account.account_id == account_id
and existing_oauth_account.oauth_name == oauth_name
):
user = await self.user_db.update_oauth_account(
user, existing_oauth_account, oauth_account_dict
)
# NOTE: Most IdPs have very short expiry times, and we don't want to force the user to
# re-authenticate that frequently, so by default this is disabled
if expires_at and TRACK_EXTERNAL_IDP_EXPIRY:
oidc_expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc)
await self.user_db.update(
user, update_dict={"oidc_expiry": oidc_expiry}
)
# Handle case where user has used product outside of web and is now creating an account through web
if not user.has_web_login: # type: ignore
await self.user_db.update(
user,
{
"is_verified": is_verified_by_default,
"has_web_login": True,
},
)
user.is_verified = is_verified_by_default
user.has_web_login = True # type: ignore
# this is needed if an organization goes from `TRACK_EXTERNAL_IDP_EXPIRY=true` to `false`
# otherwise, the oidc expiry will always be old, and the user will never be able to login
if (
user.oidc_expiry is not None # type: ignore
and not TRACK_EXTERNAL_IDP_EXPIRY
):
await self.user_db.update(user, {"oidc_expiry": None})
user.oidc_expiry = None # type: ignore
if token:
current_tenant_id.reset(token)
return user
return user
async def on_after_register(
self, user: User, request: Optional[Request] = None
@@ -417,51 +300,28 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
async def authenticate(
self, credentials: OAuth2PasswordRequestForm
) -> Optional[User]:
email = credentials.username
# Get tenant_id from mapping table
tenant_id = get_tenant_id_for_email(email)
if not tenant_id:
# User not found in mapping
try:
user = await self.get_by_email(credentials.username)
except exceptions.UserNotExists:
self.password_helper.hash(credentials.password)
return None
# Create a tenant-specific session
async with get_async_session_with_tenant(tenant_id) as tenant_session:
tenant_user_db: SQLAlchemyUserDatabase = SQLAlchemyUserDatabase(
tenant_session, User
if not user.has_web_login:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD",
)
self.user_db = tenant_user_db
# Proceed with authentication
try:
user = await self.get_by_email(email)
verified, updated_password_hash = self.password_helper.verify_and_update(
credentials.password, user.hashed_password
)
if not verified:
return None
except exceptions.UserNotExists:
self.password_helper.hash(credentials.password)
return None
if updated_password_hash is not None:
await self.user_db.update(user, {"hashed_password": updated_password_hash})
has_web_login = attributes.get_attribute(user, "has_web_login")
if not has_web_login:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD",
)
verified, updated_password_hash = self.password_helper.verify_and_update(
credentials.password, user.hashed_password
)
if not verified:
return None
if updated_password_hash is not None:
await self.user_db.update(
user, {"hashed_password": updated_password_hash}
)
return user
return user
async def get_user_manager(
@@ -476,26 +336,21 @@ cookie_transport = CookieTransport(
)
def get_jwt_strategy() -> JWTStrategy:
return JWTStrategy(
secret=USER_AUTH_SECRET,
lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS,
)
def get_database_strategy(
access_token_db: AccessTokenDatabase[AccessToken] = Depends(get_access_token_db),
) -> DatabaseStrategy:
return DatabaseStrategy(
strategy = DatabaseStrategy(
access_token_db, lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS # type: ignore
)
return strategy
auth_backend = AuthenticationBackend(
name="jwt" if MULTI_TENANT else "database",
name="database",
transport=cookie_transport,
get_strategy=get_jwt_strategy if MULTI_TENANT else get_database_strategy, # type: ignore
) # type: ignore
get_strategy=get_database_strategy,
)
class FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]):
@@ -509,11 +364,9 @@ class FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]):
This way the login router does not need to be included
"""
router = APIRouter()
get_current_user_token = self.authenticator.current_user_token(
active=True, verified=requires_verification
)
logout_responses: OpenAPIResponseType = {
**{
status.HTTP_401_UNAUTHORIZED: {
@@ -560,8 +413,8 @@ async def optional_user_(
async def optional_user(
request: Request,
db_session: Session = Depends(get_session),
user: User | None = Depends(optional_fastapi_current_user),
db_session: Session = Depends(get_session),
) -> User | None:
versioned_fetch_user = fetch_versioned_implementation(
"danswer.auth.users", "optional_user_"
@@ -652,28 +505,3 @@ async def current_admin_user(user: User | None = Depends(current_user)) -> User
def get_default_admin_user_emails_() -> list[str]:
# No default seeding available for Danswer MIT
return []
async def control_plane_dep(request: Request) -> None:
api_key = request.headers.get("X-API-KEY")
if api_key != EXPECTED_API_KEY:
logger.warning("Invalid API key")
raise HTTPException(status_code=401, detail="Invalid API key")
auth_header = request.headers.get("Authorization")
if not auth_header or not auth_header.startswith("Bearer "):
logger.warning("Invalid authorization header")
raise HTTPException(status_code=401, detail="Invalid authorization header")
token = auth_header.split(" ")[1]
try:
payload = jwt.decode(token, DATA_PLANE_SECRET, algorithms=["HS256"])
if payload.get("scope") != "tenant:create":
logger.warning("Insufficient permissions")
raise HTTPException(status_code=403, detail="Insufficient permissions")
except jwt.ExpiredSignatureError:
logger.warning("Token has expired")
raise HTTPException(status_code=401, detail="Token has expired")
except jwt.InvalidTokenError:
logger.warning("Invalid token")
raise HTTPException(status_code=401, detail="Invalid token")
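control_plane_dep above accepts only a Bearer token signed with DATA_PLANE_SECRET and carrying scope "tenant:create". A hedged sketch of how a caller could mint such a token with PyJWT; the helper name and the five-minute expiry are assumptions for illustration, not part of the codebase:

from datetime import datetime, timedelta, timezone

import jwt  # PyJWT

def mint_control_plane_token(secret: str) -> str:
    # Hypothetical helper: produces a short-lived token the dependency above
    # would accept (HS256 signature, scope "tenant:create").
    payload = {
        "scope": "tenant:create",
        "exp": datetime.now(timezone.utc) + timedelta(minutes=5),
    }
    return jwt.encode(payload, secret, algorithm="HS256")

print(mint_control_plane_token("dev-only-secret"))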

File diff suppressed because it is too large


View File

@@ -21,7 +21,6 @@ from danswer.db.document import (
)
from danswer.db.document_set import construct_document_select_by_docset
from danswer.utils.variable_functionality import fetch_versioned_implementation
from danswer.utils.variable_functionality import global_version
class RedisObjectHelper(ABC):
@@ -107,7 +106,6 @@ class RedisObjectHelper(ABC):
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
pass
@@ -123,7 +121,6 @@ class RedisDocumentSet(RedisObjectHelper):
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
@@ -148,7 +145,7 @@ class RedisDocumentSet(RedisObjectHelper):
result = celery_app.send_task(
"vespa_metadata_sync_task",
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc.id),
queue=DanswerCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=DanswerCeleryPriority.LOW,
@@ -170,15 +167,11 @@ class RedisUserGroup(RedisObjectHelper):
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
async_results = []
if not global_version.is_ee_version():
return 0
try:
construct_document_select_by_usergroup = fetch_versioned_implementation(
"danswer.db.user_group",
@@ -207,7 +200,7 @@ class RedisUserGroup(RedisObjectHelper):
result = celery_app.send_task(
"vespa_metadata_sync_task",
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc.id),
queue=DanswerCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=DanswerCeleryPriority.LOW,
@@ -247,7 +240,6 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
@@ -282,7 +274,7 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
# Priority on sync's triggered by new indexing should be medium
result = celery_app.send_task(
"vespa_metadata_sync_task",
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc.id),
queue=DanswerCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=DanswerCeleryPriority.MEDIUM,
@@ -304,7 +296,6 @@ class RedisConnectorDeletion(RedisObjectHelper):
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
@@ -341,7 +332,6 @@ class RedisConnectorDeletion(RedisObjectHelper):
document_id=doc.id,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
tenant_id=tenant_id,
),
queue=DanswerCeleryQueues.CONNECTOR_DELETION,
task_id=custom_task_id,
@@ -353,127 +343,6 @@ class RedisConnectorDeletion(RedisObjectHelper):
return len(async_results)
class RedisConnectorPruning(RedisObjectHelper):
"""Celery will kick off a long running generator task to crawl the connector and
find any missing docs, which will each then get a new cleanup task. The progress of
those tasks will then be monitored to completion.
Example rough happy path order:
Check connectorpruning_fence_1
Send generator task with id connectorpruning+generator_1_{uuid}
generator runs connector with callbacks that increment connectorpruning_generator_progress_1
generator creates many subtasks with id connectorpruning+sub_1_{uuid}
in taskset connectorpruning_taskset_1
on completion, generator sets connectorpruning_generator_complete_1
celery postrun removes subtasks from taskset
monitor beat task cleans up when taskset reaches 0 items
"""
PREFIX = "connectorpruning"
FENCE_PREFIX = PREFIX + "_fence" # a fence for the entire pruning process
GENERATOR_TASK_PREFIX = PREFIX + "+generator"
TASKSET_PREFIX = PREFIX + "_taskset" # stores a list of prune tasks id's
SUBTASK_PREFIX = PREFIX + "+sub"
GENERATOR_PROGRESS_PREFIX = (
PREFIX + "_generator_progress"
) # a signal that contains generator progress
GENERATOR_COMPLETE_PREFIX = (
PREFIX + "_generator_complete"
) # a signal that the generator has finished
def __init__(self, id: int) -> None:
"""id: the cc_pair_id of the connector credential pair"""
super().__init__(id)
self.documents_to_prune: set[str] = set()
@property
def generator_task_id_prefix(self) -> str:
return f"{self.GENERATOR_TASK_PREFIX}_{self._id}"
@property
def generator_progress_key(self) -> str:
# example: connectorpruning_generator_progress_1
return f"{self.GENERATOR_PROGRESS_PREFIX}_{self._id}"
@property
def generator_complete_key(self) -> str:
# example: connectorpruning_generator_complete_1
return f"{self.GENERATOR_COMPLETE_PREFIX}_{self._id}"
@property
def subtask_id_prefix(self) -> str:
return f"{self.SUBTASK_PREFIX}_{self._id}"
def generate_tasks(
self,
celery_app: Celery,
db_session: Session,
redis_client: Redis,
lock: redis.lock.Lock | None,
tenant_id: str | None,
) -> int | None:
last_lock_time = time.monotonic()
async_results = []
cc_pair = get_connector_credential_pair_from_id(self._id, db_session)
if not cc_pair:
return None
for doc_id in self.documents_to_prune:
current_time = time.monotonic()
if lock and current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
):
lock.reacquire()
last_lock_time = current_time
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
# the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
# we prefix the task id so it's easier to keep track of who created the task
# aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac"
custom_task_id = f"{self.subtask_id_prefix}_{uuid4()}"
# add to the tracking taskset in redis BEFORE creating the celery task.
# note that for the moment we are using a single taskset key, not differentiated by cc_pair id
redis_client.sadd(self.taskset_key, custom_task_id)
# Priority on sync's triggered by new indexing should be medium
result = celery_app.send_task(
"document_by_cc_pair_cleanup_task",
kwargs=dict(
document_id=doc_id,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
tenant_id=tenant_id,
),
queue=DanswerCeleryQueues.CONNECTOR_DELETION,
task_id=custom_task_id,
priority=DanswerCeleryPriority.MEDIUM,
)
async_results.append(result)
return len(async_results)
def is_pruning(self, db_session: Session, redis_client: Redis) -> bool:
"""A single example of a helper method being refactored into the redis helper"""
cc_pair = get_connector_credential_pair_from_id(
cc_pair_id=self._id, db_session=db_session
)
if not cc_pair:
raise ValueError(f"cc_pair_id {self._id} does not exist.")
if redis_client.exists(self.fence_key):
return True
return False
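For illustration, the key names the class docstring above walks through, for the connector credential pair with id 1. The expected values in the comments are taken from that docstring and the inline comments; this is a usage sketch, not part of the changeset:

from danswer.background.celery.celery_redis import RedisConnectorPruning

rcp = RedisConnectorPruning(1)

print(rcp.fence_key)                 # connectorpruning_fence_1 (per the docstring)
print(rcp.taskset_key)               # connectorpruning_taskset_1
print(rcp.generator_progress_key)    # connectorpruning_generator_progress_1
print(rcp.generator_complete_key)    # connectorpruning_generator_complete_1
print(rcp.generator_task_id_prefix)  # connectorpruning+generator_1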
def celery_get_queue_length(queue: str, r: Redis) -> int:
"""This is a redis specific way to get the length of a celery queue.
It is priority aware and knows how to count across the multiple redis lists

View File

@@ -1,11 +1,11 @@
from collections.abc import Callable
from datetime import datetime
from datetime import timezone
from typing import Any
from sqlalchemy.orm import Session
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.task_utils import name_cc_prune_task
from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
from danswer.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
from danswer.connectors.cross_connector_utils.rate_limit_wrapper import (
rate_limit_builder,
@@ -16,14 +16,20 @@ from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.models import Document
from danswer.db.connector_credential_pair import get_connector_credential_pair
from danswer.db.engine import get_db_current_time
from danswer.db.enums import TaskStatus
from danswer.db.models import Connector
from danswer.db.models import Credential
from danswer.db.models import TaskQueueState
from danswer.redis.redis_pool import get_redis_client
from danswer.db.tasks import check_task_is_live_and_not_timed_out
from danswer.db.tasks import get_latest_task
from danswer.db.tasks import get_latest_task_by_type
from danswer.redis.redis_pool import RedisPool
from danswer.server.documents.models import DeletionAttemptSnapshot
from danswer.utils.logger import setup_logger
logger = setup_logger()
redis_pool = RedisPool()
def _get_deletion_status(
@@ -40,7 +46,7 @@ def _get_deletion_status(
rcd = RedisConnectorDeletion(cc_pair.id)
r = get_redis_client()
r = redis_pool.get_client()
if not r.exists(rcd.fence_key):
return None
@@ -63,19 +69,53 @@ def get_deletion_attempt_snapshot(
)
def should_prune_cc_pair(
connector: Connector, credential: Credential, db_session: Session
) -> bool:
if not connector.prune_freq:
return False
pruning_task_name = name_cc_prune_task(
connector_id=connector.id, credential_id=credential.id
)
last_pruning_task = get_latest_task(pruning_task_name, db_session)
current_db_time = get_db_current_time(db_session)
if not last_pruning_task:
time_since_initialization = current_db_time - connector.time_created
if time_since_initialization.total_seconds() >= connector.prune_freq:
return True
return False
if not ALLOW_SIMULTANEOUS_PRUNING:
pruning_type_task_name = name_cc_prune_task()
last_pruning_type_task = get_latest_task_by_type(
pruning_type_task_name, db_session
)
if last_pruning_type_task and check_task_is_live_and_not_timed_out(
last_pruning_type_task, db_session
):
return False
if check_task_is_live_and_not_timed_out(last_pruning_task, db_session):
return False
if not last_pruning_task.start_time:
return False
time_since_last_pruning = current_db_time - last_pruning_task.start_time
return time_since_last_pruning.total_seconds() >= connector.prune_freq
def document_batch_to_ids(doc_batch: list[Document]) -> set[str]:
return {doc.id for doc in doc_batch}
def extract_ids_from_runnable_connector(
runnable_connector: BaseConnector,
progress_callback: Callable[[int], None] | None = None,
) -> set[str]:
def extract_ids_from_runnable_connector(runnable_connector: BaseConnector) -> set[str]:
"""
If the PruneConnector hasn't been implemented for the given connector, just pull
all docs using the load_from_state and grab out the IDs.
Optionally, a callback can be passed to handle the length of each document batch.
all docs using the load_from_state and grab out the IDs
"""
all_connector_doc_ids: set[str] = set()
@@ -98,36 +138,6 @@ def extract_ids_from_runnable_connector(
max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60
)(document_batch_to_ids)
for doc_batch in doc_batch_generator:
if progress_callback:
progress_callback(len(doc_batch))
all_connector_doc_ids.update(doc_batch_processing_func(doc_batch))
return all_connector_doc_ids
def celery_is_listening_to_queue(worker: Any, name: str) -> bool:
"""Checks to see if we're listening to the named queue"""
# how to get a list of queues this worker is listening to
# https://stackoverflow.com/questions/29790523/how-to-determine-which-queues-a-celery-worker-is-consuming-at-runtime
queue_names = list(worker.app.amqp.queues.consume_from.keys())
for queue_name in queue_names:
if queue_name == name:
return True
return False
def celery_is_worker_primary(worker: Any) -> bool:
"""There are multiple approaches that could be taken to determine if a celery worker
is 'primary', as defined by us. But the way we do it is to check the hostname set
for the celery worker, which can be done either in celeryconfig.py or on the
command line with '--hostname'."""
hostname = worker.hostname
if hostname.startswith("light"):
return False
if hostname.startswith("heavy"):
return False
return True

View File

@@ -1,11 +1,7 @@
# docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html
import urllib.parse
from danswer.configs.app_configs import CELERY_BROKER_POOL_LIMIT
from danswer.configs.app_configs import CELERY_RESULT_EXPIRES
from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY
from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY_RESULT_BACKEND
from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
from danswer.configs.app_configs import REDIS_HOST
from danswer.configs.app_configs import REDIS_PASSWORD
from danswer.configs.app_configs import REDIS_PORT
@@ -13,13 +9,12 @@ from danswer.configs.app_configs import REDIS_SSL
from danswer.configs.app_configs import REDIS_SSL_CA_CERTS
from danswer.configs.app_configs import REDIS_SSL_CERT_REQS
from danswer.configs.constants import DanswerCeleryPriority
from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS
CELERY_SEPARATOR = ":"
CELERY_PASSWORD_PART = ""
if REDIS_PASSWORD:
CELERY_PASSWORD_PART = ":" + urllib.parse.quote(REDIS_PASSWORD, safe="") + "@"
CELERY_PASSWORD_PART = f":{REDIS_PASSWORD}@"
REDIS_SCHEME = "redis"
@@ -41,30 +36,12 @@ result_backend = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PO
# can stall other tasks.
worker_prefetch_multiplier = 4
broker_connection_retry_on_startup = True
broker_pool_limit = CELERY_BROKER_POOL_LIMIT
# redis broker settings
# https://docs.celeryq.dev/projects/kombu/en/stable/reference/kombu.transport.redis.html
broker_transport_options = {
"priority_steps": list(range(len(DanswerCeleryPriority))),
"sep": CELERY_SEPARATOR,
"queue_order_strategy": "priority",
"retry_on_timeout": True,
"health_check_interval": REDIS_HEALTH_CHECK_INTERVAL,
"socket_keepalive": True,
"socket_keepalive_options": REDIS_SOCKET_KEEPALIVE_OPTIONS,
}
# redis backend settings
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings
# there doesn't appear to be a way to set socket_keepalive_options on the redis result backend
redis_socket_keepalive = True
redis_retry_on_timeout = True
redis_backend_health_check_interval = REDIS_HEALTH_CHECK_INTERVAL
task_default_priority = DanswerCeleryPriority.MEDIUM
task_acks_late = True
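The urllib.parse.quote change shown above matters whenever the Redis password contains characters that are meaningful inside a URL. A small illustrative check with a placeholder password and host:

import urllib.parse

password = "p@ss/word:1"  # illustrative only
quoted = urllib.parse.quote(password, safe="")

broker_url = f"redis://:{quoted}@localhost:6379/15"
print(broker_url)  # redis://:p%40ss%2Fword%3A1@localhost:6379/15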

View File

@@ -1,113 +0,0 @@
import redis
from celery import shared_task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from sqlalchemy.orm import Session
from sqlalchemy.orm.exc import ObjectDeletedError
from danswer.background.celery.celery_app import celery_app
from danswer.background.celery.celery_app import task_logger
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from danswer.configs.constants import DanswerRedisLocks
from danswer.db.connector_credential_pair import get_connector_credential_pairs
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.models import ConnectorCredentialPair
from danswer.redis.redis_pool import get_redis_client
@shared_task(
name="check_for_connector_deletion_task",
soft_time_limit=JOB_TIMEOUT,
trail=False,
)
def check_for_connector_deletion_task(tenant_id: str | None) -> None:
r = get_redis_client()
lock_beat = r.lock(
DanswerRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
try:
# these tasks should never overlap
if not lock_beat.acquire(blocking=False):
return
with Session(get_sqlalchemy_engine()) as db_session:
cc_pairs = get_connector_credential_pairs(db_session)
for cc_pair in cc_pairs:
try_generate_document_cc_pair_cleanup_tasks(
cc_pair, db_session, r, lock_beat, tenant_id
)
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception("Unexpected exception")
finally:
if lock_beat.owned():
lock_beat.release()
def try_generate_document_cc_pair_cleanup_tasks(
cc_pair: ConnectorCredentialPair,
db_session: Session,
r: Redis,
lock_beat: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
Note that syncing can still be required even if the number of sync tasks generated is zero.
Returns None if no syncing is required.
"""
lock_beat.reacquire()
rcd = RedisConnectorDeletion(cc_pair.id)
# don't generate sync tasks if tasks are still pending
if r.exists(rcd.fence_key):
return None
# we need to refresh the state of the object inside the fence
# to avoid a race condition with db.commit/fence deletion
# at the end of this taskset
try:
db_session.refresh(cc_pair)
except ObjectDeletedError:
return None
if cc_pair.status != ConnectorCredentialPairStatus.DELETING:
return None
# add tasks to celery and build up the task set to monitor in redis
r.delete(rcd.taskset_key)
# Add all documents that need to be updated into the queue
task_logger.info(
f"RedisConnectorDeletion.generate_tasks starting. cc_pair_id={cc_pair.id}"
)
tasks_generated = rcd.generate_tasks(
celery_app, db_session, r, lock_beat, tenant_id
)
if tasks_generated is None:
return None
# Currently we are allowing the sync to proceed with 0 tasks.
# It's possible for sets/groups to be generated initially with no entries
# and they still need to be marked as up to date.
# if tasks_generated == 0:
# return 0
task_logger.info(
f"RedisConnectorDeletion.generate_tasks finished. "
f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}"
)
# set this only after all tasks have been added
r.set(rcd.fence_key, tasks_generated)
return tasks_generated
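The non-blocking beat-lock pattern used by check_for_connector_deletion_task above, isolated into a minimal sketch. It assumes a local Redis instance; the lock name and the work inside the loop are placeholders:

import redis

r = redis.Redis(host="localhost", port=6379)
lock_beat = r.lock("example_beat_lock", timeout=120)

if lock_beat.acquire(blocking=False):  # another worker holds it -> skip this beat
    try:
        for _ in range(10):
            ...                        # a chunk of work (task generation, etc.)
            lock_beat.reacquire()      # extend the TTL while still making progress
    finally:
        if lock_beat.owned():
            lock_beat.release()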

View File

@@ -1,137 +0,0 @@
#####
# Periodic Tasks
#####
import json
from typing import Any
from celery import shared_task
from celery.contrib.abortable import AbortableTask # type: ignore
from celery.exceptions import TaskRevokedError
from sqlalchemy import inspect
from sqlalchemy import text
from sqlalchemy.orm import Session
from danswer.background.celery.celery_app import task_logger
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import PostgresAdvisoryLocks
from danswer.db.engine import get_sqlalchemy_engine # type: ignore
@shared_task(
name="kombu_message_cleanup_task",
soft_time_limit=JOB_TIMEOUT,
bind=True,
base=AbortableTask,
)
def kombu_message_cleanup_task(self: Any) -> int:
"""Runs periodically to clean up the kombu_message table"""
# we will select messages older than this amount to clean up
KOMBU_MESSAGE_CLEANUP_AGE = 7 # days
KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT = 1000
ctx = {}
ctx["last_processed_id"] = 0
ctx["deleted"] = 0
ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE
ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT
with Session(get_sqlalchemy_engine()) as db_session:
# Exit the task if we can't take the advisory lock
result = db_session.execute(
text("SELECT pg_try_advisory_lock(:id)"),
{"id": PostgresAdvisoryLocks.KOMBU_MESSAGE_CLEANUP_LOCK_ID.value},
).scalar()
if not result:
return 0
while True:
if self.is_aborted():
raise TaskRevokedError("kombu_message_cleanup_task was aborted.")
b = kombu_message_cleanup_task_helper(ctx, db_session)
if not b:
break
db_session.commit()
if ctx["deleted"] > 0:
task_logger.info(
f"Deleted {ctx['deleted']} orphaned messages from kombu_message."
)
return ctx["deleted"]
def kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool:
"""
Helper function to clean up old messages from the `kombu_message` table that are no longer relevant.
This function retrieves messages from the `kombu_message` table that are no longer visible and
older than a specified interval. It checks if the corresponding task_id exists in the
`celery_taskmeta` table. If the task_id does not exist, the message is deleted.
Args:
ctx (dict): A context dictionary containing configuration parameters such as:
- 'cleanup_age' (int): The age in days after which messages are considered old.
- 'page_limit' (int): The maximum number of messages to process in one batch.
- 'last_processed_id' (int): The ID of the last processed message to handle pagination.
- 'deleted' (int): A counter to track the number of deleted messages.
db_session (Session): The SQLAlchemy database session for executing queries.
Returns:
bool: Returns True if there are more rows to process, False if not.
"""
inspector = inspect(db_session.bind)
if not inspector:
return False
# With the move to redis as celery's broker and backend, kombu tables may not even exist.
# We can fail silently.
if not inspector.has_table("kombu_message"):
return False
query = text(
"""
SELECT id, timestamp, payload
FROM kombu_message WHERE visible = 'false'
AND timestamp < CURRENT_TIMESTAMP - INTERVAL :interval_days
AND id > :last_processed_id
ORDER BY id
LIMIT :page_limit
"""
)
kombu_messages = db_session.execute(
query,
{
"interval_days": f"{ctx['cleanup_age']} days",
"page_limit": ctx["page_limit"],
"last_processed_id": ctx["last_processed_id"],
},
).fetchall()
if len(kombu_messages) == 0:
return False
for msg in kombu_messages:
payload = json.loads(msg[2])
task_id = payload["headers"]["id"]
# Check if task_id exists in celery_taskmeta
task_exists = db_session.execute(
text("SELECT 1 FROM celery_taskmeta WHERE task_id = :task_id"),
{"task_id": task_id},
).fetchone()
# If task_id does not exist, delete the message
if not task_exists:
result = db_session.execute(
text("DELETE FROM kombu_message WHERE id = :message_id"),
{"message_id": msg[0]},
)
if result.rowcount > 0: # type: ignore
ctx["deleted"] += 1
ctx["last_processed_id"] = msg[0]
return True
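The pg_try_advisory_lock guard used by the cleanup task above, shown standalone. The DSN and lock id are placeholders; a session-level advisory lock is also released automatically when the connection closes:

from sqlalchemy import create_engine, text

engine = create_engine("postgresql://user:pass@localhost/danswer")  # placeholder DSN

with engine.connect() as conn:
    got_lock = conn.execute(
        text("SELECT pg_try_advisory_lock(:id)"), {"id": 42}
    ).scalar()
    if got_lock:
        try:
            ...  # exclusive cleanup work goes here
        finally:
            conn.execute(text("SELECT pg_advisory_unlock(:id)"), {"id": 42})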

View File

@@ -1,251 +0,0 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from uuid import uuid4
import redis
from celery import shared_task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from sqlalchemy.orm import Session
from danswer.background.celery.celery_app import celery_app
from danswer.background.celery.celery_app import task_logger
from danswer.background.celery.celery_redis import RedisConnectorPruning
from danswer.background.celery.celery_utils import extract_ids_from_runnable_connector
from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from danswer.configs.constants import DanswerCeleryPriority
from danswer.configs.constants import DanswerCeleryQueues
from danswer.configs.constants import DanswerRedisLocks
from danswer.connectors.factory import instantiate_connector
from danswer.connectors.models import InputType
from danswer.db.connector_credential_pair import get_connector_credential_pair
from danswer.db.connector_credential_pair import get_connector_credential_pairs
from danswer.db.document import get_documents_for_connector_credential_pair
from danswer.db.engine import get_session_with_tenant
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.models import ConnectorCredentialPair
from danswer.redis.redis_pool import get_redis_client
from danswer.utils.logger import setup_logger
logger = setup_logger()
@shared_task(
name="check_for_prune_task_2",
soft_time_limit=JOB_TIMEOUT,
)
def check_for_prune_task_2(tenant_id: str | None) -> None:
r = get_redis_client()
lock_beat = r.lock(
DanswerRedisLocks.CHECK_PRUNE_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
try:
# these tasks should never overlap
if not lock_beat.acquire(blocking=False):
return
with get_session_with_tenant(tenant_id) as db_session:
cc_pairs = get_connector_credential_pairs(db_session)
for cc_pair in cc_pairs:
tasks_created = ccpair_pruning_generator_task_creation_helper(
cc_pair, db_session, tenant_id, r, lock_beat
)
if not tasks_created:
continue
task_logger.info(f"Pruning started: cc_pair_id={cc_pair.id}")
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception("Unexpected exception")
finally:
if lock_beat.owned():
lock_beat.release()
def ccpair_pruning_generator_task_creation_helper(
cc_pair: ConnectorCredentialPair,
db_session: Session,
tenant_id: str | None,
r: Redis,
lock_beat: redis.lock.Lock,
) -> int | None:
"""Returns an int if pruning is triggered.
The int represents the number of prune tasks generated (in this case, only one
because the task is a long running generator task.)
Returns None if no pruning is triggered (due to not being needed or
other reasons such as simultaneous pruning restrictions).
Checks for scheduling related conditions, then delegates the rest of the checks to
try_creating_prune_generator_task.
"""
lock_beat.reacquire()
# skip pruning if no prune frequency is set
# pruning can still be forced via the API which will run a pruning task directly
if not cc_pair.connector.prune_freq:
return None
# skip pruning if the next scheduled prune time hasn't been reached yet
last_pruned = cc_pair.last_pruned
if not last_pruned:
# if never pruned, use the connector time created as the last_pruned time
last_pruned = cc_pair.connector.time_created
next_prune = last_pruned + timedelta(seconds=cc_pair.connector.prune_freq)
if datetime.now(timezone.utc) < next_prune:
return None
return try_creating_prune_generator_task(cc_pair, db_session, r, tenant_id)
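A worked example of the scheduling check above; the dates and prune frequency are illustrative (prune_freq is in seconds):

from datetime import datetime, timedelta, timezone

last_pruned = datetime(2024, 9, 1, tzinfo=timezone.utc)  # or connector.time_created
prune_freq = 7 * 24 * 3600                               # one week, in seconds

next_prune = last_pruned + timedelta(seconds=prune_freq)
if datetime.now(timezone.utc) < next_prune:
    print("not due yet, next prune at", next_prune)      # 2024-09-08 00:00:00+00:00
else:
    print("prune is due")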
def try_creating_prune_generator_task(
cc_pair: ConnectorCredentialPair,
db_session: Session,
r: Redis,
tenant_id: str | None,
) -> int | None:
"""Checks for any conditions that should block the pruning generator task from being
created, then creates the task.
Does not check for scheduling related conditions as this function
is used to trigger prunes immediately.
"""
if not ALLOW_SIMULTANEOUS_PRUNING:
for key in r.scan_iter(RedisConnectorPruning.FENCE_PREFIX + "*"):
return None
rcp = RedisConnectorPruning(cc_pair.id)
# skip pruning if already pruning
if r.exists(rcp.fence_key):
return None
# skip pruning if the cc_pair is deleting
db_session.refresh(cc_pair)
if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
return None
# add a long running generator task to the queue
r.delete(rcp.generator_complete_key)
r.delete(rcp.taskset_key)
custom_task_id = f"{rcp.generator_task_id_prefix}_{uuid4()}"
celery_app.send_task(
"connector_pruning_generator_task",
kwargs=dict(
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
tenant_id=tenant_id,
),
queue=DanswerCeleryQueues.CONNECTOR_PRUNING,
task_id=custom_task_id,
priority=DanswerCeleryPriority.LOW,
)
# set this only after all tasks have been added
r.set(rcp.fence_key, 1)
return 1
@shared_task(name="connector_pruning_generator_task", soft_time_limit=JOB_TIMEOUT)
def connector_pruning_generator_task(
connector_id: int, credential_id: int, tenant_id: str | None
) -> None:
"""connector pruning task. For a cc pair, this task pulls all document IDs from the source
and compares those IDs to locally stored documents and deletes all locally stored IDs missing
from the most recently pulled document ID list"""
r = get_redis_client()
with get_session_with_tenant(tenant_id) as db_session:
try:
cc_pair = get_connector_credential_pair(
db_session=db_session,
connector_id=connector_id,
credential_id=credential_id,
)
if not cc_pair:
task_logger.warning(
f"ccpair not found for {connector_id} {credential_id}"
)
return
rcp = RedisConnectorPruning(cc_pair.id)
# Define the callback function
def redis_increment_callback(amount: int) -> None:
r.incrby(rcp.generator_progress_key, amount)
runnable_connector = instantiate_connector(
db_session,
cc_pair.connector.source,
InputType.PRUNE,
cc_pair.connector.connector_specific_config,
cc_pair.credential,
)
# a list of docs in the source
all_connector_doc_ids: set[str] = extract_ids_from_runnable_connector(
runnable_connector, redis_increment_callback
)
# a list of docs in our local index
all_indexed_document_ids = {
doc.id
for doc in get_documents_for_connector_credential_pair(
db_session=db_session,
connector_id=connector_id,
credential_id=credential_id,
)
}
# generate list of docs to remove (no longer in the source)
doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
task_logger.info(
f"Pruning set collected: "
f"cc_pair_id={cc_pair.id} "
f"docs_to_remove={len(doc_ids_to_remove)} "
f"doc_source={cc_pair.connector.source}"
)
rcp.documents_to_prune = set(doc_ids_to_remove)
task_logger.info(
f"RedisConnectorPruning.generate_tasks starting. cc_pair_id={cc_pair.id}"
)
tasks_generated = rcp.generate_tasks(
celery_app, db_session, r, None, tenant_id
)
if tasks_generated is None:
return None
task_logger.info(
f"RedisConnectorPruning.generate_tasks finished. "
f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}"
)
r.set(rcp.generator_complete_key, tasks_generated)
except Exception as e:
task_logger.exception(
f"Failed to run pruning for connector id {connector_id}."
)
r.delete(rcp.generator_progress_key)
r.delete(rcp.taskset_key)
r.delete(rcp.fence_key)
raise e
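The core of the pruning computation above is a plain set difference; a tiny example with made-up document ids:

all_indexed_document_ids = {"doc-1", "doc-2", "doc-3"}  # what the local index holds
all_connector_doc_ids = {"doc-1", "doc-3"}              # what the source still has

doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
print(doc_ids_to_remove)  # ['doc-2'] -> each gets a document_by_cc_pair_cleanup_task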

View File

@@ -1,126 +0,0 @@
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from danswer.access.access import get_access_for_document
from danswer.background.celery.celery_app import task_logger
from danswer.db.document import delete_document_by_connector_credential_pair__no_commit
from danswer.db.document import delete_documents_complete__no_commit
from danswer.db.document import get_document
from danswer.db.document import get_document_connector_count
from danswer.db.document import mark_document_as_synced
from danswer.db.document_set import fetch_document_sets_for_document
from danswer.db.engine import get_session_with_tenant
from danswer.document_index.document_index_utils import get_both_index_names
from danswer.document_index.factory import get_default_document_index
from danswer.document_index.interfaces import VespaDocumentFields
from danswer.server.documents.models import ConnectorCredentialPairIdentifier
@shared_task(
name="document_by_cc_pair_cleanup_task",
bind=True,
soft_time_limit=45,
time_limit=60,
max_retries=3,
)
def document_by_cc_pair_cleanup_task(
self: Task,
document_id: str,
connector_id: int,
credential_id: int,
tenant_id: str | None,
) -> bool:
"""A lightweight subtask used to clean up document to cc pair relationships.
Created by connection deletion and connector pruning parent tasks."""
"""
To delete a connector / credential pair:
(1) find all documents associated with connector / credential pair where this
is the only connector / credential pair that has indexed it
(2) delete all documents from document stores
(3) delete all entries from postgres
(4) find all documents associated with connector / credential pair where there
are multiple connector / credential pairs that have indexed it
(5) update document store entries to remove access associated with the
connector / credential pair from the access list
(6) delete all relevant entries from postgres
"""
try:
with get_session_with_tenant(tenant_id) as db_session:
action = "skip"
chunks_affected = 0
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
document_index = get_default_document_index(
primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
)
count = get_document_connector_count(db_session, document_id)
if count == 1:
# count == 1 means this is the only remaining cc_pair reference to the doc
# delete it from vespa and the db
action = "delete"
chunks_affected = document_index.delete_single(document_id)
delete_documents_complete__no_commit(
db_session=db_session,
document_ids=[document_id],
)
elif count > 1:
action = "update"
# count > 1 means the document still has cc_pair references
doc = get_document(document_id, db_session)
if not doc:
return False
# the below functions do not include cc_pairs being deleted.
# i.e. they will correctly omit access for the current cc_pair
doc_access = get_access_for_document(
document_id=document_id, db_session=db_session
)
doc_sets = fetch_document_sets_for_document(document_id, db_session)
update_doc_sets: set[str] = set(doc_sets)
fields = VespaDocumentFields(
document_sets=update_doc_sets,
access=doc_access,
boost=doc.boost,
hidden=doc.hidden,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
chunks_affected = document_index.update_single(
document_id, fields=fields
)
# there are still other cc_pair references to the doc, so just resync to Vespa
delete_document_by_connector_credential_pair__no_commit(
db_session=db_session,
document_id=document_id,
connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
mark_document_as_synced(document_id, db_session)
else:
pass
task_logger.info(
f"document_id={document_id} refcount={count} action={action} chunks={chunks_affected}"
)
db_session.commit()
except SoftTimeLimitExceeded:
task_logger.info(f"SoftTimeLimitExceeded exception. doc_id={document_id}")
except Exception as e:
task_logger.exception("Unexpected exception")
# Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
countdown = 2 ** (self.request.retries + 4)
self.retry(exc=e, countdown=countdown)
return True
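The refcount branch above reduces to a small decision table; a sketch with an assumed helper name:

def cleanup_action(refcount: int) -> str:
    # refcount = number of cc_pairs still referencing the document
    if refcount == 1:
        return "delete"   # last reference: remove from Vespa and Postgres
    if refcount > 1:
        return "update"   # other references remain: resync access/doc sets to Vespa
    return "skip"         # document already gone

print([cleanup_action(n) for n in (0, 1, 3)])  # ['skip', 'delete', 'update']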

View File

@@ -1,621 +0,0 @@
import traceback
from typing import cast
import redis
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from redis import Redis
from sqlalchemy.orm import Session
from danswer.access.access import get_access_for_document
from danswer.background.celery.celery_app import celery_app
from danswer.background.celery.celery_app import task_logger
from danswer.background.celery.celery_redis import celery_get_queue_length
from danswer.background.celery.celery_redis import RedisConnectorCredentialPair
from danswer.background.celery.celery_redis import RedisConnectorDeletion
from danswer.background.celery.celery_redis import RedisConnectorPruning
from danswer.background.celery.celery_redis import RedisDocumentSet
from danswer.background.celery.celery_redis import RedisUserGroup
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from danswer.configs.constants import DanswerCeleryQueues
from danswer.configs.constants import DanswerRedisLocks
from danswer.db.connector import fetch_connector_by_id
from danswer.db.connector import mark_ccpair_as_pruned
from danswer.db.connector_credential_pair import add_deletion_failure_message
from danswer.db.connector_credential_pair import (
delete_connector_credential_pair__no_commit,
)
from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
from danswer.db.connector_credential_pair import get_connector_credential_pairs
from danswer.db.document import count_documents_by_needs_sync
from danswer.db.document import get_document
from danswer.db.document import mark_document_as_synced
from danswer.db.document_set import delete_document_set
from danswer.db.document_set import delete_document_set_cc_pair_relationship__no_commit
from danswer.db.document_set import fetch_document_sets
from danswer.db.document_set import fetch_document_sets_for_document
from danswer.db.document_set import get_document_set_by_id
from danswer.db.document_set import mark_document_set_as_synced
from danswer.db.engine import get_session_with_tenant
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.index_attempt import delete_index_attempts
from danswer.db.models import DocumentSet
from danswer.db.models import UserGroup
from danswer.document_index.document_index_utils import get_both_index_names
from danswer.document_index.factory import get_default_document_index
from danswer.document_index.interfaces import UpdateRequest
from danswer.redis.redis_pool import get_redis_client
from danswer.utils.variable_functionality import fetch_versioned_implementation
from danswer.utils.variable_functionality import (
fetch_versioned_implementation_with_fallback,
)
from danswer.utils.variable_functionality import global_version
from danswer.utils.variable_functionality import noop_fallback
# celery auto associates tasks created inside another task,
# which bloats the result metadata considerably. trail=False prevents this.
@shared_task(
name="check_for_vespa_sync_task",
soft_time_limit=JOB_TIMEOUT,
trail=False,
)
def check_for_vespa_sync_task(tenant_id: str | None) -> None:
"""Runs periodically to check if any document needs syncing.
Generates sets of tasks for Celery if syncing is needed."""
r = get_redis_client()
lock_beat = r.lock(
DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
try:
# these tasks should never overlap
if not lock_beat.acquire(blocking=False):
return
with get_session_with_tenant(tenant_id) as db_session:
try_generate_stale_document_sync_tasks(db_session, r, lock_beat, tenant_id)
# check if any document sets are not synced
document_set_info = fetch_document_sets(
user_id=None, db_session=db_session, include_outdated=True
)
for document_set, _ in document_set_info:
try_generate_document_set_sync_tasks(
document_set, db_session, r, lock_beat, tenant_id
)
# check if any user groups are not synced
if global_version.is_ee_version():
try:
fetch_user_groups = fetch_versioned_implementation(
"danswer.db.user_group", "fetch_user_groups"
)
user_groups = fetch_user_groups(
db_session=db_session, only_up_to_date=False
)
for usergroup in user_groups:
try_generate_user_group_sync_tasks(
usergroup, db_session, r, lock_beat, tenant_id
)
except ModuleNotFoundError:
# Always exceptions on the MIT version, which is expected
# We shouldn't actually get here if the ee version check works
pass
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception:
task_logger.exception("Unexpected exception")
finally:
if lock_beat.owned():
lock_beat.release()
def try_generate_stale_document_sync_tasks(
db_session: Session, r: Redis, lock_beat: redis.lock.Lock, tenant_id: str | None
) -> int | None:
# the fence is up, do nothing
if r.exists(RedisConnectorCredentialPair.get_fence_key()):
return None
r.delete(RedisConnectorCredentialPair.get_taskset_key()) # delete the taskset
# add tasks to celery and build up the task set to monitor in redis
stale_doc_count = count_documents_by_needs_sync(db_session)
if stale_doc_count == 0:
return None
task_logger.info(
f"Stale documents found (at least {stale_doc_count}). Generating sync tasks by cc pair."
)
task_logger.info("RedisConnector.generate_tasks starting by cc_pair.")
# rkuo: we could technically sync all stale docs in one big pass.
# but I feel it's more understandable to group the docs by cc_pair
total_tasks_generated = 0
cc_pairs = get_connector_credential_pairs(db_session)
for cc_pair in cc_pairs:
rc = RedisConnectorCredentialPair(cc_pair.id)
tasks_generated = rc.generate_tasks(
celery_app, db_session, r, lock_beat, tenant_id
)
if tasks_generated is None:
continue
if tasks_generated == 0:
continue
task_logger.info(
f"RedisConnector.generate_tasks finished for single cc_pair. "
f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}"
)
total_tasks_generated += tasks_generated
task_logger.info(
f"RedisConnector.generate_tasks finished for all cc_pairs. total_tasks_generated={total_tasks_generated}"
)
r.set(RedisConnectorCredentialPair.get_fence_key(), total_tasks_generated)
return total_tasks_generated
def try_generate_document_set_sync_tasks(
document_set: DocumentSet,
db_session: Session,
r: Redis,
lock_beat: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
lock_beat.reacquire()
rds = RedisDocumentSet(document_set.id)
# don't generate document set sync tasks if tasks are still pending
if r.exists(rds.fence_key):
return None
# don't generate sync tasks if we're up to date
# race condition with the monitor/cleanup function if we use a cached result!
db_session.refresh(document_set)
if document_set.is_up_to_date:
return None
# add tasks to celery and build up the task set to monitor in redis
r.delete(rds.taskset_key)
task_logger.info(
f"RedisDocumentSet.generate_tasks starting. document_set_id={document_set.id}"
)
# Add all documents that need to be updated into the queue
tasks_generated = rds.generate_tasks(
celery_app, db_session, r, lock_beat, tenant_id
)
if tasks_generated is None:
return None
# Currently we are allowing the sync to proceed with 0 tasks.
# It's possible for sets/groups to be generated initially with no entries
# and they still need to be marked as up to date.
# if tasks_generated == 0:
# return 0
task_logger.info(
f"RedisDocumentSet.generate_tasks finished. "
f"document_set_id={document_set.id} tasks_generated={tasks_generated}"
)
# set this only after all tasks have been added
r.set(rds.fence_key, tasks_generated)
return tasks_generated
def try_generate_user_group_sync_tasks(
usergroup: UserGroup,
db_session: Session,
r: Redis,
lock_beat: redis.lock.Lock,
tenant_id: str | None,
) -> int | None:
lock_beat.reacquire()
rug = RedisUserGroup(usergroup.id)
# don't generate sync tasks if tasks are still pending
if r.exists(rug.fence_key):
return None
# race condition with the monitor/cleanup function if we use a cached result!
db_session.refresh(usergroup)
if usergroup.is_up_to_date:
return None
# add tasks to celery and build up the task set to monitor in redis
r.delete(rug.taskset_key)
# Add all documents that need to be updated into the queue
task_logger.info(
f"RedisUserGroup.generate_tasks starting. usergroup_id={usergroup.id}"
)
tasks_generated = rug.generate_tasks(
celery_app, db_session, r, lock_beat, tenant_id
)
if tasks_generated is None:
return None
# Currently we are allowing the sync to proceed with 0 tasks.
# It's possible for sets/groups to be generated initially with no entries
# and they still need to be marked as up to date.
# if tasks_generated == 0:
# return 0
task_logger.info(
f"RedisUserGroup.generate_tasks finished. "
f"usergroup_id={usergroup.id} tasks_generated={tasks_generated}"
)
# set this only after all tasks have been added
r.set(rug.fence_key, tasks_generated)
return tasks_generated
def monitor_connector_taskset(r: Redis) -> None:
fence_value = r.get(RedisConnectorCredentialPair.get_fence_key())
if fence_value is None:
return
try:
initial_count = int(cast(int, fence_value))
except ValueError:
task_logger.error("The value is not an integer.")
return
count = r.scard(RedisConnectorCredentialPair.get_taskset_key())
task_logger.info(
f"Stale document sync progress: remaining={count} initial={initial_count}"
)
if count == 0:
r.delete(RedisConnectorCredentialPair.get_taskset_key())
r.delete(RedisConnectorCredentialPair.get_fence_key())
task_logger.info(f"Successfully synced stale documents. count={initial_count}")
def monitor_document_set_taskset(
key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
document_set_id = RedisDocumentSet.get_id_from_fence_key(fence_key)
if document_set_id is None:
task_logger.warning(f"could not parse document set id from {fence_key}")
return
rds = RedisDocumentSet(document_set_id)
fence_value = r.get(rds.fence_key)
if fence_value is None:
return
try:
initial_count = int(cast(int, fence_value))
except ValueError:
task_logger.error("The value is not an integer.")
return
count = cast(int, r.scard(rds.taskset_key))
task_logger.info(
f"Document set sync progress: document_set_id={document_set_id} remaining={count} initial={initial_count}"
)
if count > 0:
return
document_set = cast(
DocumentSet,
get_document_set_by_id(db_session=db_session, document_set_id=document_set_id),
) # casting since we "know" a document set with this ID exists
if document_set:
if not document_set.connector_credential_pairs:
# if there are no connectors, then delete the document set.
delete_document_set(document_set_row=document_set, db_session=db_session)
task_logger.info(
f"Successfully deleted document set with ID: '{document_set_id}'!"
)
else:
mark_document_set_as_synced(document_set_id, db_session)
task_logger.info(
f"Successfully synced document set with ID: '{document_set_id}'!"
)
r.delete(rds.taskset_key)
r.delete(rds.fence_key)
def monitor_connector_deletion_taskset(key_bytes: bytes, r: Redis) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id = RedisConnectorDeletion.get_id_from_fence_key(fence_key)
if cc_pair_id is None:
task_logger.warning(f"could not parse cc_pair_id from {fence_key}")
return
rcd = RedisConnectorDeletion(cc_pair_id)
fence_value = r.get(rcd.fence_key)
if fence_value is None:
return
try:
initial_count = int(cast(int, fence_value))
except ValueError:
task_logger.error("The value is not an integer.")
return
count = cast(int, r.scard(rcd.taskset_key))
task_logger.info(
f"Connector deletion progress: cc_pair_id={cc_pair_id} remaining={count} initial={initial_count}"
)
if count > 0:
return
with Session(get_sqlalchemy_engine()) as db_session:
cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session)
if not cc_pair:
task_logger.warning(
f"monitor_connector_deletion_taskset - cc_pair_id not found: cc_pair_id={cc_pair_id}"
)
return
try:
# clean up the rest of the related Postgres entities
# index attempts
delete_index_attempts(
db_session=db_session,
cc_pair_id=cc_pair.id,
)
# document sets
delete_document_set_cc_pair_relationship__no_commit(
db_session=db_session,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
)
# user groups
cleanup_user_groups = fetch_versioned_implementation_with_fallback(
"danswer.db.user_group",
"delete_user_group_cc_pair_relationship__no_commit",
noop_fallback,
)
cleanup_user_groups(
cc_pair_id=cc_pair.id,
db_session=db_session,
)
# finally, delete the cc-pair
delete_connector_credential_pair__no_commit(
db_session=db_session,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
)
# if there are no credentials left, delete the connector
connector = fetch_connector_by_id(
db_session=db_session,
connector_id=cc_pair.connector_id,
)
if not connector or not len(connector.credentials):
task_logger.info(
"Found no credentials left for connector, deleting connector"
)
db_session.delete(connector)
db_session.commit()
except Exception as e:
stack_trace = traceback.format_exc()
error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}"
add_deletion_failure_message(db_session, cc_pair.id, error_message)
task_logger.exception(
f"Failed to run connector_deletion. "
f"cc_pair_id={cc_pair_id} connector_id={cc_pair.connector_id} credential_id={cc_pair.credential_id}"
)
raise e
task_logger.info(
f"Successfully deleted cc_pair: "
f"cc_pair_id={cc_pair_id} "
f"connector_id={cc_pair.connector_id} "
f"credential_id={cc_pair.credential_id} "
f"docs_deleted={initial_count}"
)
r.delete(rcd.taskset_key)
r.delete(rcd.fence_key)
def monitor_ccpair_pruning_taskset(
key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id = RedisConnectorPruning.get_id_from_fence_key(fence_key)
if cc_pair_id is None:
task_logger.warning(
f"monitor_connector_pruning_taskset: could not parse cc_pair_id from {fence_key}"
)
return
rcp = RedisConnectorPruning(cc_pair_id)
fence_value = r.get(rcp.fence_key)
if fence_value is None:
return
generator_value = r.get(rcp.generator_complete_key)
if generator_value is None:
return
try:
initial_count = int(cast(int, generator_value))
except ValueError:
task_logger.error("The value is not an integer.")
return
count = cast(int, r.scard(rcp.taskset_key))
task_logger.info(
f"Connector pruning progress: cc_pair_id={cc_pair_id} remaining={count} initial={initial_count}"
)
if count > 0:
return
mark_ccpair_as_pruned(cc_pair_id, db_session)
task_logger.info(
f"Successfully pruned connector credential pair. cc_pair_id={cc_pair_id}"
)
r.delete(rcp.taskset_key)
r.delete(rcp.generator_progress_key)
r.delete(rcp.generator_complete_key)
r.delete(rcp.fence_key)
@shared_task(name="monitor_vespa_sync", soft_time_limit=300, bind=True)
def monitor_vespa_sync(self: Task, tenant_id: str | None) -> None:
"""This is a celery beat task that monitors and finalizes metadata sync tasksets.
It scans for fence values and then gets the counts of any associated tasksets.
If the count is 0, that means all tasks finished and we should clean up.
This task lock timeout is CELERY_METADATA_SYNC_BEAT_LOCK_TIMEOUT seconds, so don't
do anything too expensive in this function!
"""
r = get_redis_client()
lock_beat: redis.lock.Lock = r.lock(
DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
try:
# prevent overlapping tasks
if not lock_beat.acquire(blocking=False):
return
# print current queue lengths
r_celery = self.app.broker_connection().channel().client # type: ignore
n_celery = celery_get_queue_length("celery", r)
n_sync = celery_get_queue_length(
DanswerCeleryQueues.VESPA_METADATA_SYNC, r_celery
)
n_deletion = celery_get_queue_length(
DanswerCeleryQueues.CONNECTOR_DELETION, r_celery
)
n_pruning = celery_get_queue_length(
DanswerCeleryQueues.CONNECTOR_PRUNING, r_celery
)
task_logger.info(
f"Queue lengths: celery={n_celery} sync={n_sync} deletion={n_deletion} pruning={n_pruning}"
)
lock_beat.reacquire()
if r.exists(RedisConnectorCredentialPair.get_fence_key()):
monitor_connector_taskset(r)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisConnectorDeletion.FENCE_PREFIX + "*"):
monitor_connector_deletion_taskset(key_bytes, r)
with get_session_with_tenant(tenant_id) as db_session:
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"):
monitor_document_set_taskset(key_bytes, r, db_session)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"):
monitor_usergroup_taskset = (
fetch_versioned_implementation_with_fallback(
"danswer.background.celery.tasks.vespa.tasks",
"monitor_usergroup_taskset",
noop_fallback,
)
)
monitor_usergroup_taskset(key_bytes, r, db_session)
lock_beat.reacquire()
for key_bytes in r.scan_iter(RedisConnectorPruning.FENCE_PREFIX + "*"):
monitor_ccpair_pruning_taskset(key_bytes, r, db_session)
# uncomment for debugging if needed
# r_celery = celery_app.broker_connection().channel().client
# length = celery_get_queue_length(DanswerCeleryQueues.VESPA_METADATA_SYNC, r_celery)
# task_logger.warning(f"queue={DanswerCeleryQueues.VESPA_METADATA_SYNC} length={length}")
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
finally:
if lock_beat.owned():
lock_beat.release()
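The generic fence/taskset check that monitor_vespa_sync's docstring describes, in isolation. The key names here are placeholders; the real ones come from the Redis*Helper classes:

import redis

r = redis.Redis()
fence_key = "example_fence_1"      # placeholder
taskset_key = "example_taskset_1"  # placeholder

fence_value = r.get(fence_key)
if fence_value is not None:             # a sync is in flight
    remaining = r.scard(taskset_key)    # subtasks remove themselves on completion
    if remaining == 0:
        ...  # mark the object as synced in Postgres, then clean up:
        r.delete(taskset_key)
        r.delete(fence_key)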
@shared_task(
name="vespa_metadata_sync_task",
bind=True,
soft_time_limit=45,
time_limit=60,
max_retries=3,
)
def vespa_metadata_sync_task(
self: Task, document_id: str, tenant_id: str | None
) -> bool:
task_logger.info(f"document_id={document_id}")
try:
with get_session_with_tenant(tenant_id) as db_session:
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
document_index = get_default_document_index(
primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
)
doc = get_document(document_id, db_session)
if not doc:
return False
# document set sync
doc_sets = fetch_document_sets_for_document(document_id, db_session)
update_doc_sets: set[str] = set(doc_sets)
# User group sync
doc_access = get_access_for_document(
document_id=document_id, db_session=db_session
)
update_request = UpdateRequest(
document_ids=[document_id],
document_sets=update_doc_sets,
access=doc_access,
boost=doc.boost,
hidden=doc.hidden,
)
# update Vespa
document_index.update(update_requests=[update_request])
# update db last. Worst case = we crash right before this and
# the sync might repeat again later
mark_document_as_synced(document_id, db_session)
except SoftTimeLimitExceeded:
task_logger.info(f"SoftTimeLimitExceeded exception. doc_id={document_id}")
except Exception as e:
task_logger.exception("Unexpected exception")
# Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
countdown = 2 ** (self.request.retries + 4)
self.retry(exc=e, countdown=countdown)
return True

View File

@@ -0,0 +1,110 @@
"""
To delete a connector / credential pair:
(1) find all documents associated with connector / credential pair where this
is the only connector / credential pair that has indexed it
(2) delete all documents from document stores
(3) delete all entries from postgres
(4) find all documents associated with connector / credential pair where there
are multiple connector / credential pairs that have indexed it
(5) update document store entries to remove access associated with the
connector / credential pair from the access list
(6) delete all relevant entries from postgres
"""
from sqlalchemy.orm import Session
from danswer.access.access import get_access_for_documents
from danswer.db.document import delete_documents_by_connector_credential_pair__no_commit
from danswer.db.document import delete_documents_complete__no_commit
from danswer.db.document import get_document_connector_counts
from danswer.db.document import prepare_to_modify_documents
from danswer.db.document_set import fetch_document_sets_for_documents
from danswer.db.engine import get_sqlalchemy_engine
from danswer.document_index.interfaces import DocumentIndex
from danswer.document_index.interfaces import UpdateRequest
from danswer.server.documents.models import ConnectorCredentialPairIdentifier
from danswer.utils.logger import setup_logger
logger = setup_logger()
_DELETION_BATCH_SIZE = 1000
def delete_connector_credential_pair_batch(
document_ids: list[str],
connector_id: int,
credential_id: int,
document_index: DocumentIndex,
) -> None:
"""
Removes a batch of document ids from a cc-pair. If no other cc-pair uses a document anymore
it gets permanently deleted.
"""
with Session(get_sqlalchemy_engine()) as db_session:
# acquire lock for all documents in this batch so that indexing can't
# override the deletion
with prepare_to_modify_documents(
db_session=db_session, document_ids=document_ids
):
document_connector_counts = get_document_connector_counts(
db_session=db_session, document_ids=document_ids
)
# figure out which docs need to be completely deleted
document_ids_to_delete = [
document_id
for document_id, cnt in document_connector_counts
if cnt == 1
]
logger.debug(f"Deleting documents: {document_ids_to_delete}")
document_index.delete(doc_ids=document_ids_to_delete)
delete_documents_complete__no_commit(
db_session=db_session,
document_ids=document_ids_to_delete,
)
# figure out which docs need to be updated
document_ids_to_update = [
document_id for document_id, cnt in document_connector_counts if cnt > 1
]
# maps document id to list of document set names
new_doc_sets_for_documents: dict[str, set[str]] = {
document_id_and_document_set_names_tuple[0]: set(
document_id_and_document_set_names_tuple[1]
)
for document_id_and_document_set_names_tuple in fetch_document_sets_for_documents(
db_session=db_session,
document_ids=document_ids_to_update,
)
}
# determine future ACLs for documents in batch
access_for_documents = get_access_for_documents(
document_ids=document_ids_to_update,
db_session=db_session,
)
# update Vespa
logger.debug(f"Updating documents: {document_ids_to_update}")
update_requests = [
UpdateRequest(
document_ids=[document_id],
access=access,
document_sets=new_doc_sets_for_documents[document_id],
)
for document_id, access in access_for_documents.items()
]
document_index.update(update_requests=update_requests)
# clean up Postgres
delete_documents_by_connector_credential_pair__no_commit(
db_session=db_session,
document_ids=document_ids_to_update,
connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
db_session.commit()
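A hypothetical caller driving the batch helper above with _DELETION_BATCH_SIZE; the document ids, connector/credential ids, and document_index are placeholders:

all_doc_ids = [f"doc-{i}" for i in range(2500)]  # placeholder ids

for start in range(0, len(all_doc_ids), _DELETION_BATCH_SIZE):
    delete_connector_credential_pair_batch(
        document_ids=all_doc_ids[start : start + _DELETION_BATCH_SIZE],
        connector_id=1,
        credential_id=1,
        document_index=document_index,  # assumed constructed via the usual factory
    )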

View File

@@ -4,7 +4,6 @@ from datetime import datetime
from datetime import timedelta
from datetime import timezone
from sqlalchemy import text
from sqlalchemy.orm import Session
from danswer.background.indexing.checkpointing import get_time_windows_for_index_attempt
@@ -15,10 +14,9 @@ from danswer.configs.app_configs import POLL_CONNECTOR_OFFSET
from danswer.connectors.connector_runner import ConnectorRunner
from danswer.connectors.factory import instantiate_connector
from danswer.connectors.models import IndexAttemptMetadata
from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
from danswer.db.connector_credential_pair import get_last_successful_attempt_time
from danswer.db.connector_credential_pair import update_connector_credential_pair
from danswer.db.engine import get_session_with_tenant
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.enums import ConnectorCredentialPairStatus
from danswer.db.index_attempt import get_index_attempt
from danswer.db.index_attempt import mark_attempt_failed
@@ -31,7 +29,6 @@ from danswer.db.models import IndexingStatus
from danswer.db.models import IndexModelStatus
from danswer.document_index.factory import get_default_document_index
from danswer.indexing.embedder import DefaultIndexingEmbedder
from danswer.indexing.indexing_heartbeat import IndexingHeartbeat
from danswer.indexing.indexing_pipeline import build_indexing_pipeline
from danswer.utils.logger import IndexAttemptSingleton
from danswer.utils.logger import setup_logger
@@ -47,12 +44,11 @@ def _get_connector_runner(
attempt: IndexAttempt,
start_time: datetime,
end_time: datetime,
tenant_id: str | None,
) -> ConnectorRunner:
"""
NOTE: `start_time` and `end_time` are only used for poll connectors
Returns an iterator of document batches and whether the returned documents
Returns an iterator of document batches and whether the returned documents
are the complete list of existing documents of the connector. If the task
of type LOAD_STATE, the list will be considered complete and otherwise incomplete.
"""
@@ -70,17 +66,12 @@ def _get_connector_runner(
logger.exception(f"Unable to instantiate connector due to {e}")
# since we failed to even instantiate the connector, we pause the CCPair since
# it will never succeed
cc_pair = get_connector_credential_pair_from_id(
attempt.connector_credential_pair.id, db_session
update_connector_credential_pair(
db_session=db_session,
connector_id=attempt.connector_credential_pair.connector.id,
credential_id=attempt.connector_credential_pair.credential.id,
status=ConnectorCredentialPairStatus.PAUSED,
)
if cc_pair and cc_pair.status == ConnectorCredentialPairStatus.ACTIVE:
update_connector_credential_pair(
db_session=db_session,
connector_id=attempt.connector_credential_pair.connector.id,
credential_id=attempt.connector_credential_pair.credential.id,
status=ConnectorCredentialPairStatus.PAUSED,
)
raise e
return ConnectorRunner(
@@ -89,7 +80,8 @@ def _get_connector_runner(
def _run_indexing(
db_session: Session, index_attempt: IndexAttempt, tenant_id: str | None
db_session: Session,
index_attempt: IndexAttempt,
) -> None:
"""
1. Get documents which are either new or updated from specified application
@@ -111,26 +103,16 @@ def _run_indexing(
)
embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
search_settings=search_settings,
heartbeat=IndexingHeartbeat(
index_attempt_id=index_attempt.id,
db_session=db_session,
# let the world know we're still making progress after
# every 10 batches
freq=10,
),
search_settings=search_settings
)
indexing_pipeline = build_indexing_pipeline(
attempt_id=index_attempt.id,
embedder=embedding_model,
document_index=document_index,
ignore_time_skip=(
index_attempt.from_beginning
or (search_settings.status == IndexModelStatus.FUTURE)
),
ignore_time_skip=index_attempt.from_beginning
or (search_settings.status == IndexModelStatus.FUTURE),
db_session=db_session,
tenant_id=tenant_id,
)
db_cc_pair = index_attempt.connector_credential_pair
@@ -187,7 +169,6 @@ def _run_indexing(
attempt=index_attempt,
start_time=window_start,
end_time=window_end,
tenant_id=tenant_id,
)
all_connector_doc_ids: set[str] = set()
@@ -215,9 +196,7 @@ def _run_indexing(
db_session.refresh(index_attempt)
if index_attempt.status != IndexingStatus.IN_PROGRESS:
# Likely due to user manually disabling it or model swap
raise RuntimeError(
f"Index Attempt was canceled, status is {index_attempt.status}"
)
raise RuntimeError("Index Attempt was canceled")
batch_description = []
for doc in doc_batch:
@@ -378,21 +357,12 @@ def _run_indexing(
)
def _prepare_index_attempt(
db_session: Session, index_attempt_id: int, tenant_id: str | None
) -> IndexAttempt:
def _prepare_index_attempt(db_session: Session, index_attempt_id: int) -> IndexAttempt:
# make sure that the index attempt can't change in between checking the
# status and marking it as in_progress. This setting will be discarded
# after the next commit:
# https://docs.sqlalchemy.org/en/20/orm/session_transaction.html#setting-isolation-for-individual-transactions
db_session.connection(execution_options={"isolation_level": "SERIALIZABLE"}) # type: ignore
if tenant_id is not None:
# Explicitly set the search path for the given tenant
db_session.execute(text(f'SET search_path TO "{tenant_id}"'))
# Verify the search path was set correctly
result = db_session.execute(text("SHOW search_path"))
current_search_path = result.scalar()
logger.info(f"Current search path set to: {current_search_path}")
attempt = get_index_attempt(
db_session=db_session,
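As a standalone sketch of the isolation trick referenced above (the connection string is hypothetical), SQLAlchemy lets the isolation level be set for just the current transaction via execution options on the session's connection; the option is discarded once that transaction commits or rolls back.
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session

engine = create_engine("postgresql+psycopg2://user:pass@localhost/danswer")  # hypothetical DSN

with Session(engine) as session:
    # Applies only to the transaction started on this connection; later
    # transactions fall back to the engine's default isolation level.
    session.connection(execution_options={"isolation_level": "SERIALIZABLE"})
    session.execute(text("SELECT 1"))
    session.commit()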
@@ -415,11 +385,12 @@ def _prepare_index_attempt(
def run_indexing_entrypoint(
index_attempt_id: int,
tenant_id: str | None,
connector_credential_pair_id: int,
is_ee: bool = False,
index_attempt_id: int, connector_credential_pair_id: int, is_ee: bool = False
) -> None:
"""Entrypoint for indexing run when using dask distributed.
Wraps the actual logic in a `try` block so that we can catch any exceptions
and mark the attempt as failed."""
try:
if is_ee:
global_version.set_ee()
@@ -429,29 +400,26 @@ def run_indexing_entrypoint(
IndexAttemptSingleton.set_cc_and_index_id(
index_attempt_id, connector_credential_pair_id
)
with get_session_with_tenant(tenant_id) as db_session:
attempt = _prepare_index_attempt(db_session, index_attempt_id, tenant_id)
with Session(get_sqlalchemy_engine()) as db_session:
# make sure that it is valid to run this indexing attempt + mark it
# as in progress
attempt = _prepare_index_attempt(db_session, index_attempt_id)
logger.info(
f"Indexing starting for tenant {tenant_id}: "
if tenant_id is not None
else ""
+ f"connector='{attempt.connector_credential_pair.connector.name}' "
f"Indexing starting: "
f"connector='{attempt.connector_credential_pair.connector.name}' "
f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
f"credentials='{attempt.connector_credential_pair.connector_id}'"
)
_run_indexing(db_session, attempt, tenant_id)
_run_indexing(db_session, attempt)
logger.info(
f"Indexing finished for tenant {tenant_id}: "
if tenant_id is not None
else ""
+ f"connector='{attempt.connector_credential_pair.connector.name}' "
f"Indexing finished: "
f"connector='{attempt.connector_credential_pair.connector.name}' "
f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
f"credentials='{attempt.connector_credential_pair.connector_id}'"
)
except Exception as e:
logger.exception(
f"Indexing job with ID '{index_attempt_id}' for tenant {tenant_id} failed due to {e}"
)
logger.exception(f"Indexing job with ID '{index_attempt_id}' failed due to {e}")

View File

@@ -6,8 +6,6 @@ import dask
from dask.distributed import Client
from dask.distributed import Future
from distributed import LocalCluster
from sqlalchemy import text
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.orm import Session
from danswer.background.indexing.dask_utils import ResourceLogger
@@ -17,17 +15,15 @@ from danswer.background.indexing.run_indexing import run_indexing_entrypoint
from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT
from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED
from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from danswer.configs.app_configs import MULTI_TENANT
from danswer.configs.app_configs import NUM_INDEXING_WORKERS
from danswer.configs.app_configs import NUM_SECONDARY_INDEXING_WORKERS
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import POSTGRES_INDEXER_APP_NAME
from danswer.configs.constants import TENANT_ID_PREFIX
from danswer.db.connector import fetch_connectors
from danswer.db.connector_credential_pair import fetch_connector_credential_pairs
from danswer.db.engine import get_db_current_time
from danswer.db.engine import get_session_with_tenant
from danswer.db.engine import SqlEngine
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.engine import init_sqlalchemy_engine
from danswer.db.index_attempt import create_index_attempt
from danswer.db.index_attempt import get_index_attempt
from danswer.db.index_attempt import get_inprogress_index_attempts
@@ -100,20 +96,14 @@ def _should_create_new_indexing(
if last_index.status == IndexingStatus.IN_PROGRESS:
return False
else:
if (
connector.id == 0 or connector.source == DocumentSource.INGESTION_API
): # Ingestion API
if connector.id == 0: # Ingestion API
return False
return True
# If the connector is paused or is the ingestion API, don't index
# NOTE: during an embedding model switch over, the following logic
# is bypassed by the above check for a future model
if (
not cc_pair.status.is_active()
or connector.id == 0
or connector.source == DocumentSource.INGESTION_API
):
if not cc_pair.status.is_active() or connector.id == 0:
return False
if not last_index:
@@ -157,15 +147,13 @@ def _mark_run_failed(
"""Main funcs"""
def create_indexing_jobs(
existing_jobs: dict[int, Future | SimpleJob], tenant_id: str | None
) -> None:
def create_indexing_jobs(existing_jobs: dict[int, Future | SimpleJob]) -> None:
"""Creates new indexing jobs for each connector / credential pair which is:
1. Enabled
2. `refresh_frequency` time has passed since the last indexing run for this pair
3. There is not already an ongoing indexing attempt for this pair
"""
with get_session_with_tenant(tenant_id) as db_session:
with Session(get_sqlalchemy_engine()) as db_session:
ongoing: set[tuple[int | None, int]] = set()
for attempt_id in existing_jobs:
attempt = get_index_attempt(
@@ -220,12 +208,11 @@ def create_indexing_jobs(
def cleanup_indexing_jobs(
existing_jobs: dict[int, Future | SimpleJob],
tenant_id: str | None,
timeout_hours: int = CLEANUP_INDEXING_JOBS_TIMEOUT,
) -> dict[int, Future | SimpleJob]:
existing_jobs_copy = existing_jobs.copy()
# clean up completed jobs
with get_session_with_tenant(tenant_id) as db_session:
with Session(get_sqlalchemy_engine()) as db_session:
for attempt_id, job in existing_jobs.items():
index_attempt = get_index_attempt(
db_session=db_session, index_attempt_id=attempt_id
@@ -263,41 +250,38 @@ def cleanup_indexing_jobs(
)
# clean up in-progress jobs that were never completed
try:
connectors = fetch_connectors(db_session)
for connector in connectors:
in_progress_indexing_attempts = get_inprogress_index_attempts(
connector.id, db_session
)
for index_attempt in in_progress_indexing_attempts:
if index_attempt.id in existing_jobs:
# If index attempt is canceled, stop the run
if index_attempt.status == IndexingStatus.FAILED:
existing_jobs[index_attempt.id].cancel()
# check to see if the job has been updated in the last `timeout_hours` hours; if not,
# assume it to be frozen in some bad state and just mark it as failed. Note: this relies
# on the fact that the `time_updated` field is constantly updated every
# batch of documents indexed
current_db_time = get_db_current_time(db_session=db_session)
time_since_update = current_db_time - index_attempt.time_updated
if time_since_update.total_seconds() > 60 * 60 * timeout_hours:
existing_jobs[index_attempt.id].cancel()
_mark_run_failed(
db_session=db_session,
index_attempt=index_attempt,
failure_reason="Indexing run frozen - no updates in the last three hours. "
"The run will be re-attempted at next scheduled indexing time.",
)
else:
# If job isn't known, simply mark it as failed
connectors = fetch_connectors(db_session)
for connector in connectors:
in_progress_indexing_attempts = get_inprogress_index_attempts(
connector.id, db_session
)
for index_attempt in in_progress_indexing_attempts:
if index_attempt.id in existing_jobs:
# If index attempt is canceled, stop the run
if index_attempt.status == IndexingStatus.FAILED:
existing_jobs[index_attempt.id].cancel()
# check to see if the job has been updated in the last `timeout_hours` hours; if not,
# assume it to be frozen in some bad state and just mark it as failed. Note: this relies
# on the fact that the `time_updated` field is constantly updated every
# batch of documents indexed
current_db_time = get_db_current_time(db_session=db_session)
time_since_update = current_db_time - index_attempt.time_updated
if time_since_update.total_seconds() > 60 * 60 * timeout_hours:
existing_jobs[index_attempt.id].cancel()
_mark_run_failed(
db_session=db_session,
index_attempt=index_attempt,
failure_reason=_UNEXPECTED_STATE_FAILURE_REASON,
failure_reason="Indexing run frozen - no updates in the last three hours. "
"The run will be re-attempted at next scheduled indexing time.",
)
except ProgrammingError:
logger.debug(f"No Connector Table exists for: {tenant_id}")
else:
# If job isn't known, simply mark it as failed
_mark_run_failed(
db_session=db_session,
index_attempt=index_attempt,
failure_reason=_UNEXPECTED_STATE_FAILURE_REASON,
)
return existing_jobs_copy
@@ -305,15 +289,13 @@ def kickoff_indexing_jobs(
existing_jobs: dict[int, Future | SimpleJob],
client: Client | SimpleJobClient,
secondary_client: Client | SimpleJobClient,
tenant_id: str | None,
) -> dict[int, Future | SimpleJob]:
existing_jobs_copy = existing_jobs.copy()
current_session = get_session_with_tenant(tenant_id)
engine = get_sqlalchemy_engine()
# Don't include jobs waiting in the Dask queue that just haven't started running
# Also (rarely) don't include jobs that started but haven't updated the indexing tables yet
with current_session as db_session:
with Session(engine) as db_session:
# get_not_started_index_attempts orders its returned results from oldest to newest
# we must process attempts in a FIFO manner to prevent connector starvation
new_indexing_attempts = [
@@ -344,7 +326,7 @@ def kickoff_indexing_jobs(
logger.warning(
f"Skipping index attempt as Connector has been deleted: {attempt}"
)
with current_session as db_session:
with Session(engine) as db_session:
mark_attempt_failed(
attempt, db_session, failure_reason="Connector is null"
)
@@ -353,7 +335,7 @@ def kickoff_indexing_jobs(
logger.warning(
f"Skipping index attempt as Credential has been deleted: {attempt}"
)
with current_session as db_session:
with Session(engine) as db_session:
mark_attempt_failed(
attempt, db_session, failure_reason="Credential is null"
)
@@ -364,9 +346,8 @@ def kickoff_indexing_jobs(
run = client.submit(
run_indexing_entrypoint,
attempt.id,
tenant_id,
attempt.connector_credential_pair_id,
global_version.is_ee_version(),
global_version.get_is_ee_version(),
pure=False,
)
if not run:
@@ -376,9 +357,8 @@ def kickoff_indexing_jobs(
run = secondary_client.submit(
run_indexing_entrypoint,
attempt.id,
tenant_id,
attempt.connector_credential_pair_id,
global_version.is_ee_version(),
global_version.get_is_ee_version(),
pure=False,
)
if not run:
@@ -412,40 +392,41 @@ def kickoff_indexing_jobs(
return existing_jobs_copy
def get_all_tenant_ids() -> list[str] | list[None]:
if not MULTI_TENANT:
return [None]
with get_session_with_tenant(tenant_id="public") as session:
result = session.execute(
text(
"""
SELECT schema_name
FROM information_schema.schemata
WHERE schema_name NOT IN ('pg_catalog', 'information_schema', 'public')"""
)
)
tenant_ids = [row[0] for row in result]
valid_tenants = [
tenant
for tenant in tenant_ids
if tenant is None or tenant.startswith(TENANT_ID_PREFIX)
]
return valid_tenants
def update_loop(
delay: int = 10,
num_workers: int = NUM_INDEXING_WORKERS,
num_secondary_workers: int = NUM_SECONDARY_INDEXING_WORKERS,
) -> None:
engine = get_sqlalchemy_engine()
with Session(engine) as db_session:
check_index_swap(db_session=db_session)
search_settings = get_current_search_settings(db_session)
# So that first-time users aren't surprised by the really slow speed of the first
# batch of documents indexed
if search_settings.provider_type is None:
logger.notice("Running a first inference to warm up embedding model")
embedding_model = EmbeddingModel.from_db_model(
search_settings=search_settings,
server_host=INDEXING_MODEL_SERVER_HOST,
server_port=MODEL_SERVER_PORT,
)
warm_up_bi_encoder(
embedding_model=embedding_model,
)
client_primary: Client | SimpleJobClient
client_secondary: Client | SimpleJobClient
if DASK_JOB_CLIENT_ENABLED:
cluster_primary = LocalCluster(
n_workers=num_workers,
threads_per_worker=1,
# there are warnings about high memory usage + "Event loop unresponsive"
# which are not relevant to us since our workers are expected to use a
# lot of memory + involve CPU-intensive tasks that will not relinquish
# the event loop
silence_logs=logging.ERROR,
)
cluster_secondary = LocalCluster(
@@ -461,67 +442,32 @@ def update_loop(
client_primary = SimpleJobClient(n_workers=num_workers)
client_secondary = SimpleJobClient(n_workers=num_secondary_workers)
existing_jobs: dict[str | None, dict[int, Future | SimpleJob]] = {}
existing_jobs: dict[int, Future | SimpleJob] = {}
logger.notice("Startup complete. Waiting for indexing jobs...")
while True:
start = time.time()
start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
logger.debug(f"Running update, current UTC time: {start_time_utc}")
if existing_jobs:
# TODO: make this debug level once the "no jobs are being scheduled" issue is resolved
logger.debug(
"Found existing indexing jobs: "
f"{[(tenant_id, list(jobs.keys())) for tenant_id, jobs in existing_jobs.items()]}"
f"{[(attempt_id, job.status) for attempt_id, job in existing_jobs.items()]}"
)
try:
tenants = get_all_tenant_ids()
for tenant_id in tenants:
try:
logger.debug(
f"Processing {'index attempts' if tenant_id is None else f'tenant {tenant_id}'}"
)
with get_session_with_tenant(tenant_id) as db_session:
check_index_swap(db_session=db_session)
if not MULTI_TENANT:
search_settings = get_current_search_settings(db_session)
if search_settings.provider_type is None:
logger.notice(
"Running a first inference to warm up embedding model"
)
embedding_model = EmbeddingModel.from_db_model(
search_settings=search_settings,
server_host=INDEXING_MODEL_SERVER_HOST,
server_port=MODEL_SERVER_PORT,
)
warm_up_bi_encoder(embedding_model=embedding_model)
logger.notice("First inference complete.")
tenant_jobs = existing_jobs.get(tenant_id, {})
tenant_jobs = cleanup_indexing_jobs(
existing_jobs=tenant_jobs, tenant_id=tenant_id
)
create_indexing_jobs(existing_jobs=tenant_jobs, tenant_id=tenant_id)
tenant_jobs = kickoff_indexing_jobs(
existing_jobs=tenant_jobs,
client=client_primary,
secondary_client=client_secondary,
tenant_id=tenant_id,
)
existing_jobs[tenant_id] = tenant_jobs
except Exception as e:
logger.exception(
f"Failed to process tenant {tenant_id or 'default'}: {e}"
)
with Session(get_sqlalchemy_engine()) as db_session:
check_index_swap(db_session)
existing_jobs = cleanup_indexing_jobs(existing_jobs=existing_jobs)
create_indexing_jobs(existing_jobs=existing_jobs)
existing_jobs = kickoff_indexing_jobs(
existing_jobs=existing_jobs,
client=client_primary,
secondary_client=client_secondary,
)
except Exception as e:
logger.exception(f"Failed to run update due to {e}")
sleep_time = delay - (time.time() - start)
if sleep_time > 0:
time.sleep(sleep_time)
@@ -529,9 +475,7 @@ def update_loop(
def update__main() -> None:
set_is_ee_based_on_env_variable()
# initialize the Postgres connection pool
SqlEngine.set_app_name(POSTGRES_INDEXER_APP_NAME)
init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME)
logger.notice("Starting indexing service")
update_loop()

View File

@@ -6,6 +6,7 @@ from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from danswer.configs.chat_configs import PERSONAS_YAML
from danswer.configs.chat_configs import PROMPTS_YAML
from danswer.db.document_set import get_or_create_document_set_by_name
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.input_prompt import insert_input_prompt_if_not_exists
from danswer.db.models import DocumentSet as DocumentSetDBModel
from danswer.db.models import Persona
@@ -17,32 +18,30 @@ from danswer.db.persona import upsert_prompt
from danswer.search.enums import RecencyBiasSetting
def load_prompts_from_yaml(
db_session: Session, prompts_yaml: str = PROMPTS_YAML
) -> None:
def load_prompts_from_yaml(prompts_yaml: str = PROMPTS_YAML) -> None:
with open(prompts_yaml, "r") as file:
data = yaml.safe_load(file)
all_prompts = data.get("prompts", [])
for prompt in all_prompts:
upsert_prompt(
user=None,
prompt_id=prompt.get("id"),
name=prompt["name"],
description=prompt["description"].strip(),
system_prompt=prompt["system"].strip(),
task_prompt=prompt["task"].strip(),
include_citations=prompt["include_citations"],
datetime_aware=prompt.get("datetime_aware", True),
default_prompt=True,
personas=None,
db_session=db_session,
commit=True,
)
with Session(get_sqlalchemy_engine()) as db_session:
for prompt in all_prompts:
upsert_prompt(
user=None,
prompt_id=prompt.get("id"),
name=prompt["name"],
description=prompt["description"].strip(),
system_prompt=prompt["system"].strip(),
task_prompt=prompt["task"].strip(),
include_citations=prompt["include_citations"],
datetime_aware=prompt.get("datetime_aware", True),
default_prompt=True,
personas=None,
db_session=db_session,
commit=True,
)
def load_personas_from_yaml(
db_session: Session,
personas_yaml: str = PERSONAS_YAML,
default_chunks: float = MAX_CHUNKS_FED_TO_CHAT,
) -> None:
@@ -50,117 +49,117 @@ def load_personas_from_yaml(
data = yaml.safe_load(file)
all_personas = data.get("personas", [])
for persona in all_personas:
doc_set_names = persona["document_sets"]
doc_sets: list[DocumentSetDBModel] = [
get_or_create_document_set_by_name(db_session, name)
for name in doc_set_names
]
# Assume that if the user hasn't set any document sets for the persona, they may want
# to attach document sets to the persona manually later; therefore, don't overwrite/reset
# the document sets for the persona
doc_set_ids: list[int] | None = None
if doc_sets:
doc_set_ids = [doc_set.id for doc_set in doc_sets]
else:
doc_set_ids = None
prompt_ids: list[int] | None = None
prompt_set_names = persona["prompts"]
if prompt_set_names:
prompts: list[PromptDBModel | None] = [
get_prompt_by_name(prompt_name, user=None, db_session=db_session)
for prompt_name in prompt_set_names
with Session(get_sqlalchemy_engine()) as db_session:
for persona in all_personas:
doc_set_names = persona["document_sets"]
doc_sets: list[DocumentSetDBModel] = [
get_or_create_document_set_by_name(db_session, name)
for name in doc_set_names
]
if any([prompt is None for prompt in prompts]):
raise ValueError("Invalid Persona configs, not all prompts exist")
if prompts:
prompt_ids = [prompt.id for prompt in prompts if prompt is not None]
# Assume that if the user hasn't set any document sets for the persona, they may want
# to attach document sets to the persona manually later; therefore, don't overwrite/reset
# the document sets for the persona
doc_set_ids: list[int] | None = None
if doc_sets:
doc_set_ids = [doc_set.id for doc_set in doc_sets]
else:
doc_set_ids = None
p_id = persona.get("id")
tool_ids = []
if persona.get("image_generation"):
image_gen_tool = (
db_session.query(ToolDBModel)
.filter(ToolDBModel.name == "ImageGenerationTool")
prompt_ids: list[int] | None = None
prompt_set_names = persona["prompts"]
if prompt_set_names:
prompts: list[PromptDBModel | None] = [
get_prompt_by_name(prompt_name, user=None, db_session=db_session)
for prompt_name in prompt_set_names
]
if any([prompt is None for prompt in prompts]):
raise ValueError("Invalid Persona configs, not all prompts exist")
if prompts:
prompt_ids = [prompt.id for prompt in prompts if prompt is not None]
p_id = persona.get("id")
tool_ids = []
if persona.get("image_generation"):
image_gen_tool = (
db_session.query(ToolDBModel)
.filter(ToolDBModel.name == "ImageGenerationTool")
.first()
)
if image_gen_tool:
tool_ids.append(image_gen_tool.id)
llm_model_provider_override = persona.get("llm_model_provider_override")
llm_model_version_override = persona.get("llm_model_version_override")
# Set specific overrides for image generation persona
if persona.get("image_generation"):
llm_model_version_override = "gpt-4o"
existing_persona = (
db_session.query(Persona)
.filter(Persona.name == persona["name"])
.first()
)
if image_gen_tool:
tool_ids.append(image_gen_tool.id)
llm_model_provider_override = persona.get("llm_model_provider_override")
llm_model_version_override = persona.get("llm_model_version_override")
# Set specific overrides for image generation persona
if persona.get("image_generation"):
llm_model_version_override = "gpt-4o"
existing_persona = (
db_session.query(Persona).filter(Persona.name == persona["name"]).first()
)
upsert_persona(
user=None,
persona_id=(-1 * p_id) if p_id is not None else None,
name=persona["name"],
description=persona["description"],
num_chunks=persona.get("num_chunks")
if persona.get("num_chunks") is not None
else default_chunks,
llm_relevance_filter=persona.get("llm_relevance_filter"),
starter_messages=persona.get("starter_messages"),
llm_filter_extraction=persona.get("llm_filter_extraction"),
icon_shape=persona.get("icon_shape"),
icon_color=persona.get("icon_color"),
llm_model_provider_override=llm_model_provider_override,
llm_model_version_override=llm_model_version_override,
recency_bias=RecencyBiasSetting(persona["recency_bias"]),
prompt_ids=prompt_ids,
document_set_ids=doc_set_ids,
tool_ids=tool_ids,
builtin_persona=True,
is_public=True,
display_priority=existing_persona.display_priority
if existing_persona is not None
else persona.get("display_priority"),
is_visible=existing_persona.is_visible
if existing_persona is not None
else persona.get("is_visible"),
db_session=db_session,
)
upsert_persona(
user=None,
persona_id=(-1 * p_id) if p_id is not None else None,
name=persona["name"],
description=persona["description"],
num_chunks=persona.get("num_chunks")
if persona.get("num_chunks") is not None
else default_chunks,
llm_relevance_filter=persona.get("llm_relevance_filter"),
starter_messages=persona.get("starter_messages"),
llm_filter_extraction=persona.get("llm_filter_extraction"),
icon_shape=persona.get("icon_shape"),
icon_color=persona.get("icon_color"),
llm_model_provider_override=llm_model_provider_override,
llm_model_version_override=llm_model_version_override,
recency_bias=RecencyBiasSetting(persona["recency_bias"]),
prompt_ids=prompt_ids,
document_set_ids=doc_set_ids,
tool_ids=tool_ids,
builtin_persona=True,
is_public=True,
display_priority=existing_persona.display_priority
if existing_persona is not None
else persona.get("display_priority"),
is_visible=existing_persona.is_visible
if existing_persona is not None
else persona.get("is_visible"),
db_session=db_session,
)
def load_input_prompts_from_yaml(
db_session: Session, input_prompts_yaml: str = INPUT_PROMPT_YAML
) -> None:
def load_input_prompts_from_yaml(input_prompts_yaml: str = INPUT_PROMPT_YAML) -> None:
with open(input_prompts_yaml, "r") as file:
data = yaml.safe_load(file)
all_input_prompts = data.get("input_prompts", [])
for input_prompt in all_input_prompts:
# If these prompts are deleted (which is a hard delete in the DB), they will be recreated
# on server startup, but the user can always just deactivate them; only a light inconvenience
insert_input_prompt_if_not_exists(
user=None,
input_prompt_id=input_prompt.get("id"),
prompt=input_prompt["prompt"],
content=input_prompt["content"],
is_public=input_prompt["is_public"],
active=input_prompt.get("active", True),
db_session=db_session,
commit=True,
)
with Session(get_sqlalchemy_engine()) as db_session:
for input_prompt in all_input_prompts:
# If these prompts are deleted (which is a hard delete in the DB), they will be recreated
# on server startup, but the user can always just deactivate them; only a light inconvenience
insert_input_prompt_if_not_exists(
user=None,
input_prompt_id=input_prompt.get("id"),
prompt=input_prompt["prompt"],
content=input_prompt["content"],
is_public=input_prompt["is_public"],
active=input_prompt.get("active", True),
db_session=db_session,
commit=True,
)
def load_chat_yamls(
db_session: Session,
prompt_yaml: str = PROMPTS_YAML,
personas_yaml: str = PERSONAS_YAML,
input_prompts_yaml: str = INPUT_PROMPT_YAML,
) -> None:
load_prompts_from_yaml(db_session, prompt_yaml)
load_personas_from_yaml(db_session, personas_yaml)
load_input_prompts_from_yaml(db_session, input_prompts_yaml)
load_prompts_from_yaml(prompt_yaml)
load_personas_from_yaml(personas_yaml)
load_input_prompts_from_yaml(input_prompts_yaml)

View File

@@ -18,10 +18,6 @@ from danswer.chat.models import MessageResponseIDInfo
from danswer.chat.models import MessageSpecificCitations
from danswer.chat.models import QADocsResponse
from danswer.chat.models import StreamingError
from danswer.configs.app_configs import AZURE_DALLE_API_BASE
from danswer.configs.app_configs import AZURE_DALLE_API_KEY
from danswer.configs.app_configs import AZURE_DALLE_API_VERSION
from danswer.configs.app_configs import AZURE_DALLE_DEPLOYMENT_NAME
from danswer.configs.chat_configs import BING_API_KEY
from danswer.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
@@ -564,26 +560,7 @@ def stream_chat_message_objects(
and llm.config.api_key
and llm.config.model_provider == "openai"
):
img_generation_llm_config = LLMConfig(
model_provider=llm.config.model_provider,
model_name="dall-e-3",
temperature=GEN_AI_TEMPERATURE,
api_key=llm.config.api_key,
api_base=llm.config.api_base,
api_version=llm.config.api_version,
)
elif (
llm.config.model_provider == "azure"
and AZURE_DALLE_API_KEY is not None
):
img_generation_llm_config = LLMConfig(
model_provider="azure",
model_name=f"azure/{AZURE_DALLE_DEPLOYMENT_NAME}",
temperature=GEN_AI_TEMPERATURE,
api_key=AZURE_DALLE_API_KEY,
api_base=AZURE_DALLE_API_BASE,
api_version=AZURE_DALLE_API_VERSION,
)
img_generation_llm_config = llm.config
else:
llm_providers = fetch_existing_llm_providers(db_session)
openai_provider = next(
@@ -602,7 +579,7 @@ def stream_chat_message_objects(
)
img_generation_llm_config = LLMConfig(
model_provider=openai_provider.provider,
model_name="dall-e-3",
model_name=openai_provider.default_model_name,
temperature=GEN_AI_TEMPERATURE,
api_key=openai_provider.api_key,
api_base=openai_provider.api_base,
@@ -614,7 +591,6 @@ def stream_chat_message_objects(
api_base=img_generation_llm_config.api_base,
api_version=img_generation_llm_config.api_version,
additional_headers=litellm_additional_headers,
model=img_generation_llm_config.model_name,
)
]
elif tool_cls.__name__ == InternetSearchTool.__name__:

View File

@@ -138,12 +138,6 @@ POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost"
POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432"
POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres"
POSTGRES_API_SERVER_POOL_SIZE = int(
os.environ.get("POSTGRES_API_SERVER_POOL_SIZE") or 40
)
POSTGRES_API_SERVER_POOL_OVERFLOW = int(
os.environ.get("POSTGRES_API_SERVER_POOL_OVERFLOW") or 10
)
# defaults to False
POSTGRES_POOL_PRE_PING = os.environ.get("POSTGRES_POOL_PRE_PING", "").lower() == "true"
@@ -170,29 +164,13 @@ REDIS_DB_NUMBER_CELERY_RESULT_BACKEND = int(
)
REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15)) # broker
# will propagate to both our redis client as well as celery's redis client
REDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get("REDIS_HEALTH_CHECK_INTERVAL", 60))
# our redis client only, not celery's
REDIS_POOL_MAX_CONNECTIONS = int(os.environ.get("REDIS_POOL_MAX_CONNECTIONS", 128))
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings
# should be one of "required", "optional", or "none"
REDIS_SSL_CERT_REQS = os.getenv("REDIS_SSL_CERT_REQS", "none")
REDIS_SSL_CA_CERTS = os.getenv("REDIS_SSL_CA_CERTS", None)
REDIS_SSL_CA_CERTS = os.getenv("REDIS_SSL_CA_CERTS", "")
CELERY_RESULT_EXPIRES = int(os.environ.get("CELERY_RESULT_EXPIRES", 86400)) # seconds
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#broker-pool-limit
# Setting to None may help when there is a proxy in the way that closes idle connections
CELERY_BROKER_POOL_LIMIT_DEFAULT = 10
try:
CELERY_BROKER_POOL_LIMIT = int(
os.environ.get("CELERY_BROKER_POOL_LIMIT", CELERY_BROKER_POOL_LIMIT_DEFAULT)
)
except ValueError:
CELERY_BROKER_POOL_LIMIT = CELERY_BROKER_POOL_LIMIT_DEFAULT
#####
# Connector Configs
#####
@@ -269,10 +247,6 @@ JIRA_CONNECTOR_LABELS_TO_SKIP = [
for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
if ignored_tag
]
# Maximum size for Jira tickets in bytes (default: 100KB)
JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024)
)
GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME")
@@ -296,7 +270,7 @@ ALLOW_SIMULTANEOUS_PRUNING = (
os.environ.get("ALLOW_SIMULTANEOUS_PRUNING", "").lower() == "true"
)
# This is the maximum rate at which documents are queried for a pruning job. 0 disables the limitation.
# This is the maximum rate at which documents are queried for a pruning job. 0 disables the limitation.
MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE = int(
os.environ.get("MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE", 0)
)
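A minimal sketch of how such a per-minute cap could be enforced (a simple sleep-based limiter, not necessarily the project's actual mechanism); a value of 0 is treated as "no limit". The `fetch_one` callback is hypothetical.
import time

def rate_limited_fetch(fetch_one, total: int, max_per_minute: int) -> list:
    """Call fetch_one() `total` times, pacing calls so at most max_per_minute happen per minute."""
    results = []
    min_interval = 60.0 / max_per_minute if max_per_minute > 0 else 0.0
    for _ in range(total):
        start = time.monotonic()
        results.append(fetch_one())
        if min_interval:
            # sleep off whatever portion of the interval the fetch itself didn't consume
            time.sleep(max(0.0, min_interval - (time.monotonic() - start)))
    return results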
@@ -360,10 +334,12 @@ INDEXING_TRACER_INTERVAL = int(os.environ.get("INDEXING_TRACER_INTERVAL", 0))
# exception without aborting the attempt.
INDEXING_EXCEPTION_LIMIT = int(os.environ.get("INDEXING_EXCEPTION_LIMIT", 0))
#####
# Miscellaneous
#####
# File based Key Value store no longer used
DYNAMIC_CONFIG_STORE = "PostgresBackedDynamicConfigStore"
JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default
# used to allow the background indexing jobs to use a different embedding
# model server than the API server
@@ -401,9 +377,6 @@ CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
)
VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5")
SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")
#####
# Enterprise Edition Configs
@@ -415,19 +388,3 @@ SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")
ENTERPRISE_EDITION_ENABLED = (
os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() == "true"
)
# Azure DALL-E Configurations
AZURE_DALLE_API_VERSION = os.environ.get("AZURE_DALLE_API_VERSION")
AZURE_DALLE_API_KEY = os.environ.get("AZURE_DALLE_API_KEY")
AZURE_DALLE_API_BASE = os.environ.get("AZURE_DALLE_API_BASE")
AZURE_DALLE_DEPLOYMENT_NAME = os.environ.get("AZURE_DALLE_DEPLOYMENT_NAME")
MULTI_TENANT = os.environ.get("MULTI_TENANT", "").lower() == "true"
SECRET_JWT_KEY = os.environ.get("SECRET_JWT_KEY", "")
DATA_PLANE_SECRET = os.environ.get("DATA_PLANE_SECRET", "")
EXPECTED_API_KEY = os.environ.get("EXPECTED_API_KEY", "")
ENABLE_EMAIL_INVITES = os.environ.get("ENABLE_EMAIL_INVITES", "").lower() == "true"

View File

@@ -1,5 +1,3 @@
import platform
import socket
from enum import auto
from enum import Enum
@@ -31,20 +29,14 @@ DISABLED_GEN_AI_MSG = (
"You can still use Danswer as a search engine."
)
# Prefix used for all tenant ids
TENANT_ID_PREFIX = "tenant_"
# Postgres connection constants for application_name
POSTGRES_WEB_APP_NAME = "web"
POSTGRES_INDEXER_APP_NAME = "indexer"
POSTGRES_CELERY_APP_NAME = "celery"
POSTGRES_CELERY_BEAT_APP_NAME = "celery_beat"
POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME = "celery_worker_primary"
POSTGRES_CELERY_WORKER_LIGHT_APP_NAME = "celery_worker_light"
POSTGRES_CELERY_WORKER_HEAVY_APP_NAME = "celery_worker_heavy"
POSTGRES_CELERY_WORKER_APP_NAME = "celery_worker"
POSTGRES_PERMISSIONS_APP_NAME = "permissions"
POSTGRES_UNKNOWN_APP_NAME = "unknown"
POSTGRES_DEFAULT_SCHEMA = "public"
# API Keys
DANSWER_API_KEY_PREFIX = "API_KEY__"
@@ -54,7 +46,6 @@ UNNAMED_KEY_PLACEHOLDER = "Unnamed"
# Key-Value store keys
KV_REINDEX_KEY = "needs_reindexing"
KV_SEARCH_SETTINGS = "search_settings"
KV_UNSTRUCTURED_API_KEY = "unstructured_api_key"
KV_USER_STORE_KEY = "INVITED_USERS"
KV_NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences"
KV_CRED_KEY = "credential_id_{}"
@@ -71,7 +62,6 @@ KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings"
KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__"
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 60
CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120
class DocumentSource(str, Enum):
@@ -114,7 +104,6 @@ class DocumentSource(str, Enum):
R2 = "r2"
GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
OCI_STORAGE = "oci_storage"
XENFORO = "xenforo"
NOT_APPLICABLE = "not_applicable"
@@ -190,17 +179,17 @@ class PostgresAdvisoryLocks(Enum):
class DanswerCeleryQueues:
VESPA_DOCSET_SYNC_GENERATOR = "vespa_docset_sync_generator"
VESPA_USERGROUP_SYNC_GENERATOR = "vespa_usergroup_sync_generator"
VESPA_METADATA_SYNC = "vespa_metadata_sync"
CONNECTOR_DELETION = "connector_deletion"
CONNECTOR_PRUNING = "connector_pruning"
class DanswerRedisLocks:
PRIMARY_WORKER = "da_lock:primary_worker"
CHECK_VESPA_SYNC_BEAT_LOCK = "da_lock:check_vespa_sync_beat"
MONITOR_VESPA_SYNC_BEAT_LOCK = "da_lock:monitor_vespa_sync_beat"
CHECK_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:check_connector_deletion_beat"
CHECK_PRUNE_BEAT_LOCK = "da_lock:check_prune_beat"
MONITOR_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:monitor_connector_deletion_beat"
class DanswerCeleryPriority(int, Enum):
@@ -209,13 +198,3 @@ class DanswerCeleryPriority(int, Enum):
MEDIUM = auto()
LOW = auto()
LOWEST = auto()
REDIS_SOCKET_KEEPALIVE_OPTIONS = {}
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPINTVL] = 15
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPCNT] = 3
if platform.system() == "Darwin":
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPALIVE] = 60 # type: ignore
else:
REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPIDLE] = 60 # type: ignore
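These keepalive options are meant to be handed to the Redis client. A minimal sketch using redis-py follows; the host and port are placeholders, and the constant comes from the block above.
import redis

# Hypothetical connection parameters; the keepalive map above is passed through so
# dead connections behind NATs/load balancers are detected promptly.
client = redis.Redis(
    host="localhost",
    port=6379,
    socket_keepalive=True,
    socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
)
client.ping()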

View File

@@ -194,8 +194,8 @@ class BlobStorageConnector(LoadConnector, PollConnector):
try:
text = extract_file_text(
name,
BytesIO(downloaded_file),
file_name=name,
break_on_unprocessable=False,
)
batch.append(

View File

@@ -1,32 +0,0 @@
import bs4
def build_confluence_document_id(base_url: str, content_url: str) -> str:
"""For confluence, the document id is the page url for a page based document
or the attachment download url for an attachment based document
Args:
base_url (str): The base url of the Confluence instance
content_url (str): The url of the page or attachment download url
Returns:
str: The document id
"""
return f"{base_url}{content_url}"
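A quick usage example of the helper above, with hypothetical URLs; the document id is simply the concatenation of the two parts.
doc_id = build_confluence_document_id(
    base_url="https://wiki.example.com",
    content_url="/pages/viewpage.action?pageId=12345",
)
assert doc_id == "https://wiki.example.com/pages/viewpage.action?pageId=12345"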
def get_used_attachments(text: str) -> list[str]:
"""Parse a Confluence html page to generate a list of current
attachment in used
Args:
text (str): The page content
Returns:
list[str]: List of filenames currently in use by the page text
"""
files_in_used = []
soup = bs4.BeautifulSoup(text, "html.parser")
for attachment in soup.findAll("ri:attachment"):
files_in_used.append(attachment.attrs["ri:filename"])
return files_in_used

View File

@@ -22,10 +22,6 @@ from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING
from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.confluence.confluence_utils import (
build_confluence_document_id,
)
from danswer.connectors.confluence.confluence_utils import get_used_attachments
from danswer.connectors.confluence.rate_limit_handler import (
make_confluence_call_handle_rate_limit,
)
@@ -109,6 +105,24 @@ def parse_html_page(text: str, confluence_client: Confluence) -> str:
return format_document_soup(soup)
def get_used_attachments(text: str, confluence_client: Confluence) -> list[str]:
"""Parse a Confluence html page to generate a list of current
attachment in used
Args:
text (str): The page content
confluence_client (Confluence): Confluence client
Returns:
list[str]: List of filenames currently in use
"""
files_in_used = []
soup = bs4.BeautifulSoup(text, "html.parser")
for attachment in soup.findAll("ri:attachment"):
files_in_used.append(attachment.attrs["ri:filename"])
return files_in_used
def _comment_dfs(
comments_str: str,
comment_pages: Collection[dict[str, Any]],
@@ -519,9 +533,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
return None
extracted_text = extract_file_text(
io.BytesIO(response.content),
file_name=attachment["title"],
break_on_unprocessable=False,
attachment["title"], io.BytesIO(response.content), False
)
if len(extracted_text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD:
logger.warning(
@@ -612,22 +624,19 @@ class ConfluenceConnector(LoadConnector, PollConnector):
page_html = (
page["body"].get("storage", page["body"].get("view", {})).get("value")
)
# The url and the id are the same
page_url = build_confluence_document_id(
self.wiki_base, page["_links"]["webui"]
)
page_url = self.wiki_base + page["_links"]["webui"]
if not page_html:
logger.debug("Page is empty, skipping: %s", page_url)
continue
page_text = parse_html_page(page_html, self.confluence_client)
files_in_used = get_used_attachments(page_html)
files_in_used = get_used_attachments(page_html, self.confluence_client)
attachment_text, unused_page_attachments = self._fetch_attachments(
self.confluence_client, page_id, files_in_used
)
unused_attachments.extend(unused_page_attachments)
page_text += "\n" + attachment_text if attachment_text else ""
page_text += attachment_text
comments_text = self._fetch_comments(self.confluence_client, page_id)
page_text += comments_text
doc_metadata: dict[str, str | list[str]] = {"Wiki Space Name": self.space}
@@ -674,9 +683,8 @@ class ConfluenceConnector(LoadConnector, PollConnector):
if time_filter and not time_filter(last_updated):
continue
# The url and the id are the same
attachment_url = build_confluence_document_id(
self.wiki_base, attachment["_links"]["download"]
attachment_url = self._attachment_to_download_link(
self.confluence_client, attachment
)
attachment_content = self._attachment_to_content(
self.confluence_client, attachment

View File

@@ -50,12 +50,6 @@ def make_confluence_call_handle_rate_limit(confluence_call: F) -> F:
pass
if retry_after is not None:
if retry_after > 600:
logger.warning(
f"Clamping retry_after from {retry_after} to {max_delay} seconds..."
)
retry_after = max_delay
logger.warning(
f"Rate limit hit. Retrying after {retry_after} seconds..."
)

View File

@@ -9,7 +9,6 @@ from jira.resources import Issue
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from danswer.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -135,18 +134,10 @@ def fetch_jira_issues_batch(
else extract_text_from_adf(jira.raw["fields"]["description"])
)
comments = _get_comment_strs(jira, comment_email_blacklist)
ticket_content = f"{description}\n" + "\n".join(
semantic_rep = f"{description}\n" + "\n".join(
[f"Comment: {comment}" for comment in comments if comment]
)
# Check ticket size
if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
logger.info(
f"Skipping {jira.key} because it exceeds the maximum size of "
f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
)
continue
page_url = f"{jira_client.client_info()}/browse/{jira.key}"
people = set()
@@ -189,7 +180,7 @@ def fetch_jira_issues_batch(
doc_batch.append(
Document(
id=page_url,
sections=[Section(link=page_url, text=ticket_content)],
sections=[Section(link=page_url, text=semantic_rep)],
source=DocumentSource.JIRA,
semantic_identifier=jira.fields.summary,
doc_updated_at=time_str_to_utc(jira.fields.updated),
@@ -245,12 +236,10 @@ class JiraConnector(LoadConnector, PollConnector):
if self.jira_client is None:
raise ConnectorMissingCredentialError("Jira")
# Quote the project name to handle reserved words
quoted_project = f'"{self.jira_project}"'
start_ind = 0
while True:
doc_batch, fetched_batch_size = fetch_jira_issues_batch(
jql=f"project = {quoted_project}",
jql=f"project = {self.jira_project}",
start_index=start_ind,
jira_client=self.jira_client,
batch_size=self.batch_size,
@@ -278,10 +267,8 @@ class JiraConnector(LoadConnector, PollConnector):
"%Y-%m-%d %H:%M"
)
# Quote the project name to handle reserved words
quoted_project = f'"{self.jira_project}"'
jql = (
f"project = {quoted_project} AND "
f"project = {self.jira_project} AND "
f"updated >= '{start_date_str}' AND "
f"updated <= '{end_date_str}'"
)
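To see why the project key is quoted above, a small example with a hypothetical project name: without the quotes, a key containing spaces or a JQL reserved word would break the query.
jira_project = "Legacy AND Archive"  # hypothetical project name containing a reserved word
quoted_project = f'"{jira_project}"'
jql = f"project = {quoted_project} AND updated >= '2024-01-01 00:00'"
assert jql == 'project = "Legacy AND Archive" AND updated >= \'2024-01-01 00:00\''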

View File

@@ -97,8 +97,8 @@ class DropboxConnector(LoadConnector, PollConnector):
link = self._get_shared_link(entry.path_display)
try:
text = extract_file_text(
entry.name,
BytesIO(downloaded_file),
file_name=entry.name,
break_on_unprocessable=False,
)
batch.append(

View File

@@ -42,7 +42,6 @@ from danswer.connectors.slack.load_connector import SlackLoadConnector
from danswer.connectors.teams.connector import TeamsConnector
from danswer.connectors.web.connector import WebConnector
from danswer.connectors.wikipedia.connector import WikipediaConnector
from danswer.connectors.xenforo.connector import XenforoConnector
from danswer.connectors.zendesk.connector import ZendeskConnector
from danswer.connectors.zulip.connector import ZulipConnector
from danswer.db.credentials import backend_update_credential_json
@@ -63,7 +62,6 @@ def identify_connector_class(
DocumentSource.SLACK: {
InputType.LOAD_STATE: SlackLoadConnector,
InputType.POLL: SlackPollConnector,
InputType.PRUNE: SlackPollConnector,
},
DocumentSource.GITHUB: GithubConnector,
DocumentSource.GMAIL: GmailConnector,
@@ -99,7 +97,6 @@ def identify_connector_class(
DocumentSource.R2: BlobStorageConnector,
DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector,
DocumentSource.OCI_STORAGE: BlobStorageConnector,
DocumentSource.XENFORO: XenforoConnector,
}
connector_by_source = connector_map.get(source, {})

View File

@@ -74,14 +74,13 @@ def _process_file(
)
# Using the PDF reader function directly to pass in password cleanly
elif extension == ".pdf" and pdf_pass is not None:
elif extension == ".pdf":
file_content_raw, file_metadata = read_pdf_file(file=file, pdf_pass=pdf_pass)
else:
file_content_raw = extract_file_text(
file=file,
file_name=file_name,
break_on_unprocessable=True,
file=file,
)
all_metadata = {**metadata, **file_metadata} if metadata else file_metadata

View File

@@ -25,7 +25,7 @@ from danswer.connectors.gmail.constants import (
from danswer.connectors.gmail.constants import SCOPES
from danswer.db.credentials import update_credential_json
from danswer.db.models import User
from danswer.key_value_store.factory import get_kv_store
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.server.documents.models import CredentialBase
from danswer.server.documents.models import GoogleAppCredentials
from danswer.server.documents.models import GoogleServiceAccountKey
@@ -72,7 +72,7 @@ def get_gmail_creds_for_service_account(
def verify_csrf(credential_id: int, state: str) -> None:
csrf = get_kv_store().load(KV_CRED_KEY.format(str(credential_id)))
csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id)))
if csrf != state:
raise PermissionError(
"State from Gmail Connector callback does not match expected"
@@ -80,7 +80,7 @@ def verify_csrf(credential_id: int, state: str) -> None:
def get_gmail_auth_url(credential_id: int) -> str:
creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY))
credential_json = json.loads(creds_str)
flow = InstalledAppFlow.from_client_config(
credential_json,
@@ -92,14 +92,14 @@ def get_gmail_auth_url(credential_id: int) -> str:
parsed_url = cast(ParseResult, urlparse(auth_url))
params = parse_qs(parsed_url.query)
get_kv_store().store(
get_dynamic_config_store().store(
KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
) # type: ignore
return str(auth_url)
def get_auth_url(credential_id: int) -> str:
creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY))
credential_json = json.loads(creds_str)
flow = InstalledAppFlow.from_client_config(
credential_json,
@@ -111,7 +111,7 @@ def get_auth_url(credential_id: int) -> str:
parsed_url = cast(ParseResult, urlparse(auth_url))
params = parse_qs(parsed_url.query)
get_kv_store().store(
get_dynamic_config_store().store(
KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
) # type: ignore
return str(auth_url)
@@ -158,40 +158,42 @@ def build_service_account_creds(
def get_google_app_gmail_cred() -> GoogleAppCredentials:
creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY))
creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY))
return GoogleAppCredentials(**json.loads(creds_str))
def upsert_google_app_gmail_cred(app_credentials: GoogleAppCredentials) -> None:
get_kv_store().store(KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True)
get_dynamic_config_store().store(
KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True
)
def delete_google_app_gmail_cred() -> None:
get_kv_store().delete(KV_GMAIL_CRED_KEY)
get_dynamic_config_store().delete(KV_GMAIL_CRED_KEY)
def get_gmail_service_account_key() -> GoogleServiceAccountKey:
creds_str = str(get_kv_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY))
creds_str = str(get_dynamic_config_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY))
return GoogleServiceAccountKey(**json.loads(creds_str))
def upsert_gmail_service_account_key(
service_account_key: GoogleServiceAccountKey,
) -> None:
get_kv_store().store(
get_dynamic_config_store().store(
KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
)
def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None:
get_kv_store().store(
get_dynamic_config_store().store(
KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
)
def delete_gmail_service_account_key() -> None:
get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
def delete_service_account_key() -> None:
get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)
get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY)

View File

@@ -36,8 +36,6 @@ from danswer.connectors.models import Section
from danswer.file_processing.extract_file_text import docx_to_text
from danswer.file_processing.extract_file_text import pptx_to_text
from danswer.file_processing.extract_file_text import read_pdf_file
from danswer.file_processing.unstructured import get_unstructured_api_key
from danswer.file_processing.unstructured import unstructured_to_text
from danswer.utils.batching import batch_generator
from danswer.utils.logger import setup_logger
@@ -329,24 +327,16 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str:
GDriveMimeType.MARKDOWN.value,
]:
return service.files().get_media(fileId=file["id"]).execute().decode("utf-8")
if mime_type in [
GDriveMimeType.WORD_DOC.value,
GDriveMimeType.POWERPOINT.value,
GDriveMimeType.PDF.value,
]:
elif mime_type == GDriveMimeType.WORD_DOC.value:
response = service.files().get_media(fileId=file["id"]).execute()
if get_unstructured_api_key():
return unstructured_to_text(
file=io.BytesIO(response), file_name=file.get("name", file["id"])
)
if mime_type == GDriveMimeType.WORD_DOC.value:
return docx_to_text(file=io.BytesIO(response))
elif mime_type == GDriveMimeType.PDF.value:
text, _ = read_pdf_file(file=io.BytesIO(response))
return text
elif mime_type == GDriveMimeType.POWERPOINT.value:
return pptx_to_text(file=io.BytesIO(response))
return docx_to_text(file=io.BytesIO(response))
elif mime_type == GDriveMimeType.PDF.value:
response = service.files().get_media(fileId=file["id"]).execute()
text, _ = read_pdf_file(file=io.BytesIO(response))
return text
elif mime_type == GDriveMimeType.POWERPOINT.value:
response = service.files().get_media(fileId=file["id"]).execute()
return pptx_to_text(file=io.BytesIO(response))
return UNSUPPORTED_FILE_TYPE_CONTENT

View File

@@ -28,7 +28,7 @@ from danswer.connectors.google_drive.constants import FETCH_GROUPS_SCOPES
from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES
from danswer.db.credentials import update_credential_json
from danswer.db.models import User
from danswer.key_value_store.factory import get_kv_store
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.server.documents.models import CredentialBase
from danswer.server.documents.models import GoogleAppCredentials
from danswer.server.documents.models import GoogleServiceAccountKey
@@ -134,7 +134,7 @@ def get_google_drive_creds(
def verify_csrf(credential_id: int, state: str) -> None:
csrf = get_kv_store().load(KV_CRED_KEY.format(str(credential_id)))
csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id)))
if csrf != state:
raise PermissionError(
"State from Google Drive Connector callback does not match expected"
@@ -142,7 +142,7 @@ def verify_csrf(credential_id: int, state: str) -> None:
def get_auth_url(credential_id: int) -> str:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
credential_json = json.loads(creds_str)
flow = InstalledAppFlow.from_client_config(
credential_json,
@@ -154,7 +154,7 @@ def get_auth_url(credential_id: int) -> str:
parsed_url = cast(ParseResult, urlparse(auth_url))
params = parse_qs(parsed_url.query)
get_kv_store().store(
get_dynamic_config_store().store(
KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True
) # type: ignore
return str(auth_url)
@@ -202,28 +202,32 @@ def build_service_account_creds(
def get_google_app_cred() -> GoogleAppCredentials:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY))
return GoogleAppCredentials(**json.loads(creds_str))
def upsert_google_app_cred(app_credentials: GoogleAppCredentials) -> None:
get_kv_store().store(KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True)
get_dynamic_config_store().store(
KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True
)
def delete_google_app_cred() -> None:
get_kv_store().delete(KV_GOOGLE_DRIVE_CRED_KEY)
get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_CRED_KEY)
def get_service_account_key() -> GoogleServiceAccountKey:
creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY))
creds_str = str(
get_dynamic_config_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)
)
return GoogleServiceAccountKey(**json.loads(creds_str))
def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None:
get_kv_store().store(
get_dynamic_config_store().store(
KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True
)
def delete_service_account_key() -> None:
get_kv_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)
get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)

View File

@@ -40,8 +40,8 @@ def _convert_driveitem_to_document(
driveitem: DriveItem,
) -> Document:
file_text = extract_file_text(
file=io.BytesIO(driveitem.get_content().execute_query().value),
file_name=driveitem.name,
file=io.BytesIO(driveitem.get_content().execute_query().value),
break_on_unprocessable=False,
)

View File

@@ -8,12 +8,13 @@ from typing import cast
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.web import SlackResponse
from danswer.configs.app_configs import ENABLE_EXPENSIVE_EXPERT_CALLS
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import IdConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import BasicExpertInfo
@@ -22,8 +23,9 @@ from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.connectors.slack.utils import expert_info_from_slack_id
from danswer.connectors.slack.utils import get_message_link
from danswer.connectors.slack.utils import make_paginated_slack_api_call_w_retries
from danswer.connectors.slack.utils import make_slack_api_call_w_retries
from danswer.connectors.slack.utils import make_slack_api_call_logged
from danswer.connectors.slack.utils import make_slack_api_call_paginated
from danswer.connectors.slack.utils import make_slack_api_rate_limited
from danswer.connectors.slack.utils import SlackTextCleaner
from danswer.utils.logger import setup_logger
@@ -36,18 +38,47 @@ MessageType = dict[str, Any]
# list of messages in a thread
ThreadType = list[MessageType]
basic_retry_wrapper = retry_builder()
def _collect_paginated_channels(
def _make_paginated_slack_api_call(
call: Callable[..., SlackResponse], **kwargs: Any
) -> Generator[dict[str, Any], None, None]:
return make_slack_api_call_paginated(
basic_retry_wrapper(
make_slack_api_rate_limited(make_slack_api_call_logged(call))
)
)(**kwargs)
def _make_slack_api_call(
call: Callable[..., SlackResponse], **kwargs: Any
) -> SlackResponse:
return basic_retry_wrapper(
make_slack_api_rate_limited(make_slack_api_call_logged(call))
)(**kwargs)
def get_channel_info(client: WebClient, channel_id: str) -> ChannelType:
"""Get information about a channel. Needed to convert channel ID to channel name"""
return _make_slack_api_call(client.conversations_info, channel=channel_id)[0][
"channel"
]
def _get_channels(
client: WebClient,
exclude_archived: bool,
channel_types: list[str],
get_private: bool,
) -> list[ChannelType]:
channels: list[dict[str, Any]] = []
for result in make_paginated_slack_api_call_w_retries(
for result in _make_paginated_slack_api_call(
client.conversations_list,
exclude_archived=exclude_archived,
# also get private channels the bot is added to
types=channel_types,
types=["public_channel", "private_channel"]
if get_private
else ["public_channel"],
):
channels.extend(result["channels"])
@@ -57,38 +88,19 @@ def _collect_paginated_channels(
def get_channels(
client: WebClient,
exclude_archived: bool = True,
get_public: bool = True,
get_private: bool = True,
) -> list[ChannelType]:
"""Get all channels in the workspace"""
channels: list[dict[str, Any]] = []
channel_types = []
if get_public:
channel_types.append("public_channel")
if get_private:
channel_types.append("private_channel")
# try getting private channels as well at first
try:
channels = _collect_paginated_channels(
client=client,
exclude_archived=exclude_archived,
channel_types=channel_types,
return _get_channels(
client=client, exclude_archived=exclude_archived, get_private=True
)
except SlackApiError as e:
logger.info(f"Unable to fetch private channels due to - {e}")
logger.info("trying again without private channels")
if get_public:
channel_types = ["public_channel"]
else:
logger.warning("No channels to fetch")
return []
channels = _collect_paginated_channels(
client=client,
exclude_archived=exclude_archived,
channel_types=channel_types,
)
return channels
return _get_channels(
client=client, exclude_archived=exclude_archived, get_private=False
)
def get_channel_messages(
@@ -100,14 +112,14 @@ def get_channel_messages(
"""Get all messages in a channel"""
# join so that the bot can access messages
if not channel["is_member"]:
make_slack_api_call_w_retries(
_make_slack_api_call(
client.conversations_join,
channel=channel["id"],
is_private=channel["is_private"],
)
logger.info(f"Successfully joined '{channel['name']}'")
for result in make_paginated_slack_api_call_w_retries(
for result in _make_paginated_slack_api_call(
client.conversations_history,
channel=channel["id"],
oldest=oldest,
@@ -119,7 +131,7 @@ def get_channel_messages(
def get_thread(client: WebClient, channel_id: str, thread_id: str) -> ThreadType:
"""Get all messages in a thread"""
threads: list[MessageType] = []
for result in make_paginated_slack_api_call_w_retries(
for result in _make_paginated_slack_api_call(
client.conversations_replies, channel=channel_id, ts=thread_id
):
threads.extend(result["messages"])
@@ -205,17 +217,12 @@ _DISALLOWED_MSG_SUBTYPES = {
"group_leave",
"group_archive",
"group_unarchive",
"channel_leave",
"channel_name",
"channel_join",
}
def default_msg_filter(message: MessageType) -> bool:
def _default_msg_filter(message: MessageType) -> bool:
# Don't keep messages from bots
if message.get("bot_id") or message.get("app_id"):
if message.get("bot_profile", {}).get("name") == "DanswerConnector":
return False
return True
# Uninformative
@@ -259,14 +266,14 @@ def filter_channels(
]
def _get_all_docs(
def get_all_docs(
client: WebClient,
workspace: str,
channels: list[str] | None = None,
channel_name_regex_enabled: bool = False,
oldest: str | None = None,
latest: str | None = None,
msg_filter_func: Callable[[MessageType], bool] = default_msg_filter,
msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter,
) -> Generator[Document, None, None]:
"""Get all documents in the workspace, channel by channel"""
slack_cleaner = SlackTextCleaner(client=client)
@@ -321,44 +328,7 @@ def _get_all_docs(
)
def _get_all_doc_ids(
client: WebClient,
channels: list[str] | None = None,
channel_name_regex_enabled: bool = False,
msg_filter_func: Callable[[MessageType], bool] = default_msg_filter,
) -> set[str]:
"""
Get all document ids in the workspace, channel by channel
This is nearly identical to _get_all_docs, but it returns a set of ids instead of documents,
which makes it an order of magnitude faster than _get_all_docs
"""
all_channels = get_channels(client)
filtered_channels = filter_channels(
all_channels, channels, channel_name_regex_enabled
)
all_doc_ids = set()
for channel in filtered_channels:
channel_message_batches = get_channel_messages(
client=client,
channel=channel,
)
for message_batch in channel_message_batches:
for message in message_batch:
if msg_filter_func(message):
continue
# The document id is the channel id and the ts of the first message in the thread
# Since we already have the first message of the thread, we don't have to
# fetch the thread for id retrieval, saving time and API calls
all_doc_ids.add(f"{channel['id']}__{message['ts']}")
return all_doc_ids
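Since the composite id above is just the channel id and the thread's first-message timestamp joined by a double underscore, it can be split back apart when a caller later needs the underlying thread. A tiny sketch (the helper name is ours, not part of the connector):
def parse_slack_doc_id(doc_id: str) -> tuple[str, str]:
    # Inverse of the f"{channel['id']}__{message['ts']}" format used above
    channel_id, thread_ts = doc_id.split("__", 1)
    return channel_id, thread_ts

# e.g. re-fetching the thread behind a stored doc id:
# channel_id, thread_ts = parse_slack_doc_id("C0123456789__1695480000.000100")
# thread = get_thread(client, channel_id, thread_ts)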
class SlackPollConnector(PollConnector, IdConnector):
class SlackPollConnector(PollConnector):
def __init__(
self,
workspace: str,
@@ -379,16 +349,6 @@ class SlackPollConnector(PollConnector, IdConnector):
self.client = WebClient(token=bot_token)
return None
def retrieve_all_source_ids(self) -> set[str]:
if self.client is None:
raise ConnectorMissingCredentialError("Slack")
return _get_all_doc_ids(
client=self.client,
channels=self.channels,
channel_name_regex_enabled=self.channel_regex_enabled,
)
def poll_source(
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> GenerateDocumentsOutput:
@@ -396,7 +356,7 @@ class SlackPollConnector(PollConnector, IdConnector):
raise ConnectorMissingCredentialError("Slack")
documents: list[Document] = []
for document in _get_all_docs(
for document in get_all_docs(
client=self.client,
workspace=self.workspace,
channels=self.channels,

View File

@@ -10,13 +10,11 @@ from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.web import SlackResponse
from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder
from danswer.connectors.models import BasicExpertInfo
from danswer.utils.logger import setup_logger
logger = setup_logger()
basic_retry_wrapper = retry_builder()
# number of messages we request per page when fetching paginated slack messages
_SLACK_LIMIT = 900
@@ -36,7 +34,7 @@ def get_message_link(
)
def _make_slack_api_call_logged(
def make_slack_api_call_logged(
call: Callable[..., SlackResponse],
) -> Callable[..., SlackResponse]:
@wraps(call)
@@ -49,7 +47,7 @@ def _make_slack_api_call_logged(
return logged_call
def _make_slack_api_call_paginated(
def make_slack_api_call_paginated(
call: Callable[..., SlackResponse],
) -> Callable[..., Generator[dict[str, Any], None, None]]:
"""Wraps calls to slack API so that they automatically handle pagination"""
@@ -118,24 +116,6 @@ def make_slack_api_rate_limited(
return rate_limited_call
def make_slack_api_call_w_retries(
call: Callable[..., SlackResponse], **kwargs: Any
) -> SlackResponse:
return basic_retry_wrapper(
make_slack_api_rate_limited(_make_slack_api_call_logged(call))
)(**kwargs)
def make_paginated_slack_api_call_w_retries(
call: Callable[..., SlackResponse], **kwargs: Any
) -> Generator[dict[str, Any], None, None]:
return _make_slack_api_call_paginated(
basic_retry_wrapper(
make_slack_api_rate_limited(_make_slack_api_call_logged(call))
)
)(**kwargs)
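For context, the pagination helper leans on Slack's cursor-based pagination, where each response carries response_metadata.next_cursor and that cursor is fed back into the next call. A minimal sketch of the loop using only the public slack_sdk surface (an illustration of the pattern, not the actual body of make_slack_api_call_paginated):
from collections.abc import Generator
from typing import Any, Callable, cast

from slack_sdk.web import SlackResponse


def paginate(
    call: Callable[..., SlackResponse], **kwargs: Any
) -> Generator[dict[str, Any], None, None]:
    cursor: str | None = None
    while True:
        response = call(cursor=cursor, limit=_SLACK_LIMIT, **kwargs)
        data = cast(dict[str, Any], response.data)
        yield data  # each page is a plain dict of results
        cursor = data.get("response_metadata", {}).get("next_cursor")
        if not cursor:
            break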
def expert_info_from_slack_id(
user_id: str | None,
client: WebClient,

View File

@@ -1,244 +0,0 @@
"""
This is the XenforoConnector class, used to connect to a Xenforo forum and load or update documents from it.
To use it, provide the URL of the Xenforo forum board you want to connect to when creating an instance of the
class. The URL should be a string that starts with 'http://' or 'https://', followed by the domain name of the
forum and the board name. For example:
base_url = 'https://www.example.com/forum/boards/some-topic/'
The `load_from_state` method loads documents from the forum; on runs after the first, only posts newer than
the connector's start time are included.
"""
import re
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import Any
from urllib.parse import urlparse
import pytz
import requests
from bs4 import BeautifulSoup
from bs4 import Tag
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import datetime_to_utc
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
logger = setup_logger()
def get_title(soup: BeautifulSoup) -> str:
el = soup.find("h1", "p-title-value")
if not el:
return ""
title = el.text
for char in (";", ":", "!", "*", "/", "\\", "?", '"', "<", ">", "|"):
title = title.replace(char, "_")
return title
def get_pages(soup: BeautifulSoup, url: str) -> list[str]:
page_tags = soup.select("li.pageNav-page")
page_numbers = []
for button in page_tags:
if re.match(r"^\d+$", button.text):
page_numbers.append(button.text)
max_pages = int(max(page_numbers, key=int)) if page_numbers else 1
all_pages = []
for x in range(1, max_pages + 1):
all_pages.append(f"{url}page-{x}")
return all_pages
def parse_post_date(post_element: BeautifulSoup) -> datetime:
el = post_element.find("time")
if not isinstance(el, Tag) or "datetime" not in el.attrs:
return datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc)
date_value = el["datetime"]
# Ensure date_value is a string (if it's a list, take the first element)
if isinstance(date_value, list):
date_value = date_value[0]
post_date = datetime.strptime(date_value, "%Y-%m-%dT%H:%M:%S%z")
return datetime_to_utc(post_date)
def scrape_page_posts(
soup: BeautifulSoup,
page_index: int,
url: str,
initial_run: bool,
start_time: datetime,
) -> list:
title = get_title(soup)
documents = []
for post in soup.find_all("div", class_="message-inner"):
post_date = parse_post_date(post)
if initial_run or post_date > start_time:
el = post.find("div", class_="bbWrapper")
if not el:
continue
post_text = el.get_text(strip=True) + "\n"
author_tag = post.find("a", class_="username")
if author_tag is None:
author_tag = post.find("span", class_="username")
author = author_tag.get_text(strip=True) if author_tag else "Deleted author"
formatted_time = post_date.strftime("%Y-%m-%d %H:%M:%S")
# TODO: if a caller invokes this for each page of a thread, it may see the
# same post multiple times when a sticky post appears on every page of the
# thread. It's important to generate unique doc IDs, so the page index is
# part of the ID. We may want to de-dupe these inside the indexing service.
document = Document(
id=f"{DocumentSource.XENFORO.value}_{title}_{page_index}_{formatted_time}",
sections=[Section(link=url, text=post_text)],
title=title,
source=DocumentSource.XENFORO,
semantic_identifier=title,
primary_owners=[BasicExpertInfo(display_name=author)],
metadata={
"type": "post",
"author": author,
"time": formatted_time,
},
doc_updated_at=post_date,
)
documents.append(document)
return documents
class XenforoConnector(LoadConnector):
# Class variable to track if the connector has been run before
has_been_run_before = False
def __init__(self, base_url: str) -> None:
self.base_url = base_url
self.initial_run = not XenforoConnector.has_been_run_before
self.start = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(days=1)
self.cookies: dict[str, str] = {}
# mimic user browser to avoid being blocked by the website (see: https://www.useragents.me/)
self.headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/121.0.0.0 Safari/537.36"
}
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
if credentials:
logger.warning("Unexpected credentials provided for Xenforo Connector")
return None
def load_from_state(self) -> GenerateDocumentsOutput:
# Standardize URL to always end in /.
if self.base_url[-1] != "/":
self.base_url += "/"
# Remove all extra parameters from the end such as page, post.
matches = ("threads/", "boards/", "forums/")
for each in matches:
if each in self.base_url:
try:
self.base_url = self.base_url[
0 : self.base_url.index(
"/", self.base_url.index(each) + len(each)
)
+ 1
]
except ValueError:
pass
doc_batch: list[Document] = []
all_threads = []
# If the URL contains "boards/" or "forums/", find all threads.
if "boards/" in self.base_url or "forums/" in self.base_url:
pages = get_pages(self.requestsite(self.base_url), self.base_url)
# Get all pages on thread_list_page
for pre_count, thread_list_page in enumerate(pages, start=1):
logger.info(
f"Getting pages from thread_list_page.. Current: {pre_count}/{len(pages)}\r"
)
all_threads += self.get_threads(thread_list_page)
# If the URL contains "threads/", add the thread to the list.
elif "threads/" in self.base_url:
all_threads.append(self.base_url)
# Process all threads
for thread_count, thread_url in enumerate(all_threads, start=1):
soup = self.requestsite(thread_url)
if soup is None:
logger.error(f"Failed to load page: {self.base_url}")
continue
pages = get_pages(soup, thread_url)
# Getting all pages for all threads
for page_index, page in enumerate(pages, start=1):
logger.info(
f"Progress: Page {page_index}/{len(pages)} - Thread {thread_count}/{len(all_threads)}\r"
)
soup_page = self.requestsite(page)
doc_batch.extend(
scrape_page_posts(
soup_page, page_index, thread_url, self.initial_run, self.start
)
)
if doc_batch:
yield doc_batch
# Mark the initial run finished after all threads and pages have been processed
XenforoConnector.has_been_run_before = True
def get_threads(self, url: str) -> list[str]:
soup = self.requestsite(url)
thread_tags = soup.find_all(class_="structItem-title")
base_url = "{uri.scheme}://{uri.netloc}".format(uri=urlparse(url))
threads = []
for x in thread_tags:
y = x.find_all(href=True)
for element in y:
link = element["href"]
if "threads/" in link:
stripped = link[0 : link.rfind("/") + 1]
if base_url + stripped not in threads:
threads.append(base_url + stripped)
return threads
def requestsite(self, url: str) -> BeautifulSoup:
try:
response = requests.get(
url, cookies=self.cookies, headers=self.headers, timeout=10
)
if response.status_code != 200:
logger.error(
f"<{url}> Request Error: {response.status_code} - {response.reason}"
)
return BeautifulSoup(response.text, "html.parser")
except requests.exceptions.Timeout:
logger.error(f"Request to {url} timed out.")
except Exception as e:
logger.error(f"Error on {url}")
logger.exception(e)
return BeautifulSoup("", "html.parser")
if __name__ == "__main__":
connector = XenforoConnector(
# base_url="https://cassiopaea.org/forum/threads/how-to-change-your-emotional-state.41381/"
base_url="https://xenforo.com/community/threads/whats-new-with-enhanced-search-resource-manager-and-media-gallery-in-xenforo-2-3.220935/"
)
document_batches = connector.load_from_state()
print(next(document_batches))

View File

@@ -5,6 +5,7 @@ from typing import cast
from typing import Optional
from typing import TypeVar
from fastapi import HTTPException
from retry import retry
from slack_sdk import WebClient
from slack_sdk.models.blocks import DividerBlock
@@ -154,8 +155,12 @@ def handle_regular_answer(
with Session(get_sqlalchemy_engine()) as db_session:
if len(new_message_request.messages) > 1:
if new_message_request.persona_config:
raise RuntimeError("Slack bot does not support persona config")
elif new_message_request.persona_id is not None:
raise HTTPException(
status_code=403,
detail="Slack bot does not support persona config",
)
elif new_message_request.persona_id:
persona = cast(
Persona,
fetch_persona_by_id(
@@ -165,10 +170,6 @@ def handle_regular_answer(
get_editable=False,
),
)
else:
raise RuntimeError(
"No persona id provided, this should never happen."
)
llm, _ = get_llms_for_persona(persona)

View File

@@ -49,7 +49,7 @@ from danswer.danswerbot.slack.utils import rephrase_slack_message
from danswer.danswerbot.slack.utils import respond_in_thread
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.search_settings import get_current_search_settings
from danswer.key_value_store.interface import KvKeyNotFoundError
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.natural_language_processing.search_nlp_models import EmbeddingModel
from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from danswer.one_shot_answer.models import ThreadMessage
@@ -131,8 +131,9 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool
)
return False
bot_tag_id = get_danswer_bot_app_id(client.web_client)
if event_type == "message":
bot_tag_id = get_danswer_bot_app_id(client.web_client)
is_dm = event.get("channel_type") == "im"
is_tagged = bot_tag_id and bot_tag_id in msg
is_danswer_bot_msg = bot_tag_id and bot_tag_id in event.get("user", "")
@@ -158,10 +159,8 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool
slack_bot_config = get_slack_bot_config_for_channel(
channel_name=channel_name, db_session=db_session
)
# If DanswerBot is not specifically tagged and the channel is not set to respond to bots, ignore the message
if (not bot_tag_id or bot_tag_id not in msg) and (
not slack_bot_config
or not slack_bot_config.channel_config.get("respond_to_bots")
if not slack_bot_config or not slack_bot_config.channel_config.get(
"respond_to_bots"
):
channel_specific_logger.info("Ignoring message from bot")
return False
@@ -448,9 +447,8 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non
return view_routing(req, client)
elif req.type == "events_api" or req.type == "slash_commands":
return process_message(req, client)
except Exception as e:
logger.exception(f"Failed to process slack event. Error: {e}")
logger.error(f"Slack request payload: {req.payload}")
except Exception:
logger.exception("Failed to process slack event")
def _get_socket_client(slack_bot_tokens: SlackBotTokens) -> SocketModeClient:
@@ -524,7 +522,7 @@ if __name__ == "__main__":
# Let the handlers run in the background + re-check for token updates every 60 seconds
Event().wait(timeout=60)
except KvKeyNotFoundError:
except ConfigNotFoundError:
# try again every 30 seconds. This is needed since the user may add tokens
# via the UI at any point in the program's lifecycle - if we just allow it to
# fail, then the user will need to restart the containers after adding tokens

View File

@@ -2,7 +2,7 @@ import os
from typing import cast
from danswer.configs.constants import KV_SLACK_BOT_TOKENS_CONFIG_KEY
from danswer.key_value_store.factory import get_kv_store
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.server.manage.models import SlackBotTokens
@@ -13,7 +13,7 @@ def fetch_tokens() -> SlackBotTokens:
if app_token and bot_token:
return SlackBotTokens(app_token=app_token, bot_token=bot_token)
dynamic_config_store = get_kv_store()
dynamic_config_store = get_dynamic_config_store()
return SlackBotTokens(
**cast(dict, dynamic_config_store.load(key=KV_SLACK_BOT_TOKENS_CONFIG_KEY))
)
@@ -22,7 +22,7 @@ def fetch_tokens() -> SlackBotTokens:
def save_tokens(
tokens: SlackBotTokens,
) -> None:
dynamic_config_store = get_kv_store()
dynamic_config_store = get_dynamic_config_store()
dynamic_config_store.store(
key=KV_SLACK_BOT_TOKENS_CONFIG_KEY, val=dict(tokens), encrypt=True
)
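A small sketch of how these two helpers round-trip during setup; the token values are placeholders.
from danswer.server.manage.models import SlackBotTokens  # import path taken from this file


def bootstrap_tokens(app_token: str, bot_token: str) -> SlackBotTokens:
    # Persist the tokens (encrypted in the KV store), then read them back the
    # same way the bot does at startup (env vars take precedence in fetch_tokens).
    save_tokens(SlackBotTokens(app_token=app_token, bot_token=bot_token))
    return fetch_tokens()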

View File

@@ -430,58 +430,35 @@ def read_slack_thread(
replies = cast(dict, response.data).get("messages", [])
for reply in replies:
if "user" in reply and "bot_id" not in reply:
message = reply["text"]
user_sem_id = (
fetch_user_semantic_id_from_id(reply.get("user"), client)
or "Unknown User"
)
message = remove_danswer_bot_tag(reply["text"], client=client)
user_sem_id = fetch_user_semantic_id_from_id(reply["user"], client)
message_type = MessageType.USER
else:
self_app_id = get_danswer_bot_app_id(client)
if reply.get("user") == self_app_id:
# DanswerBot response
message_type = MessageType.ASSISTANT
user_sem_id = "Assistant"
# DanswerBot responses have both text and blocks
# The useful content is in the blocks, specifically the first block unless there are
# auto-detected filters
blocks = reply.get("blocks")
if not blocks:
logger.warning(f"DanswerBot response has no blocks: {reply}")
continue
message = blocks[0].get("text", {}).get("text")
# If auto-detected filters are on, use the second block for the actual answer
# The first block is the auto-detected filters
if message.startswith("_Filters"):
if len(blocks) < 2:
logger.warning(f"Only filter blocks found: {reply}")
continue
# This is the DanswerBot answer format; if there is a change to how we respond,
# this will need to be updated to get the correct "answer" portion
message = reply["blocks"][1].get("text", {}).get("text")
else:
# Other bots are not counted as the LLM response which only comes from Danswer
message_type = MessageType.USER
bot_user_name = fetch_user_semantic_id_from_id(
reply.get("user"), client
)
user_sem_id = bot_user_name or "Unknown Bot"
# For other bots, just use the text, as we have no way of knowing where the
# useful portion is
message = reply.get("text")
if not message:
message = blocks[0].get("text", {}).get("text")
if not message:
logger.warning("Skipping Slack thread message, no text found")
# Only include bot messages from Danswer, other bots are not taken in as context
if self_app_id != reply.get("user"):
continue
message = remove_danswer_bot_tag(message, client=client)
blocks = reply["blocks"]
if len(blocks) <= 1:
continue
# For the old flow, the useful block is the second one after the header block that says AI Answer
if reply["blocks"][0]["text"]["text"] == "AI Answer":
message = reply["blocks"][1]["text"]["text"]
else:
# for the new flow, the answer is the first block
message = reply["blocks"][0]["text"]["text"]
if message.startswith("_Filters"):
if len(blocks) <= 2:
continue
message = reply["blocks"][2]["text"]["text"]
user_sem_id = "Assistant"
message_type = MessageType.ASSISTANT
thread_messages.append(
ThreadMessage(message=message, sender=user_sem_id, role=message_type)
)
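To make the block handling above concrete, here is a tiny self-contained sketch of the newer extraction rule (answer in the first block, unless that block is the auto-detected "_Filters" block, in which case the answer is in the next one); the sample payload is invented.
def extract_bot_answer(blocks: list[dict]) -> str | None:
    # Mirrors the newer DanswerBot branch shown above
    if not blocks:
        return None
    text = blocks[0].get("text", {}).get("text", "")
    if text.startswith("_Filters"):
        if len(blocks) < 2:
            return None
        text = blocks[1].get("text", {}).get("text", "")
    return text or None


# extract_bot_answer(
#     [{"text": {"text": "_Filters: source=slack"}}, {"text": {"text": "The answer..."}}]
# ) == "The answer..."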

View File

@@ -13,7 +13,7 @@ from sqlalchemy.future import select
from danswer.auth.schemas import UserRole
from danswer.db.engine import get_async_session
from danswer.db.engine import get_async_session_with_tenant
from danswer.db.engine import get_sqlalchemy_async_engine
from danswer.db.models import AccessToken
from danswer.db.models import OAuthAccount
from danswer.db.models import User
@@ -34,7 +34,7 @@ def get_default_admin_user_emails() -> list[str]:
async def get_user_count() -> int:
async with get_async_session_with_tenant() as asession:
async with AsyncSession(get_sqlalchemy_async_engine()) as asession:
stmt = select(func.count(User.id))
result = await asession.execute(stmt)
user_count = result.scalar()

View File

@@ -1,5 +1,3 @@
from datetime import datetime
from datetime import timezone
from typing import cast
from sqlalchemy import and_
@@ -270,15 +268,3 @@ def create_initial_default_connector(db_session: Session) -> None:
)
db_session.add(connector)
db_session.commit()
def mark_ccpair_as_pruned(cc_pair_id: int, db_session: Session) -> None:
stmt = select(ConnectorCredentialPair).where(
ConnectorCredentialPair.id == cc_pair_id
)
cc_pair = db_session.scalar(stmt)
if cc_pair is None:
raise ValueError(f"No cc_pair with ID: {cc_pair_id}")
cc_pair.last_pruned = datetime.now(timezone.utc)
db_session.commit()

View File

@@ -26,7 +26,9 @@ from danswer.db.models import UserRole
from danswer.server.models import StatusResponse
from danswer.utils.logger import setup_logger
from ee.danswer.db.external_perm import delete_user__ext_group_for_cc_pair__no_commit
from ee.danswer.external_permissions.sync_params import check_if_valid_sync_source
from ee.danswer.external_permissions.permission_sync_function_map import (
check_if_valid_sync_source,
)
logger = setup_logger()
@@ -390,7 +392,6 @@ def add_credential_to_connector(
)
db_session.add(association)
db_session.flush() # make sure the association has an id
db_session.refresh(association)
if groups and access_type != AccessType.SYNC:
_relate_groups_to_cc_pair__no_commit(

View File

@@ -104,18 +104,6 @@ def construct_document_select_for_connector_credential_pair(
return stmt
def get_document_ids_for_connector_credential_pair(
db_session: Session, connector_id: int, credential_id: int, limit: int | None = None
) -> list[str]:
doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
and_(
DocumentByConnectorCredentialPair.connector_id == connector_id,
DocumentByConnectorCredentialPair.credential_id == credential_id,
)
)
return list(db_session.execute(doc_ids_stmt).scalars().all())
def get_documents_for_connector_credential_pair(
db_session: Session, connector_id: int, credential_id: int, limit: int | None = None
) -> Sequence[DbDocument]:
@@ -132,8 +120,8 @@ def get_documents_for_connector_credential_pair(
def get_documents_by_ids(
db_session: Session,
document_ids: list[str],
db_session: Session,
) -> list[DbDocument]:
stmt = select(DbDocument).where(DbDocument.id.in_(document_ids))
documents = db_session.execute(stmt).scalars().all()

View File

@@ -1,18 +1,10 @@
import contextlib
import re
import threading
import time
from collections.abc import AsyncGenerator
from collections.abc import Generator
from contextlib import asynccontextmanager
from contextlib import contextmanager
from datetime import datetime
from typing import Any
from typing import ContextManager
import jwt
from fastapi import HTTPException
from fastapi import Request
from sqlalchemy import event
from sqlalchemy import text
from sqlalchemy.engine import create_engine
@@ -25,9 +17,6 @@ from sqlalchemy.orm import sessionmaker
from danswer.configs.app_configs import LOG_POSTGRES_CONN_COUNTS
from danswer.configs.app_configs import LOG_POSTGRES_LATENCY
from danswer.configs.app_configs import MULTI_TENANT
from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
from danswer.configs.app_configs import POSTGRES_DB
from danswer.configs.app_configs import POSTGRES_HOST
from danswer.configs.app_configs import POSTGRES_PASSWORD
@@ -35,24 +24,27 @@ from danswer.configs.app_configs import POSTGRES_POOL_PRE_PING
from danswer.configs.app_configs import POSTGRES_POOL_RECYCLE
from danswer.configs.app_configs import POSTGRES_PORT
from danswer.configs.app_configs import POSTGRES_USER
from danswer.configs.app_configs import SECRET_JWT_KEY
from danswer.configs.constants import POSTGRES_DEFAULT_SCHEMA
from danswer.configs.constants import POSTGRES_UNKNOWN_APP_NAME
from danswer.utils.logger import setup_logger
from shared_configs.configs import current_tenant_id
logger = setup_logger()
SYNC_DB_API = "psycopg2"
ASYNC_DB_API = "asyncpg"
POSTGRES_APP_NAME = (
POSTGRES_UNKNOWN_APP_NAME # helps to diagnose open connections in postgres
)
# global so we don't create more than one engine per process
# outside of being best practice, this is needed so we can properly pool
# connections and not create a new pool on every request
_SYNC_ENGINE: Engine | None = None
_ASYNC_ENGINE: AsyncEngine | None = None
SessionFactory: sessionmaker[Session] | None = None
if LOG_POSTGRES_LATENCY:
# Function to log before query execution
@event.listens_for(Engine, "before_cursor_execute")
@@ -116,78 +108,6 @@ def get_db_current_time(db_session: Session) -> datetime:
return result
# Regular expression to validate schema names to prevent SQL injection
SCHEMA_NAME_REGEX = re.compile(r"^[a-zA-Z0-9_-]+$")
def is_valid_schema_name(name: str) -> bool:
return SCHEMA_NAME_REGEX.match(name) is not None
class SqlEngine:
"""Class to manage a global SQLAlchemy engine (needed for proper resource control).
Will eventually subsume most of the standalone functions in this file.
Sync only for now.
"""
_engine: Engine | None = None
_lock: threading.Lock = threading.Lock()
_app_name: str = POSTGRES_UNKNOWN_APP_NAME
# Default parameters for engine creation
DEFAULT_ENGINE_KWARGS = {
"pool_size": 20,
"max_overflow": 5,
"pool_pre_ping": POSTGRES_POOL_PRE_PING,
"pool_recycle": POSTGRES_POOL_RECYCLE,
}
def __init__(self) -> None:
pass
@classmethod
def _init_engine(cls, **engine_kwargs: Any) -> Engine:
"""Private helper method to create and return an Engine."""
connection_string = build_connection_string(
db_api=SYNC_DB_API, app_name=cls._app_name + "_sync"
)
merged_kwargs = {**cls.DEFAULT_ENGINE_KWARGS, **engine_kwargs}
return create_engine(connection_string, **merged_kwargs)
@classmethod
def init_engine(cls, **engine_kwargs: Any) -> None:
"""Allow the caller to init the engine with extra params. Different clients
such as the API server and different Celery workers and tasks
need different settings.
"""
with cls._lock:
if not cls._engine:
cls._engine = cls._init_engine(**engine_kwargs)
@classmethod
def get_engine(cls) -> Engine:
"""Gets the SQLAlchemy engine. Will init a default engine if init hasn't
already been called. You probably want to init first!
"""
if not cls._engine:
with cls._lock:
if not cls._engine:
cls._engine = cls._init_engine()
return cls._engine
@classmethod
def set_app_name(cls, app_name: str) -> None:
"""Class method to set the app name."""
cls._app_name = app_name
@classmethod
def get_app_name(cls) -> str:
"""Class method to get current app name."""
if not cls._app_name:
return ""
return cls._app_name
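A short usage sketch for the SqlEngine class above, as an API server or Celery worker entrypoint might call it; the pool numbers are illustrative.
# Called once at process startup, before any sessions are created
SqlEngine.set_app_name("danswer_api_server")
SqlEngine.init_engine(pool_size=40, max_overflow=10)

# Later callers reuse the same pooled engine
engine = SqlEngine.get_engine()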
def build_connection_string(
*,
db_api: str = ASYNC_DB_API,
@@ -200,187 +120,69 @@ def build_connection_string(
) -> str:
if app_name:
return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}?application_name={app_name}"
return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"
def init_sqlalchemy_engine(app_name: str) -> None:
global POSTGRES_APP_NAME
POSTGRES_APP_NAME = app_name
def get_sqlalchemy_engine() -> Engine:
return SqlEngine.get_engine()
global _SYNC_ENGINE
if _SYNC_ENGINE is None:
connection_string = build_connection_string(
db_api=SYNC_DB_API, app_name=POSTGRES_APP_NAME + "_sync"
)
_SYNC_ENGINE = create_engine(
connection_string,
pool_size=5,
max_overflow=0,
pool_pre_ping=POSTGRES_POOL_PRE_PING,
pool_recycle=POSTGRES_POOL_RECYCLE,
)
return _SYNC_ENGINE
def get_sqlalchemy_async_engine() -> AsyncEngine:
global _ASYNC_ENGINE
if _ASYNC_ENGINE is None:
# Underlying asyncpg cannot accept application_name directly in the connection string
# underlying asyncpg cannot accept application_name directly in the connection string
# https://github.com/MagicStack/asyncpg/issues/798
connection_string = build_connection_string()
_ASYNC_ENGINE = create_async_engine(
connection_string,
connect_args={
"server_settings": {
"application_name": SqlEngine.get_app_name() + "_async"
}
"server_settings": {"application_name": POSTGRES_APP_NAME + "_async"}
},
# async engine is only used by API server, so we can use those values
# here as well
pool_size=POSTGRES_API_SERVER_POOL_SIZE,
max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW,
pool_size=5,
max_overflow=0,
pool_pre_ping=POSTGRES_POOL_PRE_PING,
pool_recycle=POSTGRES_POOL_RECYCLE,
)
return _ASYNC_ENGINE
# Dependency to get the current tenant ID
# If no token is present, uses the default schema for this use case
def get_current_tenant_id(request: Request) -> str:
"""Dependency that extracts the tenant ID from the JWT token in the request and sets the context variable."""
if not MULTI_TENANT:
tenant_id = POSTGRES_DEFAULT_SCHEMA
current_tenant_id.set(tenant_id)
return tenant_id
token = request.cookies.get("tenant_details")
if not token:
current_value = current_tenant_id.get()
# If no token is present, use the default schema or handle accordingly
return current_value
try:
payload = jwt.decode(token, SECRET_JWT_KEY, algorithms=["HS256"])
tenant_id = payload.get("tenant_id")
if not tenant_id:
return current_tenant_id.get()
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID format")
current_tenant_id.set(tenant_id)
return tenant_id
except jwt.InvalidTokenError:
return current_tenant_id.get()
except Exception as e:
logger.error(f"Unexpected error in get_current_tenant_id: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
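Because get_current_tenant_id takes the FastAPI Request, it can be wired in as a dependency so the tenant context variable is set before a session is opened; a minimal sketch (the route path is illustrative, and this assumes the dependency ordering resolves the tenant first).
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session

router = APIRouter()


@router.get("/tenant")
def read_tenant(
    tenant_id: str = Depends(get_current_tenant_id),
    db_session: Session = Depends(get_session),
) -> dict[str, str]:
    # current_tenant_id was set by the first dependency, so get_session
    # binds this session to the tenant's schema
    return {"tenant_id": tenant_id}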
@asynccontextmanager
async def get_async_session_with_tenant(
tenant_id: str | None = None,
) -> AsyncGenerator[AsyncSession, None]:
if tenant_id is None:
tenant_id = current_tenant_id.get()
if not is_valid_schema_name(tenant_id):
logger.error(f"Invalid tenant ID: {tenant_id}")
raise Exception("Invalid tenant ID")
engine = get_sqlalchemy_async_engine()
async_session_factory = sessionmaker(
bind=engine, expire_on_commit=False, class_=AsyncSession
) # type: ignore
async with async_session_factory() as session:
try:
# Set the search_path to the tenant's schema
await session.execute(text(f'SET search_path = "{tenant_id}"'))
except Exception as e:
logger.error(f"Error setting search_path: {str(e)}")
# You can choose to re-raise the exception or handle it
# Here, we'll re-raise to prevent proceeding with an incorrect session
raise
else:
yield session
@contextmanager
def get_session_with_tenant(
tenant_id: str | None = None,
) -> Generator[Session, None, None]:
"""Generate a database session with the appropriate tenant schema set."""
engine = get_sqlalchemy_engine()
if tenant_id is None:
tenant_id = current_tenant_id.get()
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID")
# Establish a raw connection without starting a transaction
with engine.connect() as connection:
# Access the raw DBAPI connection
dbapi_connection = connection.connection
# Execute SET search_path outside of any transaction
cursor = dbapi_connection.cursor()
try:
cursor.execute(f'SET search_path TO "{tenant_id}"')
# Optionally verify the search_path was set correctly
cursor.execute("SHOW search_path")
cursor.fetchone()
finally:
cursor.close()
# Proceed to create a session using the connection
with Session(bind=connection, expire_on_commit=False) as session:
try:
yield session
finally:
# Reset search_path to default after the session is used
if MULTI_TENANT:
cursor = dbapi_connection.cursor()
try:
cursor.execute('SET search_path TO "$user", public')
finally:
cursor.close()
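Usage then looks like an ordinary context manager scoped to one tenant's schema; a quick sketch with a made-up tenant id.
with get_session_with_tenant("tenant_3f2c") as db_session:
    # every statement in this block runs with search_path set to "tenant_3f2c"
    db_session.execute(text("SELECT 1"))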
def get_session_generator_with_tenant(
tenant_id: str | None = None,
) -> Generator[Session, None, None]:
with get_session_with_tenant(tenant_id) as session:
yield session
def get_session_context_manager() -> ContextManager[Session]:
return contextlib.contextmanager(get_session)()
def get_session() -> Generator[Session, None, None]:
"""Generate a database session with the appropriate tenant schema set."""
tenant_id = current_tenant_id.get()
if tenant_id == "public" and MULTI_TENANT:
raise HTTPException(status_code=401, detail="User must authenticate")
engine = get_sqlalchemy_engine()
with Session(engine, expire_on_commit=False) as session:
if MULTI_TENANT:
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID")
# Set the search_path to the tenant's schema
session.execute(text(f'SET search_path = "{tenant_id}"'))
# The line below was added to monitor the latency caused by Postgres connections
# during API calls.
# with tracer.trace("db.get_session"):
with Session(get_sqlalchemy_engine(), expire_on_commit=False) as session:
yield session
async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
"""Generate an async database session with the appropriate tenant schema set."""
tenant_id = current_tenant_id.get()
engine = get_sqlalchemy_async_engine()
async with AsyncSession(engine, expire_on_commit=False) as async_session:
if MULTI_TENANT:
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID")
# Set the search_path to the tenant's schema
await async_session.execute(text(f'SET search_path = "{tenant_id}"'))
async with AsyncSession(
get_sqlalchemy_async_engine(), expire_on_commit=False
) as async_session:
yield async_session
def get_session_context_manager() -> ContextManager[Session]:
"""Context manager for database sessions."""
return contextlib.contextmanager(get_session_generator_with_tenant)()
def get_session_factory() -> sessionmaker[Session]:
"""Get a session factory."""
global SessionFactory
if SessionFactory is None:
SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
return SessionFactory
async def warm_up_connections(
sync_connections_to_warm_up: int = 20, async_connections_to_warm_up: int = 20
) -> None:
@@ -402,3 +204,10 @@ async def warm_up_connections(
await async_conn.execute(text("SELECT 1"))
for async_conn in async_connections:
await async_conn.close()
def get_session_factory() -> sessionmaker[Session]:
global SessionFactory
if SessionFactory is None:
SessionFactory = sessionmaker(bind=get_sqlalchemy_engine())
return SessionFactory

View File

@@ -64,12 +64,19 @@ def upsert_cloud_embedding_provider(
def upsert_llm_provider(
llm_provider: LLMProviderUpsertRequest,
db_session: Session,
is_creation: bool = True,
) -> FullLLMProvider:
existing_llm_provider = db_session.scalar(
select(LLMProviderModel).where(LLMProviderModel.name == llm_provider.name)
)
if existing_llm_provider and is_creation:
raise ValueError(f"LLM Provider with name {llm_provider.name} already exists")
if not existing_llm_provider:
if not is_creation:
raise ValueError(
f"LLM Provider with name {llm_provider.name} does not exist"
)
existing_llm_provider = LLMProviderModel(name=llm_provider.name)
db_session.add(existing_llm_provider)
@@ -83,7 +90,6 @@ def upsert_llm_provider(
existing_llm_provider.model_names = llm_provider.model_names
existing_llm_provider.is_public = llm_provider.is_public
existing_llm_provider.display_model_names = llm_provider.display_model_names
existing_llm_provider.deployment_name = llm_provider.deployment_name
if not existing_llm_provider.id:
# If it's not already in the db, we need to generate an ID by flushing

View File

@@ -50,7 +50,7 @@ from danswer.db.enums import IndexingStatus
from danswer.db.enums import IndexModelStatus
from danswer.db.enums import TaskStatus
from danswer.db.pydantic_type import PydanticType
from danswer.key_value_store.interface import JSON_ro
from danswer.dynamic_configs.interface import JSON_ro
from danswer.file_store.models import FileDescriptor
from danswer.llm.override_models import LLMOverride
from danswer.llm.override_models import PromptOverride
@@ -414,12 +414,6 @@ class ConnectorCredentialPair(Base):
last_successful_index_time: Mapped[datetime.datetime | None] = mapped_column(
DateTime(timezone=True), default=None
)
# last successful prune
last_pruned: Mapped[datetime.datetime | None] = mapped_column(
DateTime(timezone=True), nullable=True, index=True
)
total_docs_indexed: Mapped[int] = mapped_column(Integer, default=0)
connector: Mapped["Connector"] = relationship(
@@ -1143,8 +1137,6 @@ class LLMProvider(Base):
postgresql.ARRAY(String), nullable=True
)
deployment_name: Mapped[str | None] = mapped_column(String, nullable=True)
# should only be set for a single provider
is_default_provider: Mapped[bool | None] = mapped_column(Boolean, unique=True)
# EE only
@@ -1733,9 +1725,7 @@ class User__ExternalUserGroupId(Base):
user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True)
# These group ids have been prefixed by the source type
external_user_group_id: Mapped[str] = mapped_column(String, primary_key=True)
cc_pair_id: Mapped[int] = mapped_column(
ForeignKey("connector_credential_pair.id"), primary_key=True
)
cc_pair_id: Mapped[int] = mapped_column(ForeignKey("connector_credential_pair.id"))
class UsageReport(Base):
@@ -1763,23 +1753,3 @@ class UsageReport(Base):
requestor = relationship("User")
file = relationship("PGFileStore")
"""
Multi-tenancy related tables
"""
class PublicBase(DeclarativeBase):
__abstract__ = True
class UserTenantMapping(Base):
__tablename__ = "user_tenant_mapping"
__table_args__ = (
UniqueConstraint("email", "tenant_id", name="uq_user_tenant"),
{"schema": "public"},
)
email: Mapped[str] = mapped_column(String, nullable=False, primary_key=True)
tenant_id: Mapped[str] = mapped_column(String, nullable=False)

View File

@@ -11,7 +11,7 @@ from danswer.db.index_attempt import (
from danswer.db.search_settings import get_current_search_settings
from danswer.db.search_settings import get_secondary_search_settings
from danswer.db.search_settings import update_search_settings_status
from danswer.key_value_store.factory import get_kv_store
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.utils.logger import setup_logger
logger = setup_logger()
@@ -54,7 +54,7 @@ def check_index_swap(db_session: Session) -> None:
)
if cc_pair_count > 0:
kv_store = get_kv_store()
kv_store = get_dynamic_config_store()
kv_store.store(KV_REINDEX_KEY, False)
# Expire jobs for the now past index/embedding model

View File

@@ -1,4 +1,3 @@
from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import or_
@@ -108,14 +107,12 @@ def create_or_add_document_tag_list(
return all_tags
def find_tags(
def get_tags_by_value_prefix_for_source_types(
tag_key_prefix: str | None,
tag_value_prefix: str | None,
sources: list[DocumentSource] | None,
limit: int | None,
db_session: Session,
# if set, both tag_key_prefix and tag_value_prefix must match
require_both_to_match: bool = False,
) -> list[Tag]:
query = select(Tag)
@@ -125,11 +122,7 @@ def find_tags(
conditions.append(Tag.tag_key.ilike(f"{tag_key_prefix}%"))
if tag_value_prefix:
conditions.append(Tag.tag_value.ilike(f"{tag_value_prefix}%"))
final_prefix_condition = (
and_(*conditions) if require_both_to_match else or_(*conditions)
)
query = query.where(final_prefix_condition)
query = query.where(or_(*conditions))
if sources:
query = query.where(Tag.source.in_(sources))

View File

@@ -1,6 +1,3 @@
from sqlalchemy.orm import Session
from danswer.db.search_settings import get_current_search_settings
from danswer.document_index.interfaces import DocumentIndex
from danswer.document_index.vespa.index import VespaIndex
@@ -16,14 +13,3 @@ def get_default_document_index(
return VespaIndex(
index_name=primary_index_name, secondary_index_name=secondary_index_name
)
def get_current_primary_default_document_index(db_session: Session) -> DocumentIndex:
"""
TODO: Use redis to cache this or something
"""
search_settings = get_current_search_settings(db_session)
return get_default_document_index(
primary_index_name=search_settings.index_name,
secondary_index_name=None,
)

View File

@@ -55,21 +55,6 @@ class DocumentMetadata:
from_ingestion_api: bool = False
@dataclass
class VespaDocumentFields:
"""
Specifies fields in Vespa for a document. Fields set to None will be ignored.
Perhaps we should name this in an implementation agnostic fashion, but it's more
understandable like this for now.
"""
# all other fields except these 4 will always be left alone by the update request
access: DocumentAccess | None = None
document_sets: set[str] | None = None
boost: float | None = None
hidden: bool | None = None
@dataclass
class UpdateRequest:
"""
@@ -171,16 +156,6 @@ class Deletable(abc.ABC):
Class must implement the ability to delete document by their unique document ids.
"""
@abc.abstractmethod
def delete_single(self, doc_id: str) -> int:
"""
Given a single document id, hard delete it from the document index
Parameters:
- doc_id: document id as specified by the connector
"""
raise NotImplementedError
@abc.abstractmethod
def delete(self, doc_ids: list[str]) -> None:
"""
@@ -203,9 +178,11 @@ class Updatable(abc.ABC):
"""
@abc.abstractmethod
def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int:
def update_single(self, update_request: UpdateRequest) -> None:
"""
Updates all chunks for a document with the specified fields.
Updates some set of chunks for a document. The document and fields to update
are specified in the update request. Each update request in the list applies
its changes to a list of document ids.
None values mean that the field does not need an update.
The rationale for a single update function is that it allows retries and parallelism
@@ -213,10 +190,14 @@ class Updatable(abc.ABC):
us to individually handle error conditions per document.
Parameters:
- fields: the fields to update in the document. Any field set to None will not be changed.
- update_request: for a list of document ids in the update request, apply the same updates
to all of the documents with those ids.
Return:
None
- an HTTPStatus code. The code can be used to decide whether to fail immediately,
retry, etc. Although this method likely hits an HTTP API behind the
scenes, the usage of HTTPStatus is a convenience and the interface is not
actually HTTP specific.
"""
raise NotImplementedError

View File

@@ -1,6 +1,5 @@
import concurrent.futures
import io
import logging
import os
import re
import time
@@ -14,8 +13,6 @@ from typing import cast
import httpx
import requests
from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
from danswer.configs.app_configs import VESPA_REQUEST_TIMEOUT
from danswer.configs.chat_configs import DOC_TIME_DECAY
from danswer.configs.chat_configs import NUM_RETURNED_HITS
from danswer.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -25,7 +22,6 @@ from danswer.document_index.interfaces import DocumentIndex
from danswer.document_index.interfaces import DocumentInsertionRecord
from danswer.document_index.interfaces import UpdateRequest
from danswer.document_index.interfaces import VespaChunkRequest
from danswer.document_index.interfaces import VespaDocumentFields
from danswer.document_index.vespa.chunk_retrieval import batch_search_api_retrieval
from danswer.document_index.vespa.chunk_retrieval import (
get_all_vespa_ids_for_document_id,
@@ -62,8 +58,8 @@ from danswer.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT
from danswer.document_index.vespa_constants import VESPA_DIM_REPLACEMENT_PAT
from danswer.document_index.vespa_constants import VESPA_TIMEOUT
from danswer.document_index.vespa_constants import YQL_BASE
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.indexing.models import DocMetadataAwareIndexChunk
from danswer.key_value_store.factory import get_kv_store
from danswer.search.models import IndexFilters
from danswer.search.models import InferenceChunkUncleaned
from danswer.utils.batching import batch_generator
@@ -72,10 +68,6 @@ from shared_configs.model_server_models import Embedding
logger = setup_logger()
# Set the logging level to WARNING to ignore INFO and DEBUG logs
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.WARNING)
@dataclass
class _VespaUpdateRequest:
@@ -148,7 +140,7 @@ class VespaIndex(DocumentIndex):
SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS)
)
kv_store = get_kv_store()
kv_store = get_dynamic_config_store()
needs_reindexing = False
try:
@@ -212,7 +204,7 @@ class VespaIndex(DocumentIndex):
# indexing / updates / deletes since we have to make a large volume of requests.
with (
concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
httpx.Client(http2=True) as http_client,
):
# Check for existing documents, existing documents need to have all of their chunks deleted
# prior to indexing as the document size (num chunks) may have shrunk
@@ -276,7 +268,7 @@ class VespaIndex(DocumentIndex):
# indexing / updates / deletes since we have to make a large volume of requests.
with (
concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
httpx.Client(http2=True) as http_client,
):
for update_batch in batch_generator(updates, batch_size):
future_to_document_id = {
@@ -385,89 +377,90 @@ class VespaIndex(DocumentIndex):
time.monotonic() - update_start,
)
def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int:
def update_single(self, update_request: UpdateRequest) -> None:
"""Note: if the document id does not exist, the update will be a no-op and the
function will complete with no errors or exceptions.
Handle other exceptions if you wish to implement retry behavior
"""
total_chunks_updated = 0
if len(update_request.document_ids) != 1:
raise ValueError("update_request must contain a single document id")
# Handle Vespa character limitations
# Mutating update_request but it's not used later anyway
normalized_doc_id = replace_invalid_doc_id_characters(doc_id)
update_request.document_ids = [
replace_invalid_doc_id_characters(doc_id)
for doc_id in update_request.document_ids
]
# Build the _VespaUpdateRequest objects
update_dict: dict[str, dict] = {"fields": {}}
if fields.boost is not None:
update_dict["fields"][BOOST] = {"assign": fields.boost}
if fields.document_sets is not None:
update_dict["fields"][DOCUMENT_SETS] = {
"assign": {document_set: 1 for document_set in fields.document_sets}
}
if fields.access is not None:
update_dict["fields"][ACCESS_CONTROL_LIST] = {
"assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()}
}
if fields.hidden is not None:
update_dict["fields"][HIDDEN] = {"assign": fields.hidden}
if not update_dict["fields"]:
logger.error("Update request received but nothing to update")
return 0
# update_start = time.monotonic()
# Fetch all chunks for each document ahead of time
index_names = [self.index_name]
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
for index_name in index_names:
params = httpx.QueryParams(
{
"selection": f"{index_name}.document_id=='{normalized_doc_id}'",
"cluster": DOCUMENT_INDEX_NAME,
}
chunk_id_start_time = time.monotonic()
all_doc_chunk_ids: list[str] = []
for index_name in index_names:
for document_id in update_request.document_ids:
# this calls vespa and can raise http exceptions
doc_chunk_ids = get_all_vespa_ids_for_document_id(
document_id=document_id,
index_name=index_name,
filters=None,
get_large_chunks=True,
)
all_doc_chunk_ids.extend(doc_chunk_ids)
logger.debug(
f"Took {time.monotonic() - chunk_id_start_time:.2f} seconds to fetch all Vespa chunk IDs"
)
# Build the _VespaUpdateRequest objects
update_dict: dict[str, dict] = {"fields": {}}
if update_request.boost is not None:
update_dict["fields"][BOOST] = {"assign": update_request.boost}
if update_request.document_sets is not None:
update_dict["fields"][DOCUMENT_SETS] = {
"assign": {
document_set: 1 for document_set in update_request.document_sets
}
}
if update_request.access is not None:
update_dict["fields"][ACCESS_CONTROL_LIST] = {
"assign": {acl_entry: 1 for acl_entry in update_request.access.to_acl()}
}
if update_request.hidden is not None:
update_dict["fields"][HIDDEN] = {"assign": update_request.hidden}
if not update_dict["fields"]:
logger.error("Update request received but nothing to update")
return
processed_update_requests: list[_VespaUpdateRequest] = []
for document_id in update_request.document_ids:
for doc_chunk_id in all_doc_chunk_ids:
processed_update_requests.append(
_VespaUpdateRequest(
document_id=document_id,
url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}/{doc_chunk_id}",
update_request=update_dict,
)
)
while True:
try:
resp = http_client.put(
f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}",
params=params,
headers={"Content-Type": "application/json"},
json=update_dict,
)
resp.raise_for_status()
except httpx.HTTPStatusError as e:
logger.error(
f"Failed to update chunks, details: {e.response.text}"
)
raise
resp_data = resp.json()
if "documentCount" in resp_data:
chunks_updated = resp_data["documentCount"]
total_chunks_updated += chunks_updated
# Check for continuation token to handle pagination
if "continuation" not in resp_data:
break # Exit loop if no continuation token
if not resp_data["continuation"]:
break # Exit loop if continuation token is empty
params = params.set("continuation", resp_data["continuation"])
logger.debug(
f"VespaIndex.update_single: "
f"index={index_name} "
f"doc={normalized_doc_id} "
f"chunks_updated={total_chunks_updated}"
with httpx.Client(http2=True) as http_client:
for update in processed_update_requests:
http_client.put(
update.url,
headers={"Content-Type": "application/json"},
json=update.update_request,
)
return total_chunks_updated
# logger.debug(
# "Finished updating Vespa documents in %.2f seconds",
# time.monotonic() - update_start,
# )
return
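A caller-side sketch of the new single-document update path; the index name and doc id are illustrative, and only the fields set on VespaDocumentFields are touched.
index = VespaIndex(index_name="danswer_chunk", secondary_index_name=None)
chunks_updated = index.update_single(
    doc_id="C0123456789__1695480000.000100",
    fields=VespaDocumentFields(hidden=True),
)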
def delete(self, doc_ids: list[str]) -> None:
logger.info(f"Deleting {len(doc_ids)} documents from Vespa")
@@ -476,7 +469,7 @@ class VespaIndex(DocumentIndex):
# NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
# indexing / updates / deletes since we have to make a large volume of requests.
with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
with httpx.Client(http2=True) as http_client:
index_names = [self.index_name]
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
@@ -485,70 +478,6 @@ class VespaIndex(DocumentIndex):
delete_vespa_docs(
document_ids=doc_ids, index_name=index_name, http_client=http_client
)
return
def delete_single(self, doc_id: str) -> int:
"""Possibly faster overall than the delete method due to using a single
delete call with a selection query."""
total_chunks_deleted = 0
# Vespa deletion is poorly documented ... luckily we found this
# https://docs.vespa.ai/en/operations/batch-delete.html#example
doc_id = replace_invalid_doc_id_characters(doc_id)
# NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
# indexing / updates / deletes since we have to make a large volume of requests.
index_names = [self.index_name]
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
for index_name in index_names:
params = httpx.QueryParams(
{
"selection": f"{index_name}.document_id=='{doc_id}'",
"cluster": DOCUMENT_INDEX_NAME,
}
)
while True:
try:
resp = http_client.delete(
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}",
params=params,
)
resp.raise_for_status()
except httpx.HTTPStatusError as e:
logger.error(
f"Failed to delete chunk, details: {e.response.text}"
)
raise
resp_data = resp.json()
if "documentCount" in resp_data:
chunks_deleted = resp_data["documentCount"]
total_chunks_deleted += chunks_deleted
# Check for continuation token to handle pagination
if "continuation" not in resp_data:
break # Exit loop if no continuation token
if not resp_data["continuation"]:
break # Exit loop if continuation token is empty
params = params.set("continuation", resp_data["continuation"])
logger.debug(
f"VespaIndex.delete_single: "
f"index={index_name} "
f"doc={doc_id} "
f"chunks_deleted={total_chunks_deleted}"
)
return total_chunks_deleted
def id_based_retrieval(
self,

View File

@@ -0,0 +1,15 @@
from danswer.configs.app_configs import DYNAMIC_CONFIG_STORE
from danswer.dynamic_configs.interface import DynamicConfigStore
from danswer.dynamic_configs.store import FileSystemBackedDynamicConfigStore
from danswer.dynamic_configs.store import PostgresBackedDynamicConfigStore
def get_dynamic_config_store() -> DynamicConfigStore:
dynamic_config_store_type = DYNAMIC_CONFIG_STORE
if dynamic_config_store_type == FileSystemBackedDynamicConfigStore.__name__:
raise NotImplementedError("File based config store no longer supported")
if dynamic_config_store_type == PostgresBackedDynamicConfigStore.__name__:
return PostgresBackedDynamicConfigStore()
# TODO: change exception type
raise Exception("Unknown dynamic config store type")

View File

@@ -9,11 +9,11 @@ JSON_ro: TypeAlias = (
)
class KvKeyNotFoundError(Exception):
class ConfigNotFoundError(Exception):
pass
class KeyValueStore:
class DynamicConfigStore:
@abc.abstractmethod
def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
raise NotImplementedError

View File

@@ -0,0 +1,102 @@
import json
import os
from collections.abc import Iterator
from contextlib import contextmanager
from pathlib import Path
from typing import cast
from filelock import FileLock
from sqlalchemy.orm import Session
from danswer.db.engine import get_session_factory
from danswer.db.models import KVStore
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.dynamic_configs.interface import DynamicConfigStore
from danswer.dynamic_configs.interface import JSON_ro
FILE_LOCK_TIMEOUT = 10
def _get_file_lock(file_name: Path) -> FileLock:
return FileLock(file_name.with_suffix(".lock"))
class FileSystemBackedDynamicConfigStore(DynamicConfigStore):
def __init__(self, dir_path: str) -> None:
# TODO (chris): maybe require all possible keys to be passed in
# at app start somehow to prevent key overlaps
self.dir_path = Path(dir_path)
def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
file_path = self.dir_path / key
lock = _get_file_lock(file_path)
with lock.acquire(timeout=FILE_LOCK_TIMEOUT):
with open(file_path, "w+") as f:
json.dump(val, f)
def load(self, key: str) -> JSON_ro:
file_path = self.dir_path / key
if not file_path.exists():
raise ConfigNotFoundError
lock = _get_file_lock(file_path)
with lock.acquire(timeout=FILE_LOCK_TIMEOUT):
with open(self.dir_path / key) as f:
return cast(JSON_ro, json.load(f))
def delete(self, key: str) -> None:
file_path = self.dir_path / key
if not file_path.exists():
raise ConfigNotFoundError
lock = _get_file_lock(file_path)
with lock.acquire(timeout=FILE_LOCK_TIMEOUT):
os.remove(file_path)
class PostgresBackedDynamicConfigStore(DynamicConfigStore):
@contextmanager
def get_session(self) -> Iterator[Session]:
factory = get_session_factory()
session: Session = factory()
try:
yield session
finally:
session.close()
def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
# The actual encryption/decryption is done in Postgres, we just need to choose
# which field to set
encrypted_val = val if encrypt else None
plain_val = val if not encrypt else None
with self.get_session() as session:
obj = session.query(KVStore).filter_by(key=key).first()
if obj:
obj.value = plain_val
obj.encrypted_value = encrypted_val
else:
obj = KVStore(
key=key, value=plain_val, encrypted_value=encrypted_val
) # type: ignore
session.query(KVStore).filter_by(key=key).delete() # just in case
session.add(obj)
session.commit()
def load(self, key: str) -> JSON_ro:
with self.get_session() as session:
obj = session.query(KVStore).filter_by(key=key).first()
if not obj:
raise ConfigNotFoundError
if obj.value is not None:
return cast(JSON_ro, obj.value)
if obj.encrypted_value is not None:
return cast(JSON_ro, obj.encrypted_value)
return None
def delete(self, key: str) -> None:
with self.get_session() as session:
result = session.query(KVStore).filter_by(key=key).delete() # type: ignore
if result == 0:
raise ConfigNotFoundError
session.commit()
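A short sketch of how the encrypt flag selects between the plain and encrypted columns in the Postgres-backed store (key and values are illustrative placeholders):

store = PostgresBackedDynamicConfigStore()
store.store("llm_api_key", "placeholder-secret", encrypt=True)   # written to encrypted_value
store.store("default_model", "gpt-4o", encrypt=False)            # written to value
print(store.load("default_model"))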

View File

@@ -20,8 +20,6 @@ from pypdf.errors import PdfStreamError
from danswer.configs.constants import DANSWER_METADATA_FILENAME
from danswer.file_processing.html_utils import parse_html_page_basic
from danswer.file_processing.unstructured import get_unstructured_api_key
from danswer.file_processing.unstructured import unstructured_to_text
from danswer.utils.logger import setup_logger
logger = setup_logger()
@@ -333,10 +331,9 @@ def file_io_to_text(file: IO[Any]) -> str:
def extract_file_text(
file_name: str | None,
file: IO[Any],
file_name: str,
break_on_unprocessable: bool = True,
extension: str | None = None,
) -> str:
extension_to_function: dict[str, Callable[[IO[Any]], str]] = {
".pdf": pdf_to_text,
@@ -348,29 +345,22 @@ def extract_file_text(
".html": parse_html_page_basic,
}
try:
if get_unstructured_api_key():
return unstructured_to_text(file, file_name)
def _process_file() -> str:
if file_name:
extension = get_file_ext(file_name)
if check_file_ext_is_valid(extension):
return extension_to_function.get(extension, file_io_to_text)(file)
if file_name or extension:
if extension is not None:
final_extension = extension
elif file_name is not None:
final_extension = get_file_ext(file_name)
if check_file_ext_is_valid(final_extension):
return extension_to_function.get(final_extension, file_io_to_text)(file)
# Either the file somehow has no name or the extension is not one that we recognize
# Either the file somehow has no name or the extension is not one that we are familiar with
if is_text_file(file):
return file_io_to_text(file)
raise ValueError("Unknown file extension and unknown text encoding")
try:
return _process_file()
except Exception as e:
if break_on_unprocessable:
raise RuntimeError(
f"Failed to process file {file_name or 'Unknown'}: {str(e)}"
) from e
logger.warning(f"Failed to process file {file_name or 'Unknown'}: {str(e)}")
raise RuntimeError(f"Failed to process file: {str(e)}") from e
logger.warning(f"Failed to process file: {str(e)}")
return ""

View File

@@ -1,67 +0,0 @@
from typing import Any
from typing import cast
from typing import IO
from unstructured.staging.base import dict_to_elements
from unstructured_client import UnstructuredClient # type: ignore
from unstructured_client.models import operations # type: ignore
from unstructured_client.models import shared
from danswer.configs.constants import KV_UNSTRUCTURED_API_KEY
from danswer.key_value_store.factory import get_kv_store
from danswer.key_value_store.interface import KvKeyNotFoundError
from danswer.utils.logger import setup_logger
logger = setup_logger()
def get_unstructured_api_key() -> str | None:
kv_store = get_kv_store()
try:
return cast(str, kv_store.load(KV_UNSTRUCTURED_API_KEY))
except KvKeyNotFoundError:
return None
def update_unstructured_api_key(api_key: str) -> None:
kv_store = get_kv_store()
kv_store.store(KV_UNSTRUCTURED_API_KEY, api_key)
def delete_unstructured_api_key() -> None:
kv_store = get_kv_store()
kv_store.delete(KV_UNSTRUCTURED_API_KEY)
def _sdk_partition_request(
file: IO[Any], file_name: str, **kwargs: Any
) -> operations.PartitionRequest:
try:
request = operations.PartitionRequest(
partition_parameters=shared.PartitionParameters(
files=shared.Files(content=file.read(), file_name=file_name),
**kwargs,
),
)
return request
except Exception as e:
logger.error(f"Error creating partition request for file {file_name}: {str(e)}")
raise
def unstructured_to_text(file: IO[Any], file_name: str) -> str:
logger.debug(f"Starting to read file: {file_name}")
req = _sdk_partition_request(file, file_name, strategy="auto")
unstructured_client = UnstructuredClient(api_key_auth=get_unstructured_api_key())
response = unstructured_client.general.partition(req) # type: ignore
elements = dict_to_elements(response.elements)
if response.status_code != 200:
err = f"Received unexpected status code {response.status_code} from Unstructured API."
logger.error(err)
raise ValueError(err)
return "\n\n".join(str(el) for el in elements)

View File

@@ -10,7 +10,6 @@ from danswer.connectors.cross_connector_utils.miscellaneous_utils import (
get_metadata_keys_to_ignore,
)
from danswer.connectors.models import Document
from danswer.indexing.indexing_heartbeat import Heartbeat
from danswer.indexing.models import DocAwareChunk
from danswer.natural_language_processing.utils import BaseTokenizer
from danswer.utils.logger import setup_logger
@@ -27,7 +26,6 @@ CHUNK_OVERLAP = 0
MAX_METADATA_PERCENTAGE = 0.25
CHUNK_MIN_CONTENT = 256
logger = setup_logger()
@@ -125,7 +123,6 @@ class Chunker:
chunk_token_limit: int = DOC_EMBEDDING_CONTEXT_SIZE,
chunk_overlap: int = CHUNK_OVERLAP,
mini_chunk_size: int = MINI_CHUNK_SIZE,
heartbeat: Heartbeat | None = None,
) -> None:
from llama_index.text_splitter import SentenceSplitter
@@ -134,7 +131,6 @@ class Chunker:
self.enable_multipass = enable_multipass
self.enable_large_chunks = enable_large_chunks
self.tokenizer = tokenizer
self.heartbeat = heartbeat
self.blurb_splitter = SentenceSplitter(
tokenizer=tokenizer.tokenize,
@@ -259,7 +255,7 @@ class Chunker:
# If the chunk does not have any useable content, it will not be indexed
return chunks
def _handle_single_document(self, document: Document) -> list[DocAwareChunk]:
def chunk(self, document: Document) -> list[DocAwareChunk]:
# Specifically for reproducing an issue with gmail
if document.source == DocumentSource.GMAIL:
logger.debug(f"Chunking {document.semantic_identifier}")
@@ -306,13 +302,3 @@ class Chunker:
normal_chunks.extend(large_chunks)
return normal_chunks
def chunk(self, documents: list[Document]) -> list[DocAwareChunk]:
final_chunks: list[DocAwareChunk] = []
for document in documents:
final_chunks.extend(self._handle_single_document(document))
if self.heartbeat:
self.heartbeat.heartbeat()
return final_chunks

View File

@@ -1,8 +1,12 @@
from abc import ABC
from abc import abstractmethod
from sqlalchemy.orm import Session
from danswer.db.models import IndexModelStatus
from danswer.db.models import SearchSettings
from danswer.indexing.indexing_heartbeat import Heartbeat
from danswer.db.search_settings import get_current_search_settings
from danswer.db.search_settings import get_secondary_search_settings
from danswer.indexing.models import ChunkEmbedding
from danswer.indexing.models import DocAwareChunk
from danswer.indexing.models import IndexChunk
@@ -20,9 +24,6 @@ logger = setup_logger()
class IndexingEmbedder(ABC):
"""Converts chunks into chunks with embeddings. Note that one chunk may have
multiple embeddings associated with it."""
def __init__(
self,
model_name: str,
@@ -32,7 +33,6 @@ class IndexingEmbedder(ABC):
provider_type: EmbeddingProvider | None,
api_key: str | None,
api_url: str | None,
heartbeat: Heartbeat | None,
):
self.model_name = model_name
self.normalize = normalize
@@ -54,7 +54,6 @@ class IndexingEmbedder(ABC):
server_host=INDEXING_MODEL_SERVER_HOST,
server_port=INDEXING_MODEL_SERVER_PORT,
retrim_content=True,
heartbeat=heartbeat,
)
@abstractmethod
@@ -75,7 +74,6 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
provider_type: EmbeddingProvider | None = None,
api_key: str | None = None,
api_url: str | None = None,
heartbeat: Heartbeat | None = None,
):
super().__init__(
model_name,
@@ -85,7 +83,6 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
provider_type,
api_key,
api_url,
heartbeat,
)
@log_function_time()
@@ -169,7 +166,7 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
title_embed_dict[title] = title_embedding
new_embedded_chunk = IndexChunk(
**chunk.model_dump(),
**chunk.dict(),
embeddings=ChunkEmbedding(
full_embedding=chunk_embeddings[0],
mini_chunk_embeddings=chunk_embeddings[1:],
@@ -183,7 +180,7 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
@classmethod
def from_db_search_settings(
cls, search_settings: SearchSettings, heartbeat: Heartbeat | None = None
cls, search_settings: SearchSettings
) -> "DefaultIndexingEmbedder":
return cls(
model_name=search_settings.model_name,
@@ -193,5 +190,28 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
provider_type=search_settings.provider_type,
api_key=search_settings.api_key,
api_url=search_settings.api_url,
heartbeat=heartbeat,
)
def get_embedding_model_from_search_settings(
db_session: Session, index_model_status: IndexModelStatus = IndexModelStatus.PRESENT
) -> IndexingEmbedder:
search_settings: SearchSettings | None
if index_model_status == IndexModelStatus.PRESENT:
search_settings = get_current_search_settings(db_session)
elif index_model_status == IndexModelStatus.FUTURE:
search_settings = get_secondary_search_settings(db_session)
if not search_settings:
raise RuntimeError("No secondary index configured")
else:
raise RuntimeError("Not supporting embedding model rollbacks")
return DefaultIndexingEmbedder(
model_name=search_settings.model_name,
normalize=search_settings.normalize,
query_prefix=search_settings.query_prefix,
passage_prefix=search_settings.passage_prefix,
provider_type=search_settings.provider_type,
api_key=search_settings.api_key,
api_url=search_settings.api_url,
)
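A minimal sketch tying get_embedding_model_from_search_settings to a database session (engine and session creation are assumed to exist elsewhere):

with Session(engine) as db_session:
    current_embedder = get_embedding_model_from_search_settings(db_session)
    future_embedder = get_embedding_model_from_search_settings(
        db_session, index_model_status=IndexModelStatus.FUTURE
    )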

View File

@@ -1,41 +0,0 @@
import abc
from typing import Any
from sqlalchemy import func
from sqlalchemy.orm import Session
from danswer.db.index_attempt import get_index_attempt
from danswer.utils.logger import setup_logger
logger = setup_logger()
class Heartbeat(abc.ABC):
"""Useful for any long-running work that goes through a bunch of items
and needs to occasionally give updates on progress.
e.g. chunking, embedding, updating vespa, etc."""
@abc.abstractmethod
def heartbeat(self, metadata: Any = None) -> None:
raise NotImplementedError
class IndexingHeartbeat(Heartbeat):
def __init__(self, index_attempt_id: int, db_session: Session, freq: int):
self.cnt = 0
self.index_attempt_id = index_attempt_id
self.db_session = db_session
self.freq = freq
def heartbeat(self, metadata: Any = None) -> None:
self.cnt += 1
if self.cnt % self.freq == 0:
index_attempt = get_index_attempt(
db_session=self.db_session, index_attempt_id=self.index_attempt_id
)
if index_attempt:
index_attempt.time_updated = func.now()
self.db_session.commit()
else:
logger.error("Index attempt not found, this should not happen!")

View File

@@ -31,7 +31,6 @@ from danswer.document_index.interfaces import DocumentIndex
from danswer.document_index.interfaces import DocumentMetadata
from danswer.indexing.chunker import Chunker
from danswer.indexing.embedder import IndexingEmbedder
from danswer.indexing.indexing_heartbeat import IndexingHeartbeat
from danswer.indexing.models import DocAwareChunk
from danswer.indexing.models import DocMetadataAwareIndexChunk
from danswer.utils.logger import setup_logger
@@ -137,7 +136,6 @@ def index_doc_batch_with_handler(
attempt_id: int | None,
db_session: Session,
ignore_time_skip: bool = False,
tenant_id: str | None = None,
) -> tuple[int, int]:
r = (0, 0)
try:
@@ -149,7 +147,6 @@ def index_doc_batch_with_handler(
index_attempt_metadata=index_attempt_metadata,
db_session=db_session,
ignore_time_skip=ignore_time_skip,
tenant_id=tenant_id,
)
except Exception as e:
if INDEXING_EXCEPTION_LIMIT == 0:
@@ -223,8 +220,8 @@ def index_doc_batch_prepare(
document_ids = [document.id for document in documents]
db_docs: list[DBDocument] = get_documents_by_ids(
db_session=db_session,
document_ids=document_ids,
db_session=db_session,
)
# Skip indexing docs that don't have a newer updated at
@@ -263,7 +260,6 @@ def index_doc_batch(
index_attempt_metadata: IndexAttemptMetadata,
db_session: Session,
ignore_time_skip: bool = False,
tenant_id: str | None = None,
) -> tuple[int, int]:
"""Takes different pieces of the indexing pipeline and applies it to a batch of documents
Note that the documents should already be batched at this point so that it does not inflate the
@@ -287,10 +283,18 @@ def index_doc_batch(
return 0, 0
logger.debug("Starting chunking")
chunks: list[DocAwareChunk] = chunker.chunk(ctx.updatable_docs)
chunks: list[DocAwareChunk] = []
for document in ctx.updatable_docs:
chunks.extend(chunker.chunk(document=document))
logger.debug("Starting embedding")
chunks_with_embeddings = embedder.embed_chunks(chunks) if chunks else []
chunks_with_embeddings = (
embedder.embed_chunks(
chunks=chunks,
)
if chunks
else []
)
updatable_ids = [doc.id for doc in ctx.updatable_docs]
@@ -327,7 +331,6 @@ def index_doc_batch(
if chunk.source_document.id in ctx.id_to_db_doc_map
else DEFAULT_BOOST
),
tenant_id=tenant_id,
)
for chunk in chunks_with_embeddings
]
@@ -377,7 +380,6 @@ def build_indexing_pipeline(
chunker: Chunker | None = None,
ignore_time_skip: bool = False,
attempt_id: int | None = None,
tenant_id: str | None = None,
) -> IndexingPipelineProtocol:
"""Builds a pipeline which takes in a list (batch) of docs and indexes them."""
search_settings = get_current_search_settings(db_session)
@@ -404,13 +406,6 @@ def build_indexing_pipeline(
tokenizer=embedder.embedding_model.tokenizer,
enable_multipass=multipass,
enable_large_chunks=enable_large_chunks,
# after every doc, update status in case there are a bunch of
# really long docs
heartbeat=IndexingHeartbeat(
index_attempt_id=attempt_id, db_session=db_session, freq=1
)
if attempt_id
else None,
)
return partial(
@@ -421,5 +416,4 @@ def build_indexing_pipeline(
ignore_time_skip=ignore_time_skip,
attempt_id=attempt_id,
db_session=db_session,
tenant_id=tenant_id,
)

View File

@@ -75,7 +75,6 @@ class DocMetadataAwareIndexChunk(IndexChunk):
negative -> ranked lower.
"""
tenant_id: str | None = None
access: "DocumentAccess"
document_sets: set[str]
boost: int
@@ -87,7 +86,6 @@ class DocMetadataAwareIndexChunk(IndexChunk):
access: "DocumentAccess",
document_sets: set[str],
boost: int,
tenant_id: str | None,
) -> "DocMetadataAwareIndexChunk":
index_chunk_data = index_chunk.model_dump()
return cls(
@@ -95,7 +93,6 @@ class DocMetadataAwareIndexChunk(IndexChunk):
access=access,
document_sets=document_sets,
boost=boost,
tenant_id=tenant_id,
)

View File

@@ -1,7 +0,0 @@
from danswer.key_value_store.interface import KeyValueStore
from danswer.key_value_store.store import PgRedisKVStore
def get_kv_store() -> KeyValueStore:
# this is the only one supported currently
return PgRedisKVStore()

View File

@@ -1,112 +0,0 @@
import json
from collections.abc import Iterator
from contextlib import contextmanager
from typing import cast
from fastapi import HTTPException
from sqlalchemy import text
from sqlalchemy.orm import Session
from danswer.configs.app_configs import MULTI_TENANT
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.engine import is_valid_schema_name
from danswer.db.models import KVStore
from danswer.key_value_store.interface import JSON_ro
from danswer.key_value_store.interface import KeyValueStore
from danswer.key_value_store.interface import KvKeyNotFoundError
from danswer.redis.redis_pool import get_redis_client
from danswer.utils.logger import setup_logger
from shared_configs.configs import current_tenant_id
logger = setup_logger()
REDIS_KEY_PREFIX = "danswer_kv_store:"
KV_REDIS_KEY_EXPIRATION = 60 * 60 * 24 # 1 Day
class PgRedisKVStore(KeyValueStore):
def __init__(self) -> None:
self.redis_client = get_redis_client()
@contextmanager
def get_session(self) -> Iterator[Session]:
engine = get_sqlalchemy_engine()
with Session(engine, expire_on_commit=False) as session:
if MULTI_TENANT:
tenant_id = current_tenant_id.get()
if tenant_id == "public":
raise HTTPException(
status_code=401, detail="User must authenticate"
)
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID")
# Set the search_path to the tenant's schema
session.execute(text(f'SET search_path = "{tenant_id}"'))
yield session
def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
# Not encrypted in Redis, but encrypted in Postgres
try:
self.redis_client.set(
REDIS_KEY_PREFIX + key, json.dumps(val), ex=KV_REDIS_KEY_EXPIRATION
)
except Exception as e:
# Fallback gracefully to Postgres if Redis fails
logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}")
encrypted_val = val if encrypt else None
plain_val = val if not encrypt else None
with self.get_session() as session:
obj = session.query(KVStore).filter_by(key=key).first()
if obj:
obj.value = plain_val
obj.encrypted_value = encrypted_val
else:
obj = KVStore(
key=key, value=plain_val, encrypted_value=encrypted_val
) # type: ignore
session.query(KVStore).filter_by(key=key).delete() # just in case
session.add(obj)
session.commit()
def load(self, key: str) -> JSON_ro:
try:
redis_value = self.redis_client.get(REDIS_KEY_PREFIX + key)
if redis_value:
assert isinstance(redis_value, bytes)
return json.loads(redis_value.decode("utf-8"))
except Exception as e:
logger.error(f"Failed to get value from Redis for key '{key}': {str(e)}")
with self.get_session() as session:
obj = session.query(KVStore).filter_by(key=key).first()
if not obj:
raise KvKeyNotFoundError
if obj.value is not None:
value = obj.value
elif obj.encrypted_value is not None:
value = obj.encrypted_value
else:
value = None
try:
self.redis_client.set(REDIS_KEY_PREFIX + key, json.dumps(value))
except Exception as e:
logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}")
return cast(JSON_ro, value)
def delete(self, key: str) -> None:
try:
self.redis_client.delete(REDIS_KEY_PREFIX + key)
except Exception as e:
logger.error(f"Failed to delete value from Redis for key '{key}': {str(e)}")
with self.get_session() as session:
result = session.query(KVStore).filter_by(key=key).delete() # type: ignore
if result == 0:
raise KvKeyNotFoundError
session.commit()
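A usage sketch of the cache-aside behaviour above, going through the factory (the key name is illustrative):

kv = get_kv_store()                    # PgRedisKVStore
kv.store("telemetry_enabled", False)   # best-effort write to Redis, durable write to Postgres
print(kv.load("telemetry_enabled"))    # served from Redis when cached, else read back from Postgres
kv.delete("telemetry_enabled")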

View File

@@ -1,4 +1,3 @@
import itertools
from collections.abc import Callable
from collections.abc import Iterator
from typing import Any
@@ -311,15 +310,13 @@ class Answer:
)
)
yield tool_runner.tool_final_result()
if not self.skip_gen_ai_answer_generation:
prompt = prompt_builder.build(tool_call_summary=tool_call_summary)
yield from self._process_llm_stream(
prompt=prompt,
# as of now, we don't support multiple tool calls in sequence, which is why
# we don't need to pass this in here
# tools=[tool.tool_definition() for tool in self.tools],
)
prompt = prompt_builder.build(tool_call_summary=tool_call_summary)
yield from self._process_llm_stream(
prompt=prompt,
tools=[tool.tool_definition() for tool in self.tools],
)
return
@@ -415,10 +412,6 @@ class Answer:
logger.notice(f"Chosen tool: {chosen_tool_and_args}")
if not chosen_tool_and_args:
if self.skip_gen_ai_answer_generation:
raise ValueError(
"skip_gen_ai_answer_generation is True, but no tool was chosen; no answer will be generated"
)
prompt_builder.update_system_prompt(
default_build_system_message(self.prompt_config)
)
@@ -483,10 +476,10 @@ class Answer:
final = tool_runner.tool_final_result()
yield final
if not self.skip_gen_ai_answer_generation:
prompt = prompt_builder.build()
yield from self._process_llm_stream(prompt=prompt, tools=None)
prompt = prompt_builder.build()
yield from self._process_llm_stream(prompt=prompt, tools=None)
@property
def processed_streamed_output(self) -> AnswerStream:
@@ -561,7 +554,8 @@ class Answer:
def _stream() -> Iterator[str]:
nonlocal stream_stop_info
for item in itertools.chain([message], stream):
yield cast(str, message)
for item in stream:
if isinstance(item, StreamStopInfo):
stream_stop_info = item
return
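For illustration, the itertools.chain form above is equivalent to yielding the buffered first message and then the remaining stream; a standalone sketch:

import itertools

def stream_all(first: str, rest):
    # Yield the already-received first chunk, then the live stream.
    yield from itertools.chain([first], rest)

print(list(stream_all("Hello", iter([" ", "world"]))))  # ['Hello', ' ', 'world']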

View File

@@ -18,7 +18,6 @@ from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT
from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT_FOR_TOOL_CALLING
from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
from danswer.prompts.prompt_utils import add_date_time_to_prompt
from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.prompts.prompt_utils import build_task_prompt_reminders
@@ -144,12 +143,6 @@ def build_citations_user_message(
prompt=prompt_config, use_language_hint=bool(multilingual_expansion)
)
history_block = (
HISTORY_BLOCK.format(history_str=history_message) + "\n"
if history_message
else ""
)
if context_docs:
context_docs_str = build_complete_context_str(context_docs)
optional_ignore = "" if all_doc_useful else DEFAULT_IGNORE_STATEMENT
@@ -159,14 +152,14 @@ def build_citations_user_message(
context_docs_str=context_docs_str,
task_prompt=task_prompt_with_reminder,
user_query=question,
history_block=history_block,
history_block=history_message,
)
else:
# if no context docs provided, assume we're in the tool calling flow
user_prompt = CITATIONS_PROMPT_FOR_TOOL_CALLING.format(
task_prompt=task_prompt_with_reminder,
user_query=question,
history_block=history_block,
history_block=history_message,
)
user_prompt = user_prompt.strip()

View File

@@ -204,7 +204,6 @@ class DefaultMultiLLM(LLM):
model_name: str,
api_base: str | None = None,
api_version: str | None = None,
deployment_name: str | None = None,
max_output_tokens: int | None = None,
custom_llm_provider: str | None = None,
temperature: float = GEN_AI_TEMPERATURE,
@@ -216,7 +215,6 @@ class DefaultMultiLLM(LLM):
self._model_version = model_name
self._temperature = temperature
self._api_key = api_key
self._deployment_name = deployment_name
self._api_base = api_base
self._api_version = api_version
self._custom_llm_provider = custom_llm_provider
@@ -285,20 +283,17 @@ class DefaultMultiLLM(LLM):
_convert_message_to_dict(msg) if isinstance(msg, BaseMessage) else msg
for msg in prompt
]
elif isinstance(prompt, str):
prompt = [_convert_message_to_dict(HumanMessage(content=prompt))]
try:
return litellm.completion(
# model choice
model=f"{self.config.model_provider}/{self.config.deployment_name or self.config.model_name}",
# NOTE: have to pass in None instead of empty string for these
# otherwise litellm can have some issues with bedrock
api_key=self._api_key or None,
base_url=self._api_base or None,
api_version=self._api_version or None,
custom_llm_provider=self._custom_llm_provider or None,
model=f"{self.config.model_provider}/{self.config.model_name}",
api_key=self._api_key,
base_url=self._api_base,
api_version=self._api_version,
custom_llm_provider=self._custom_llm_provider,
# actual input
messages=prompt,
tools=tools,
@@ -327,7 +322,6 @@ class DefaultMultiLLM(LLM):
api_key=self._api_key,
api_base=self._api_base,
api_version=self._api_version,
deployment_name=self._deployment_name,
)
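For context, a standalone sketch of the underlying litellm call pattern used above (provider and model values are illustrative; requires litellm installed and credentials supplied via arguments or environment variables):

import litellm

response = litellm.completion(
    model="openai/gpt-4o",  # "{provider}/{deployment or model}" as constructed in the wrapper above
    api_key=None,           # None lets litellm fall back to environment variables
    messages=[{"role": "user", "content": "Say hello"}],
)
print(response.choices[0].message.content)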
def _invoke_implementation(

View File

@@ -88,7 +88,6 @@ def get_default_llms(
return get_llm(
provider=llm_provider.provider,
model=model,
deployment_name=llm_provider.deployment_name,
api_key=llm_provider.api_key,
api_base=llm_provider.api_base,
api_version=llm_provider.api_version,
@@ -104,7 +103,6 @@ def get_default_llms(
def get_llm(
provider: str,
model: str,
deployment_name: str | None = None,
api_key: str | None = None,
api_base: str | None = None,
api_version: str | None = None,

View File

@@ -24,7 +24,7 @@ class LLMConfig(BaseModel):
api_key: str | None = None
api_base: str | None = None
api_version: str | None = None
deployment_name: str | None = None
# This disables the "model_" protected namespace for pydantic
model_config = {"protected_namespaces": ()}
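A standalone illustration of why the protected-namespaces override matters for pydantic v2 models with fields that start with model_ (class and field values here are illustrative, not the real config):

from pydantic import BaseModel

class ExampleLLMConfig(BaseModel):
    model_provider: str
    model_name: str
    # Without this, pydantic v2 warns that "model_*" fields conflict with its protected namespace.
    model_config = {"protected_namespaces": ()}

print(ExampleLLMConfig(model_provider="openai", model_name="gpt-4o"))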

Some files were not shown because too many files have changed in this diff.