remove checkmark

various improvements
update file picker
2026-02-17 15:55:45 +00:00 · 2025-01-27 19:12:08 -08:00 · 2025-01-27 19:12:08 -08:00 · 2025-01-27 19:12:08 -08:00 · 2025-01-27 19:12:08 -08:00 · 2025-01-27 19:12:08 -08:00
1292 changed files with 69543 additions and 29946 deletions
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,29 +1,14 @@
 ## Description
+
 [Provide a brief description of the changes in this PR]

-
 ## How Has This Been Tested?
+
 [Describe the tests you ran to verify your changes]

-
-## Accepted Risk (provide if relevant)
-N/A
-
-
-## Related Issue(s) (provide if relevant)
-N/A
-
-
-## Mental Checklist:
- All of the automated tests pass
- All PR comments are addressed and marked resolved
- If there are migrations, they have been rebased to latest main
- If there are new dependencies, they are added to the requirements
- If there are new environment variables, they are added to all of the deployment methods
- If there are new APIs that don't require auth, they are added to PUBLIC_ENDPOINT_SPECS
- Docker images build and basic functionalities work
- Author has done a final read through of the PR right before merge
-
 ## Backporting (check the box to trigger backport action)
+
 Note: You have to check that the action passes, otherwise resolve the conflicts manually and tag the patches.
+
 - [ ] This PR should be backported (make sure to check that the backport attempt succeeds)
+- [ ] [Optional] Override Linear Check
--- a/.github/workflows/docker-build-push-backend-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-backend-container-on-tag.yml
@@ -6,7 +6,7 @@ on:
      - "*"

 env:
-  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'danswer/danswer-backend-cloud' || 'danswer/danswer-backend' }}
+  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}

 jobs:
@@ -44,7 +44,7 @@ jobs:
            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
          build-args: |
-            DANSWER_VERSION=${{ github.ref_name }}
+            ONYX_VERSION=${{ github.ref_name }}

      # trivy has their own rate limiting issues causing this action to flake
      # we worked around it by hardcoding to different db repos in env
@@ -57,7 +57,7 @@ jobs:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        with:
-          # To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
+          # To run locally: trivy image --severity HIGH,CRITICAL onyxdotapp/onyx-backend
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
          trivyignores: ./backend/.trivyignore
--- a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
@@ -7,7 +7,7 @@ on:
      - "*"

 env:
-  REGISTRY_IMAGE: danswer/danswer-web-server-cloud
+  REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}

 jobs:
@@ -60,12 +60,14 @@ jobs:
          platforms: ${{ matrix.platform }}
          push: true
          build-args: |
-            DANSWER_VERSION=${{ github.ref_name }}
+            ONYX_VERSION=${{ github.ref_name }}
            NEXT_PUBLIC_CLOUD_ENABLED=true
            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
            NEXT_PUBLIC_GTM_ENABLED=true
+            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
+            NODE_OPTIONS=--max-old-space-size=8192
          # needed due to weird interactions with the builds for different platforms
          no-cache: true
          labels: ${{ steps.meta.outputs.labels }}
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -6,20 +6,31 @@ on:
      - "*"

 env:
-  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'danswer/danswer-model-server-cloud' || 'danswer/danswer-model-server' }}
+  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
+  DOCKER_BUILDKIT: 1
+  BUILDKIT_PROGRESS: plain

 jobs:
-  build-and-push:
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
-
+  build-amd64:
+    runs-on:
+      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

+      - name: System Info
+        run: |
+          df -h
+          free -h
+          docker system prune -af --volumes
+
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: |
+            image=moby/buildkit:latest
+            network=host

      - name: Login to Docker Hub
        uses: docker/login-action@v3
@@ -27,29 +38,86 @@ jobs:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Model Server Image Docker Build and Push
+      - name: Build and Push AMD64
        uses: docker/build-push-action@v5
        with:
          context: ./backend
          file: ./backend/Dockerfile.model_server
-          platforms: linux/amd64,linux/arm64
+          platforms: linux/amd64
          push: true
-          tags: |
-            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-            ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
+          tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64
          build-args: |
            DANSWER_VERSION=${{ github.ref_name }}
+          outputs: type=registry
+          provenance: false
+
+  build-arm64:
+    runs-on:
+      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: System Info
+        run: |
+          df -h
+          free -h
+          docker system prune -af --volumes
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: |
+            image=moby/buildkit:latest
+            network=host
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and Push ARM64
+        uses: docker/build-push-action@v5
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/arm64
+          push: true
+          tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
+          build-args: |
+            DANSWER_VERSION=${{ github.ref_name }}
+          outputs: type=registry
+          provenance: false
+
+  merge-and-scan:
+    needs: [build-amd64, build-arm64]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Create and Push Multi-arch Manifest
+        run: |
+          docker buildx create --use
+          docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
+            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
+            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
+          if [[ "${{ env.LATEST_TAG }}" == "true" ]]; then
+            docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
+              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
+              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
+          fi

-      # trivy has their own rate limiting issues causing this action to flake
-      # we worked around it by hardcoding to different db repos in env
-      # can re-enable when they figure it out
-      # https://github.com/aquasecurity/trivy/discussions/7538
-      # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        with:
-          image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
+          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
          severity: "CRITICAL,HIGH"
+          timeout: "10m"
--- a/.github/workflows/docker-build-push-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-web-container-on-tag.yml
@@ -3,12 +3,12 @@ name: Build and Push Web Image on Tag
 on:
  push:
    tags:
-      - '*'
+      - "*"

 env:
-  REGISTRY_IMAGE: danswer/danswer-web-server
+  REGISTRY_IMAGE: onyxdotapp/onyx-web-server
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
-  
+
 jobs:
  build:
    runs-on:
@@ -27,11 +27,11 @@ jobs:
      - name: Prepare
        run: |
          platform=${{ matrix.platform }}
-          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV          
-      
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
      - name: Checkout
        uses: actions/checkout@v4
-      
+
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
@@ -40,16 +40,16 @@ jobs:
          tags: |
            type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
            type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
-      
+
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
-      
+
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
-    
+
      - name: Build and push by digest
        id: build
        uses: docker/build-push-action@v5
@@ -59,18 +59,20 @@ jobs:
          platforms: ${{ matrix.platform }}
          push: true
          build-args: |
-            DANSWER_VERSION=${{ github.ref_name }}
-          # needed due to weird interactions with the builds for different platforms  
+            ONYX_VERSION=${{ github.ref_name }}
+            NODE_OPTIONS=--max-old-space-size=8192
+
+          # needed due to weird interactions with the builds for different platforms
          no-cache: true
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
-      
+
      - name: Export digest
        run: |
          mkdir -p /tmp/digests
          digest="${{ steps.build.outputs.digest }}"
-          touch "/tmp/digests/${digest#sha256:}"          
-      
+          touch "/tmp/digests/${digest#sha256:}"
+
      - name: Upload digest
        uses: actions/upload-artifact@v4
        with:
@@ -90,42 +92,42 @@ jobs:
          path: /tmp/digests
          pattern: digests-*
          merge-multiple: true
-      
+
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
-      
+
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY_IMAGE }}
-      
+
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
-      
+
      - name: Create manifest list and push
        working-directory: /tmp/digests
        run: |
          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)          
-      
+            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
+
      - name: Inspect image
        run: |
          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}

-    # trivy has their own rate limiting issues causing this action to flake
-    # we worked around it by hardcoding to different db repos in env
-    # can re-enable when they figure it out
-    # https://github.com/aquasecurity/trivy/discussions/7538
-    # https://github.com/aquasecurity/trivy-action/issues/389
+      # trivy has its own rate limiting issues causing this action to flake
+      # we worked around it by hardcoding to different db repos in env
+      # can re-enable when they figure it out
+      # https://github.com/aquasecurity/trivy/discussions/7538
+      # https://github.com/aquasecurity/trivy-action/issues/389
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        env:
-          TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
-          TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
+          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
+          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
        with:
          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-          severity: 'CRITICAL,HIGH'
+          severity: "CRITICAL,HIGH"
--- a/.github/workflows/docker-tag-latest.yml
+++ b/.github/workflows/docker-tag-latest.yml
@@ -7,31 +7,31 @@ on:
  workflow_dispatch:
    inputs:
      version:
-        description: 'The version (ie v0.0.1) to tag as latest'
+        description: "The version (ie v0.0.1) to tag as latest"
        required: true

 jobs:
  tag:
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
-    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
    steps:
-    - name: Set up Docker Buildx
-      uses: docker/setup-buildx-action@v1
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1

-    - name: Login to Docker Hub
-      uses: docker/login-action@v1
-      with:
-        username: ${{ secrets.DOCKER_USERNAME }}
-        password: ${{ secrets.DOCKER_TOKEN }}
+      - name: Login to Docker Hub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}

-    - name: Enable Docker CLI experimental features
-      run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
+      - name: Enable Docker CLI experimental features
+        run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV

-    - name: Pull, Tag and Push Web Server Image
-      run: |
-        docker buildx imagetools create -t danswer/danswer-web-server:latest danswer/danswer-web-server:${{ github.event.inputs.version }}
+      - name: Pull, Tag and Push Web Server Image
+        run: |
+          docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}

-    - name: Pull, Tag and Push API Server Image
-      run: |
-        docker buildx imagetools create -t danswer/danswer-backend:latest danswer/danswer-backend:${{ github.event.inputs.version }}
+      - name: Pull, Tag and Push API Server Image
+        run: |
+          docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${{ github.event.inputs.version }}
--- a/.github/workflows/hotfix-release-branches.yml
+++ b/.github/workflows/hotfix-release-branches.yml
@@ -8,43 +8,42 @@ on:
  workflow_dispatch:
    inputs:
      hotfix_commit:
-        description: 'Hotfix commit hash'
+        description: "Hotfix commit hash"
        required: true
      hotfix_suffix:
-        description: 'Hotfix branch suffix (e.g. hotfix/v0.8-{suffix})'
+        description: "Hotfix branch suffix (e.g. hotfix/v0.8-{suffix})"
        required: true
      release_branch_pattern:
-        description: 'Release branch pattern (regex)'
+        description: "Release branch pattern (regex)"
        required: true
-        default: 'release/.*'
+        default: "release/.*"
      auto_merge:
-        description: 'Automatically merge the hotfix PRs'
+        description: "Automatically merge the hotfix PRs"
        required: true
        type: choice
-        default: 'true'
+        default: "true"
        options:
          - true
          - false
-          
+
 jobs:
  hotfix_release_branches:
    permissions: write-all
    # See https://runs-on.com/runners/linux/
    # use a lower powered instance since this just does i/o to docker hub
-    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
    steps:
-    
      # needs RKUO_DEPLOY_KEY for write access to merge PR's
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
          fetch-depth: 0
-          
+
      - name: Set up Git user
        run: |
          git config user.name "Richard Kuo [bot]"
-          git config user.email "rkuo[bot]@danswer.ai"
+          git config user.email "rkuo[bot]@onyx.app"

      - name: Fetch All Branches
        run: |
@@ -62,10 +61,10 @@ jobs:
            echo "No release branches found matching pattern '${{ github.event.inputs.release_branch_pattern }}'."
            exit 1
          fi
-          
+
          echo "Found release branches:"
          echo "$BRANCHES"
-          
+
          # Join the branches into a single line separated by commas
          BRANCHES_JOINED=$(echo "$BRANCHES" | tr '\n' ',' | sed 's/,$//')

@@ -169,4 +168,4 @@ jobs:
                echo "Failed to merge pull request #$PR_NUMBER."
              fi
            fi
-          done
+          done
--- a/.github/workflows/pr-backport-autotrigger.yml
+++ b/.github/workflows/pr-backport-autotrigger.yml
@@ -4,7 +4,7 @@ name: Backport on Merge

 on:
  pull_request:
-    types: [closed]  # Later we check for merge so only PRs that go in can get backported
+    types: [closed] # Later we check for merge so only PRs that go in can get backported

 permissions:
  contents: write
@@ -26,9 +26,9 @@ jobs:
      - name: Set up Git user
        run: |
          git config user.name "Richard Kuo [bot]"
-          git config user.email "rkuo[bot]@danswer.ai"
+          git config user.email "rkuo[bot]@onyx.app"
          git fetch --prune
-      
+
      - name: Check for Backport Checkbox
        id: checkbox-check
        run: |
@@ -51,14 +51,14 @@ jobs:
          # Fetch latest tags for beta and stable
          LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
          LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)
-          
+
          # Handle case where no beta tags exist
          if [[ -z "$LATEST_BETA_TAG" ]]; then
            NEW_BETA_TAG="v1.0.0-beta.1"
          else
            NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
          fi
-          
+
          # Increment latest stable tag
          NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
          echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
@@ -80,10 +80,10 @@ jobs:
        run: |
          set -e
          echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"
-          
+
          # Echo the merge commit SHA
          echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"
-          
+
          # Fetch all history for all branches and tags
          git fetch --prune

@@ -98,7 +98,7 @@ jobs:
            echo "Cherry-pick to beta failed due to conflicts."
            exit 1
          }
-          
+
          # Create new beta branch/tag
          git tag ${{ steps.list-branches.outputs.new_beta_tag }}
          # Push the changes and tag to the beta branch using PAT
@@ -110,13 +110,13 @@ jobs:
          echo "Last 5 commits on stable branch:"
          git log -n 5 --pretty=format:"%H"
          echo ""  # Newline for formatting
-          
+
          # Cherry-pick the merge commit from the merged PR
          git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
            echo "Cherry-pick to stable failed due to conflicts."
            exit 1
          }
-          
+
          # Create new stable branch/tag
          git tag ${{ steps.list-branches.outputs.new_stable_tag }}
          # Push the changes and tag to the stable branch using PAT
--- a/.github/workflows/pr-chromatic-tests.yml
+++ b/.github/workflows/pr-chromatic-tests.yml
@@ -14,18 +14,24 @@ jobs:
    name: Playwright Tests

    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
+    runs-on:
+      [
+        runs-on,
+        runner=32cpu-linux-x64,
+        disk=large,
+        "run-id=${{ github.run_id }}",
+      ]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
-          
+
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
-          python-version: '3.11'
-          cache: 'pip'
+          python-version: "3.11"
+          cache: "pip"
          cache-dependency-path: |
            backend/requirements/default.txt
            backend/requirements/dev.txt
@@ -35,7 +41,7 @@ jobs:
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
-        
+
      - name: Setup node
        uses: actions/setup-node@v4
        with:
@@ -48,7 +54,7 @@ jobs:
      - name: Install playwright browsers
        working-directory: ./web
        run: npx playwright install --with-deps
-        
+
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

@@ -60,13 +66,13 @@ jobs:

      # tag every docker image with "test" so that we can spin up the correct set
      # of images during testing
-      
+
      # we use the runs-on cache for docker builds
      # in conjunction with runs-on runners, it has better speed and unlimited caching
      # https://runs-on.com/caching/s3-cache-for-github-actions/
      # https://runs-on.com/caching/docker/
      # https://github.com/moby/buildkit#s3-cache-experimental
-      
+
      # images are built and run locally for testing purposes. Not pushed.

      - name: Build Web Docker image
@@ -75,7 +81,7 @@ jobs:
          context: ./web
          file: ./web/Dockerfile
          platforms: linux/amd64
-          tags: danswer/danswer-web-server:test
+          tags: onyxdotapp/onyx-web-server:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
@@ -87,7 +93,7 @@ jobs:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64
-          tags: danswer/danswer-backend:test
+          tags: onyxdotapp/onyx-backend:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
@@ -99,7 +105,7 @@ jobs:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/amd64
-          tags: danswer/danswer-model-server:test
+          tags: onyxdotapp/onyx-model-server:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
@@ -110,6 +116,7 @@ jobs:
          cd deployment/docker_compose
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
          AUTH_TYPE=basic \
+          GEN_AI_API_KEY=${{ secrets.OPENAI_API_KEY }} \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
@@ -119,12 +126,12 @@ jobs:
      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."
-          
+
          docker logs -f danswer-stack-api_server-1 &

          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds
-          
+
          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))
@@ -152,7 +159,7 @@ jobs:

      - name: Run pytest playwright test init
        working-directory: ./backend
-        env: 
+        env:
          PYTEST_IGNORE_SKIP: true
        run: pytest -s tests/integration/tests/playwright/test_playwright.py

@@ -168,7 +175,7 @@ jobs:
          name: test-results
          path: ./web/test-results
          retention-days: 30
-                    
+
      # save before stopping the containers so the logs can be captured
      - name: Save Docker logs
        if: success() || failure()
@@ -176,7 +183,7 @@ jobs:
          cd deployment/docker_compose
          docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
          mv docker-compose.log ${{ github.workspace }}/docker-compose.log
-      
+
      - name: Upload logs
        if: success() || failure()
        uses: actions/upload-artifact@v4
@@ -191,35 +198,41 @@ jobs:

  chromatic-tests:
    name: Chromatic Tests
-    
+
    needs: playwright-tests
-    runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
+    runs-on:
+      [
+        runs-on,
+        runner=32cpu-linux-x64,
+        disk=large,
+        "run-id=${{ github.run_id }}",
+      ]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
-          
+
      - name: Setup node
        uses: actions/setup-node@v4
        with:
          node-version: 22
-          
+
      - name: Install node dependencies
        working-directory: ./web
        run: npm ci
-        
+
      - name: Download Playwright test results
        uses: actions/download-artifact@v4
        with:
          name: test-results
          path: ./web/test-results
-          
+
      - name: Run Chromatic
        uses: chromaui/action@latest
        with:
          playwright: true
          projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
          workingDir: ./web
-        env: 
+        env:
          CHROMATIC_ARCHIVE_LOCATION: ./test-results
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -8,7 +8,7 @@ on:
  pull_request:
    branches:
      - main
-      - 'release/**'
+      - "release/**"

 env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -16,11 +16,11 @@ env:
  CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
  CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
-  
+
 jobs:
  integration-tests:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=32cpu-linux-x64, "run-id=${{ github.run_id }}"]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -36,21 +36,21 @@ jobs:

      # tag every docker image with "test" so that we can spin up the correct set
      # of images during testing
-      
+
      # We don't need to build the Web Docker image since it's not yet used
-      # in the integration tests. We have a separate action to verify that it builds 
+      # in the integration tests. We have a separate action to verify that it builds
      # successfully.
      - name: Pull Web Docker image
        run: |
-          docker pull danswer/danswer-web-server:latest
-          docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:test
+          docker pull onyxdotapp/onyx-web-server:latest
+          docker tag onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:test

      # we use the runs-on cache for docker builds
      # in conjunction with runs-on runners, it has better speed and unlimited caching
      # https://runs-on.com/caching/s3-cache-for-github-actions/
      # https://runs-on.com/caching/docker/
      # https://github.com/moby/buildkit#s3-cache-experimental
-      
+
      # images are built and run locally for testing purposes. Not pushed.
      - name: Build Backend Docker image
        uses: ./.github/actions/custom-build-and-push
@@ -58,7 +58,7 @@ jobs:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64
-          tags: danswer/danswer-backend:test
+          tags: onyxdotapp/onyx-backend:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
@@ -70,19 +70,19 @@ jobs:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/amd64
-          tags: danswer/danswer-model-server:test
+          tags: onyxdotapp/onyx-model-server:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-      
+
      - name: Build integration test Docker image
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./backend
          file: ./backend/tests/integration/Dockerfile
          platforms: linux/amd64
-          tags: danswer/danswer-integration:test
+          tags: onyxdotapp/onyx-integration:test
          push: false
          load: true
          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
@@ -119,7 +119,7 @@ jobs:
            -e TEST_WEB_HOSTNAME=test-runner \
            -e AUTH_TYPE=cloud \
            -e MULTI_TENANT=true \
-            danswer/danswer-integration:test \
+            onyxdotapp/onyx-integration:test \
            /app/tests/integration/multitenant_tests
        continue-on-error: true
        id: run_multitenant_tests
@@ -131,15 +131,14 @@ jobs:
            exit 1
          else
            echo "All integration tests passed successfully."
-          fi 
+          fi

      - name: Stop multi-tenant Docker containers
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.dev.yml -p danswer-stack down -v

-
-      - name: Start Docker containers 
+      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
@@ -153,12 +152,12 @@ jobs:
      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."
-          
+
          docker logs -f danswer-stack-api_server-1 &

          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds
-          
+
          while true; do
            current_time=$(date +%s)
            elapsed_time=$((current_time - start_time))
@@ -202,7 +201,7 @@ jobs:
            -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
            -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
            -e TEST_WEB_HOSTNAME=test-runner \
-            danswer/danswer-integration:test \
+            onyxdotapp/onyx-integration:test \
            /app/tests/integration/tests \
            /app/tests/integration/connector_job_tests
        continue-on-error: true
@@ -229,7 +228,7 @@ jobs:
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.dev.yml -p danswer-stack down -v
-      
+
      - name: Upload logs
        if: success() || failure()
        uses: actions/upload-artifact@v4
--- a/.github/workflows/pr-linear-check.yml
+++ b/.github/workflows/pr-linear-check.yml
@@ -0,0 +1,29 @@
+name: Ensure PR references Linear
+
+on:
+  pull_request:
+    types: [opened, edited, reopened, synchronize]
+
+jobs:
+  linear-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check PR body for Linear link or override
+        env: # the PR body is handed to the script as an env var, not templated into it
+          PR_BODY: ${{ github.event.pull_request.body }}
+        run: |
+          # Pass if the PR description contains a Linear URL ("https://linear.app").
+          if printf '%s\n' "$PR_BODY" | grep -qE "https://linear\.app"; then
+            echo "Found a Linear link. Check passed."
+            exit 0
+          fi
+
+          # Pass if the override box is ticked; "[x]" and "[X]" both render as checked.
+          if printf '%s\n' "$PR_BODY" | grep -q "\[[xX]\].*Override Linear Check"; then
+            echo "Override box is checked. Check passed."
+            exit 0
+          fi
+
+          # Neither a Linear link nor a ticked override was found: fail the job.
+          echo "No Linear link or override found in the PR description."
+          exit 1
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -26,7 +26,19 @@ env:
  GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
  # Slab
  SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
-
+  # Zendesk
+  ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
+  ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
+  ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
+  # Salesforce
+  SF_USERNAME: ${{ secrets.SF_USERNAME }}
+  SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
+  SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
+  # Airtable
+  AIRTABLE_TEST_BASE_ID: ${{ secrets.AIRTABLE_TEST_BASE_ID }}
+  AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
+  AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
+  AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
 jobs:
  connectors-check:
    # See https://runs-on.com/runners/linux/
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -2,53 +2,52 @@ name: Nightly Tag Push

 on:
  schedule:
-    - cron: '0 10 * * *' # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
+    - cron: "0 10 * * *" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC

 permissions:
-  contents: write  # Allows pushing tags to the repository
+  contents: write # Allows pushing tags to the repository

 jobs:
  create-and-push-tag:
-    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]

    steps:
-    # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
-    # see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
-    # implement here which needs an actual user's deploy key
-    - name: Checkout code
-      uses: actions/checkout@v4
-      with:
-        ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
+      # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
+      # see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
+      # implement here which needs an actual user's deploy key
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"

-    - name: Set up Git user
-      run: |
-        git config user.name "Richard Kuo [bot]"
-        git config user.email "rkuo[bot]@danswer.ai"
+      - name: Set up Git user
+        run: |
+          git config user.name "Richard Kuo [bot]"
+          git config user.email "rkuo[bot]@onyx.app"

-    - name: Check for existing nightly tag
-      id: check_tag
-      run: |
-        if git tag --points-at HEAD --list "nightly-latest*" | grep -q .; then
-          echo "A tag starting with 'nightly-latest' already exists on HEAD."
-          echo "tag_exists=true" >> $GITHUB_OUTPUT
-        else
-          echo "No tag starting with 'nightly-latest' exists on HEAD."
-          echo "tag_exists=false" >> $GITHUB_OUTPUT
-        fi
-        
-    # don't tag again if HEAD already has a nightly-latest tag on it
-    - name: Create Nightly Tag
-      if: steps.check_tag.outputs.tag_exists == 'false'
-      env:
-        DATE: ${{ github.run_id }}
-      run: |
-        TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
-        echo "Creating tag: $TAG_NAME"
-        git tag $TAG_NAME
+      - name: Check for existing nightly tag
+        id: check_tag
+        run: |
+          if git tag --points-at HEAD --list "nightly-latest*" | grep -q .; then
+            echo "A tag starting with 'nightly-latest' already exists on HEAD."
+            echo "tag_exists=true" >> $GITHUB_OUTPUT
+          else
+            echo "No tag starting with 'nightly-latest' exists on HEAD."
+            echo "tag_exists=false" >> $GITHUB_OUTPUT
+          fi

-    - name: Push Tag
-      if: steps.check_tag.outputs.tag_exists == 'false'
-      run: |
-        TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
-        git push origin $TAG_NAME
-        
+      # don't tag again if HEAD already has a nightly-latest tag on it
+      - name: Create Nightly Tag
+        if: steps.check_tag.outputs.tag_exists == 'false'
+        env:
+          DATE: ${{ github.run_id }}
+        run: |
+          TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
+          echo "Creating tag: $TAG_NAME"
+          git tag $TAG_NAME
+
+      - name: Push Tag
+        if: steps.check_tag.outputs.tag_exists == 'false'
+        run: |
+          TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
+          git push origin $TAG_NAME
--- a/.vscode/env_template.txt
+++ b/.vscode/env_template.txt
@@ -5,6 +5,8 @@
 # For local dev, often user Authentication is not needed
 AUTH_TYPE=disabled

+# Skip warm up for dev
+SKIP_WARM_UP=True

 # Always keep these on for Dev
 # Logs all model prompts to stdout
@@ -27,6 +29,7 @@ REQUIRE_EMAIL_VERIFICATION=False

 # Set these so if you wipe the DB, you don't end up having to go through the UI every time
 GEN_AI_API_KEY=<REPLACE THIS>
+OPENAI_API_KEY=<REPLACE THIS>
 # If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper
 GEN_AI_MODEL_VERSION=gpt-4o
 FAST_GEN_AI_MODEL_VERSION=gpt-4o
--- a/.vscode/launch.template.jsonc
+++ b/.vscode/launch.template.jsonc
@@ -17,7 +17,7 @@
 			 }
 		},
        {
-            "name": "Run All Danswer Services",
+            "name": "Run All Onyx Services",
            "configurations": [
                "Web Server",
                "Model Server",
@@ -28,6 +28,7 @@
 		  		"Celery heavy", 
 		  		"Celery indexing", 
 		  		"Celery beat",
+                "Celery monitoring",
            ],
 			"presentation": {
 				 "group": "1",
@@ -51,7 +52,8 @@
 		  		"Celery light", 
 		  		"Celery heavy", 
 		  		"Celery indexing", 
-		  		"Celery beat"
+		  		"Celery beat",
+                "Celery monitoring",
 		  	],
 			"presentation": {
 				 "group": "1",
@@ -122,7 +124,7 @@
                "PYTHONUNBUFFERED": "1"
            },
            "args": [
-                "danswer.main:app",
+                "onyx.main:app",
                "--reload",
                "--port",
                "8080"
@@ -139,7 +141,7 @@
            "consoleName": "Slack Bot",
            "type": "debugpy",
            "request": "launch",
-            "program": "danswer/danswerbot/slack/listener.py",
+            "program": "onyx/onyxbot/slack/listener.py",
            "cwd": "${workspaceFolder}/backend",
            "envFile": "${workspaceFolder}/.vscode/.env",
            "env": {
@@ -166,7 +168,7 @@
            },
            "args": [
                "-A",
-                "danswer.background.celery.versioned_apps.primary",
+                "onyx.background.celery.versioned_apps.primary",
                "worker",
                "--pool=threads",
                "--concurrency=4",
@@ -195,7 +197,7 @@
            },
            "args": [
                "-A",
-                "danswer.background.celery.versioned_apps.light",
+                "onyx.background.celery.versioned_apps.light",
                "worker",
                "--pool=threads",
                "--concurrency=64",
@@ -224,7 +226,7 @@
            },
            "args": [
                "-A",
-                "danswer.background.celery.versioned_apps.heavy",
+                "onyx.background.celery.versioned_apps.heavy",
                "worker",
                "--pool=threads",
                "--concurrency=4",
@@ -254,7 +256,7 @@
            },
            "args": [
                "-A",
-                "danswer.background.celery.versioned_apps.indexing",
+                "onyx.background.celery.versioned_apps.indexing",
                "worker",
                "--pool=threads",
                "--concurrency=1",
@@ -269,6 +271,31 @@
 			 },
            "consoleTitle": "Celery indexing Console"
        },
+        {
+            "name": "Celery monitoring",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "celery",
+            "cwd": "${workspaceFolder}/backend",
+            "envFile": "${workspaceFolder}/.vscode/.env",
+            "env": {},
+            "args": [
+                "-A",
+                "onyx.background.celery.versioned_apps.monitoring",
+                "worker",
+                "--pool=solo",
+                "--concurrency=1",
+                "--prefetch-multiplier=1",
+                "--loglevel=INFO",
+                "--hostname=monitoring@%n",
+                "-Q",
+                "monitoring",
+            ],
+            "presentation": {
+				 "group": "2",
+			 },
+            "consoleTitle": "Celery monitoring Console"
+        },
        {
            "name": "Celery beat",
            "type": "debugpy",
@@ -283,7 +310,7 @@
            },
            "args": [
                "-A",
-                "danswer.background.celery.versioned_apps.beat",
+                "onyx.background.celery.versioned_apps.beat",
                "beat",
                "--loglevel=INFO",
            ],
@@ -308,7 +335,7 @@
            "args": [
                "-v"
                // Specify a sepcific module/test to run or provide nothing to run all tests
-                //"tests/unit/danswer/llm/answering/test_prune_and_merge.py"
+                //"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
            ],
            "presentation": {
 				 "group": "2",
@@ -355,5 +382,20 @@
                "PYTHONPATH": "."
            },
        },
+        {
+            "name": "Install Python Requirements",
+            "type": "node",
+            "request": "launch",
+            "runtimeExecutable": "bash",
+            "runtimeArgs": [
+                "-c",
+                "pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
+            ],
+            "cwd": "${workspaceFolder}",
+            "console": "integratedTerminal",
+            "presentation": {
+                 "group": "3"
+            }
+        },
    ]
 }
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,32 +1,38 @@
-<!-- DANSWER_METADATA={"link": "https://github.com/danswer-ai/danswer/blob/main/CONTRIBUTING.md"} -->
+<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->

-# Contributing to Danswer
-Hey there! We are so excited that you're interested in Danswer.
+# Contributing to Onyx
+
+Hey there! We are so excited that you're interested in Onyx.

 As an open source project in a rapidly changing space, we welcome all contributions.

-
 ## 💃 Guidelines
+
 ### Contribution Opportunities
-The [GitHub Issues](https://github.com/danswer-ai/danswer/issues) page is a great place to start for contribution ideas.
+
+The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
+
+To ensure that your contribution is aligned with the project's direction, please reach out to Hagen (or any other maintainer) on the Onyx team
+via [Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
+[Discord](https://discord.gg/TDJ59cGV2X) or [email](mailto:founders@onyx.app).

 Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
 will be marked with the `approved by maintainers` label.
 Issues marked `good first issue` are an especially great place to start.

 **Connectors** to other tools are another great place to contribute. For details on how, refer to this
-[README.md](https://github.com/danswer-ai/danswer/blob/main/backend/danswer/connectors/README.md).
+[README.md](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md).

 If you have a new/different contribution in mind, we'd love to hear about it!
-Your input is vital to making sure that Danswer moves in the right direction.
+Your input is vital to making sure that Onyx moves in the right direction.
 Before starting on implementation, please raise a GitHub issue.

-And always feel free to message us (Chris Weaver / Yuhong Sun) on 
-[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2lcmqw703-071hBuZBfNEOGUsLa5PXvQ) / 
-[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all. 
-
+Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
+[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
+[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.

 ### Contributing Code
+
 To contribute to this project, please follow the
 ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
 When opening a pull request, mention related issues and feel free to tag relevant maintainers.
@@ -34,72 +40,78 @@ When opening a pull request, mention related issues and feel free to tag relevan
 Before creating a pull request please make sure that the new changes conform to the formatting and linting requirements.
 See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.

-
 ### Getting Help 🙋
+
 Our goal is to make contributing as easy as possible. If you run into any issues please don't hesitate to reach out.
 That way we can help future contributors and users can avoid the same issue.

 We also have support channels and generally interesting discussions on our
-[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2afut44lv-Rw3kSWu6_OmdAXRpCv80DQ)
-and 
+[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA)
+and
 [Discord](https://discord.gg/TDJ59cGV2X).

 We would love to see you there!

-
 ## Get Started 🚀
-Danswer being a fully functional app, relies on some external software, specifically:
+
+Onyx, being a fully functional app, relies on some external software, specifically:
+
 - [Postgres](https://www.postgresql.org/) (Relational DB)
 - [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
 - [Redis](https://redis.io/) (Cache)
 - [Nginx](https://nginx.org/) (Not needed for development flows generally)

-
 > **Note:**
-> This guide provides instructions to build and run Danswer locally from source with Docker containers providing the above external software. We believe this combination is easier for
-> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Danswer stack within Docker below.
-
+> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software. We believe this combination is easier for
+> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Onyx stack within Docker below.

 ### Local Set Up
+
 Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.

 If using a lower version, modifications will have to be made to the code.
 If using a higher version, sometimes some libraries will not be available (i.e. we had problems with Tensorflow in the past with higher versions of python).

-
 #### Backend: Python requirements
+
 Currently, we use pip and recommend creating a virtual environment.

 For convenience here's a command for it:
+
 ```bash
 python -m venv .venv
 source .venv/bin/activate
 ```

 > **Note:**
-> This virtual environment MUST NOT be set up WITHIN the danswer directory if you plan on using mypy within certain IDEs.
-> For simplicity, we recommend setting up the virtual environment outside of the danswer directory.
+> This virtual environment MUST NOT be set up WITHIN the onyx directory if you plan on using mypy within certain IDEs.
+> For simplicity, we recommend setting up the virtual environment outside of the onyx directory.

 _For Windows, activate the virtual environment using Command Prompt:_
+
 ```bash
 .venv\Scripts\activate
 ```
+
 If using PowerShell, the command slightly differs:
+
 ```powershell
 .venv\Scripts\Activate.ps1
 ```

 Install the required python dependencies:
+
 ```bash
-pip install -r danswer/backend/requirements/default.txt
-pip install -r danswer/backend/requirements/dev.txt
-pip install -r danswer/backend/requirements/ee.txt
-pip install -r danswer/backend/requirements/model_server.txt
+pip install -r onyx/backend/requirements/default.txt
+pip install -r onyx/backend/requirements/dev.txt
+pip install -r onyx/backend/requirements/ee.txt
+pip install -r onyx/backend/requirements/model_server.txt
 ```

 Install Playwright for Python (headless browser required by the Web Connector)

 In the activated Python virtualenv, install Playwright for Python by running:
+
 ```bash
 playwright install
 ```
@@ -109,42 +121,90 @@ You may have to deactivate and reactivate your virtualenv for `playwright` to ap
 #### Frontend: Node dependencies

 Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend.
-Once the above is done, navigate to `danswer/web` run:
+Once the above is done, navigate to `onyx/web` and run:
+
 ```bash
 npm i
 ```

-#### Docker containers for external software
+## Formatting and Linting
+
+### Backend
+
+For the backend, you'll need to set up pre-commit hooks (black / reorder-python-imports).
+First, install pre-commit (if you don't have it already) following the instructions
+[here](https://pre-commit.com/#installation).
+
+With the virtual environment active, install the pre-commit library with:
+
+```bash
+pip install pre-commit
+```
+
+Then, from the `onyx/backend` directory, run:
+
+```bash
+pre-commit install
+```
+
+Additionally, we use `mypy` for static type checking.
+Onyx is fully type-annotated, and we want to keep it that way!
+To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend` directory.
+
+### Web
+
+We use `prettier` for formatting. The desired version (2.8.8) will be installed by running `npm i` from the `onyx/web` directory.
+To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
+Please double check that prettier passes before creating a pull request.
+
+# Running the application for development
+
+## Developing using VSCode Debugger (recommended)
+
+We highly recommend using VSCode debugger for development.
+See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
+
+Otherwise, you can follow the instructions below to run the application for development.
+
+## Manually running the application for development
+### Docker containers for external software
+
 You will need Docker installed to run these containers.

-First navigate to `danswer/deployment/docker_compose`, then start up Postgres/Vespa/Redis with:
+First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis with:
+
 ```bash
-docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational_db cache
+docker compose -f docker-compose.dev.yml -p onyx-stack up -d index relational_db cache
 ```
+
 (index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)

+### Running Onyx locally
+
+To start the frontend, navigate to `onyx/web` and run:

-#### Running Danswer locally
-To start the frontend, navigate to `danswer/web` and run:
 ```bash
 npm run dev
 ```

 Next, start the model server which runs the local NLP models.
-Navigate to `danswer/backend` and run:
+Navigate to `onyx/backend` and run:
+
 ```bash
 uvicorn model_server.main:app --reload --port 9000
 ```

 _For Windows (for compatibility with both PowerShell and Command Prompt):_
+
 ```bash
 powershell -Command "uvicorn model_server.main:app --reload --port 9000"
 ```

-The first time running Danswer, you will need to run the DB migrations for Postgres.
+The first time running Onyx, you will need to run the DB migrations for Postgres.
 After the first time, this is no longer required unless the DB models change.

-Navigate to `danswer/backend` and with the venv active, run:
+Navigate to `onyx/backend` and with the venv active, run:
+
 ```bash
 alembic upgrade head
 ```
@@ -152,21 +212,24 @@ alembic upgrade head
 Next, start the task queue which orchestrates the background jobs.
 Jobs that take more time are run async from the API server.

-Still in `danswer/backend`, run:
+Still in `onyx/backend`, run:
+
 ```bash
 python ./scripts/dev_run_background_jobs.py
 ```

-To run the backend API server, navigate back to `danswer/backend` and run:
+To run the backend API server, navigate back to `onyx/backend` and run:
+
 ```bash
-AUTH_TYPE=disabled uvicorn danswer.main:app --reload --port 8080
+AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
 ```

 _For Windows (for compatibility with both PowerShell and Command Prompt):_
+
 ```bash
 powershell -Command "
    $env:AUTH_TYPE='disabled'
-    uvicorn danswer.main:app --reload --port 8080 
+    uvicorn onyx.main:app --reload --port 8080
 "
 ```

@@ -182,57 +245,32 @@ You should now have 4 servers running:
 - Model server
 - Background jobs

-Now, visit `http://localhost:3000` in your browser. You should see the Danswer onboarding wizard where you can connect your external LLM provider to Danswer.
+Now, visit `http://localhost:3000` in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.

-You've successfully set up a local Danswer instance! 🏁
+You've successfully set up a local Onyx instance! 🏁

-#### Running the Danswer application in a container
+#### Running the Onyx application in a container

-You can run the full Danswer application stack from pre-built images including all external software dependencies.
+You can run the full Onyx application stack from pre-built images including all external software dependencies.

-Navigate to `danswer/deployment/docker_compose` and run:
+Navigate to `onyx/deployment/docker_compose` and run:

 ```bash
-docker compose -f docker-compose.dev.yml -p danswer-stack up -d
+docker compose -f docker-compose.dev.yml -p onyx-stack up -d
 ```

-After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Danswer.
+After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.

-If you want to make changes to Danswer and run those changes in Docker, you can also build a local version of the Danswer container images that incorporates your changes like so:
+If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:

 ```bash
-docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build
+docker compose -f docker-compose.dev.yml -p onyx-stack up -d --build
 ```

-### Formatting and Linting
-#### Backend
-For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
-First, install pre-commit (if you don't have it already) following the instructions
-[here](https://pre-commit.com/#installation).
-
-With the virtual environment active, install the pre-commit library with:
-```bash
-pip install pre-commit
-```
-
-Then, from the `danswer/backend` directory, run:
-```bash
-pre-commit install
-```
-
-Additionally, we use `mypy` for static type checking.
-Danswer is fully type-annotated, and we want to keep it that way! 
-To run the mypy checks manually, run `python -m mypy .` from the `danswer/backend` directory.
-
-
-#### Web
-We use `prettier` for formatting. The desired version (2.8.8) will be installed via a `npm i` from the `danswer/web` directory. 
-To run the formatter, use `npx prettier --write .` from the `danswer/web` directory.
-Please double check that prettier passes before creating a pull request.
-

 ### Release Process
-Danswer loosely follows the SemVer versioning standard.
+
+Onyx loosely follows the SemVer versioning standard.
 Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
 A set of Docker containers will be pushed automatically to DockerHub with every tag.
-You can see the containers [here](https://hub.docker.com/search?q=danswer%2F).
+You can see the containers [here](https://hub.docker.com/search?q=onyxdotapp%2F).
--- a/CONTRIBUTING_MACOS.md
+++ b/CONTRIBUTING_MACOS.md
@@ -1,15 +1,19 @@
 ## Some additional notes for Mac Users
-The base instructions to set up the development environment are located in [CONTRIBUTING.md](https://github.com/danswer-ai/danswer/blob/main/CONTRIBUTING.md).
+
+The base instructions to set up the development environment are located in [CONTRIBUTING.md](https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md).

 ### Setting up Python
+
 Ensure [Homebrew](https://brew.sh/) is already set up.

 Then install python 3.11.
+
 ```bash
 brew install python@3.11
 ```

 Add python 3.11 to your path: add the following line to ~/.zshrc
+
 ```
 export PATH="$(brew --prefix)/opt/python@3.11/libexec/bin:$PATH"
 ```
@@ -17,15 +21,16 @@ export PATH="$(brew --prefix)/opt/python@3.11/libexec/bin:$PATH"
 > **Note:**
 > You will need to open a new terminal for the path change above to take effect.

-
 ### Setting up Docker
-On macOS, you will need to install [Docker Desktop](https://www.docker.com/products/docker-desktop/) and 
+
+On macOS, you will need to install [Docker Desktop](https://www.docker.com/products/docker-desktop/) and
 ensure it is running before continuing with the docker commands.

-
 ### Formatting and Linting
+
 MacOS will likely require you to remove some quarantine attributes on some of the hooks for them to execute properly.
 After installing pre-commit, run the following command:
+
 ```bash
 sudo xattr -r -d com.apple.quarantine ~/.cache/pre-commit
-```
+```
--- a/CONTRIBUTING_VSCODE.md
+++ b/CONTRIBUTING_VSCODE.md
@@ -0,0 +1,30 @@
+# VSCode Debugging Setup
+
+This guide explains how to set up and use VSCode's debugging capabilities with this project.
+
+## Initial Setup
+
+1. **Environment Setup**:
+   - Copy `.vscode/env_template.txt` to `.vscode/.env`
+   - Fill in the necessary environment variables in `.vscode/.env`
+2. **launch.json**:
+   - Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
+
+## Using the Debugger
+
+Before starting, make sure the Docker Daemon is running.
+
+1. Open the Debug view in VSCode (Cmd+Shift+D on macOS)
+2. From the dropdown at the top, select "Clear and Restart External Volumes and Containers" and press the green play button
+3. From the dropdown at the top, select "Run All Onyx Services" and press the green play button
+4. In a terminal, `cd` into the `web` directory and run `npm i` followed by `npm run dev`.
+5. Now, you can navigate to Onyx in your browser (default is http://localhost:3000) and start using the app
+6. You can set breakpoints by clicking to the left of line numbers to help debug while the app is running
+7. Use the debug toolbar to step through code, inspect variables, etc.
+
+## Features
+
+- Hot reload is enabled for the web server and API servers
+- Python debugging is configured with debugpy
+- Environment variables are loaded from `.vscode/.env`
+- Console output is organized in the integrated terminal with labeled tabs
--- a/6
+++ b/6
@@ -2,9 +2,9 @@ Copyright (c) 2023-present DanswerAI, Inc.

 Portions of this software are licensed as follows:

-* All content that resides under "ee" directories of this repository, if that directory exists, is licensed under the license defined in "backend/ee/LICENSE". Specifically all content under "backend/ee" and "web/src/app/ee" is licensed under the license defined in "backend/ee/LICENSE".
-* All third party components incorporated into the Danswer Software are licensed under the original license provided by the owner of the applicable component.
-* Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.
+- All content that resides under "ee" directories of this repository, if that directory exists, is licensed under the license defined in "backend/ee/LICENSE". Specifically all content under "backend/ee" and "web/src/app/ee" is licensed under the license defined in "backend/ee/LICENSE".
+- All third party components incorporated into the Onyx Software are licensed under the original license provided by the owner of the applicable component.
+- Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/README.md
+++ b/README.md
@@ -1,146 +1,135 @@
-<!-- DANSWER_METADATA={"link": "https://github.com/danswer-ai/danswer/blob/main/README.md"} -->
+<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
+
 <a name="readme-top"></a>

 <h2 align="center">
-<a href="https://www.danswer.ai/"> <img width="50%" src="https://github.com/danswer-owners/danswer/blob/1fabd9372d66cd54238847197c33f091a724803b/DanswerWithName.png?raw=true)" /></a>
+<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
 </h2>

 <p align="center">
-<p align="center">Open Source Gen-AI Chat + Unified Search.</p>
+<p align="center">Open Source Gen-AI + Enterprise Search.</p>

 <p align="center">
-<a href="https://docs.danswer.dev/" target="_blank">
+<a href="https://docs.onyx.app/" target="_blank">
    <img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
 </a>
-<a href="https://join.slack.com/t/danswer/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
+<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
    <img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
 </a>
 <a href="https://discord.gg/TDJ59cGV2X" target="_blank">
    <img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
 </a>
-<a href="https://github.com/danswer-ai/danswer/blob/main/README.md" target="_blank">
+<a href="https://github.com/onyx-dot-app/onyx/blob/main/README.md" target="_blank">
    <img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
 </a>
 </p>

-<strong>[Danswer](https://www.danswer.ai/)</strong> is the AI Assistant connected to your company's docs, apps, and people. 
-Danswer provides a Chat interface and plugs into any LLM of your choice. Danswer can be deployed anywhere and for any 
-scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your 
-own control. Danswer is MIT licensed and designed to be modular and easily extensible. The system also comes fully ready 
-for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for 
-configuring Personas (AI Assistants) and their Prompts.
+<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI Assistant connected to your company's docs, apps, and people.
+Onyx provides a Chat interface and plugs into any LLM of your choice. Onyx can be deployed anywhere and for any
+scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your
+own control. Onyx is dual-licensed, with most of it under the MIT license, and is designed to be modular and easily extensible. The system also comes fully ready
+for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for
+configuring AI Assistants.

-Danswer also serves as a Unified Search across all common workplace tools such as Slack, Google Drive, Confluence, etc.
-By combining LLMs and team specific knowledge, Danswer becomes a subject matter expert for the team. Imagine ChatGPT if
+Onyx also serves as an Enterprise Search across all common workplace tools such as Slack, Google Drive, Confluence, etc.
+By combining LLMs and team specific knowledge, Onyx becomes a subject matter expert for the team. Imagine ChatGPT if
 it had access to your team's unique knowledge! It enables questions such as "A customer wants feature X, is this already
 supported?" or "Where's the pull request for feature Y?"

 <h3>Usage</h3>

-Danswer Web App:
+Onyx Web App:

-https://github.com/danswer-ai/danswer/assets/32520769/563be14c-9304-47b5-bf0a-9049c2b6f410
+https://github.com/onyx-dot-app/onyx/assets/32520769/563be14c-9304-47b5-bf0a-9049c2b6f410

+Or, plug Onyx into your existing Slack workflows (more integrations to come 😁):

-Or, plug Danswer into your existing Slack workflows (more integrations to come 😁):
+https://github.com/onyx-dot-app/onyx/assets/25087905/3e19739b-d178-4371-9a38-011430bdec1b

-https://github.com/danswer-ai/danswer/assets/25087905/3e19739b-d178-4371-9a38-011430bdec1b
-
-
-For more details on the Admin UI to manage connectors and users, check out our 
+For more details on the Admin UI to manage connectors and users, check out our
 <strong><a href="https://www.youtube.com/watch?v=geNzY1nbCnU">Full Video Demo</a></strong>!

 ## Deployment

-Danswer can easily be run locally (even on a laptop) or deployed on a virtual machine with a single
-`docker compose` command. Checkout our [docs](https://docs.danswer.dev/quickstart) to learn more.
+Onyx can easily be run locally (even on a laptop) or deployed on a virtual machine with a single
+`docker compose` command. Check out our [docs](https://docs.onyx.app/quickstart) to learn more.

-We also have built-in support for deployment on Kubernetes. Files for that can be found [here](https://github.com/danswer-ai/danswer/tree/main/deployment/kubernetes).
+We also have built-in support for deployment on Kubernetes. Files for that can be found [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment/kubernetes).

+## 💃 Main Features

-## 💃 Main Features 
-* Chat UI with the ability to select documents to chat with.
-* Create custom AI Assistants with different prompts and backing knowledge sets.
-* Connect Danswer with LLM of your choice (self-host for a fully airgapped solution).
-* Document Search + AI Answers for natural language queries.
-* Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc.
-* Slack integration to get answers and search results directly in Slack.
-
+- Chat UI with the ability to select documents to chat with.
+- Create custom AI Assistants with different prompts and backing knowledge sets.
+- Connect Onyx with LLM of your choice (self-host for a fully airgapped solution).
+- Document Search + AI Answers for natural language queries.
+- Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc.
+- Slack integration to get answers and search results directly in Slack.

 ## 🚧 Roadmap
-* Chat/Prompt sharing with specific teammates and user groups.
-* Multimodal model support, chat with images, video etc.
-* Choosing between LLMs and parameters during chat session.
-* Tool calling and agent configurations options.
-* Organizational understanding and ability to locate and suggest experts from your team.

+- Chat/Prompt sharing with specific teammates and user groups.
+- Multimodal model support, chat with images, video etc.
+- Choosing between LLMs and parameters during chat session.
+- Tool calling and agent configuration options.
+- Organizational understanding and ability to locate and suggest experts from your team.

-## Other Notable Benefits of Danswer
-* User Authentication with document level access management.
-* Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models).
-* Admin Dashboard to configure connectors, document-sets, access, etc.
-* Custom deep learning models + learn from user feedback.
-* Easy deployment and ability to host Danswer anywhere of your choosing.
+## Other Notable Benefits of Onyx

+- User Authentication with document level access management.
+- Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models).
+- Admin Dashboard to configure connectors, document-sets, access, etc.
+- Custom deep learning models + learn from user feedback.
+- Easy deployment and ability to host Onyx anywhere of your choosing.

 ## 🔌 Connectors
+
 Efficiently pulls the latest changes from:
-  * Slack
-  * GitHub
-  * Google Drive
-  * Confluence
-  * Jira
-  * Zendesk
-  * Gmail
-  * Notion
-  * Gong
-  * Slab
-  * Linear
-  * Productboard
-  * Guru
-  * Bookstack
-  * Document360
-  * Sharepoint
-  * Hubspot
-  * Local Files
-  * Websites
-  * And more ...
+
+- Slack
+- GitHub
+- Google Drive
+- Confluence
+- Jira
+- Zendesk
+- Gmail
+- Notion
+- Gong
+- Slab
+- Linear
+- Productboard
+- Guru
+- Bookstack
+- Document360
+- Sharepoint
+- Hubspot
+- Local Files
+- Websites
+- And more ...

 ## 📚 Editions

-There are two editions of Danswer:
+There are two editions of Onyx:

-  * Danswer Community Edition (CE) is available freely under the MIT Expat license. This version has ALL the core features discussed above. This is the version of Danswer you will get if you follow the Deployment guide above.
-  * Danswer Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations. Specifically, this includes:
-    * Single Sign-On (SSO), with support for both SAML and OIDC
-    * Role-based access control
-    * Document permission inheritance from connected sources
-    * Usage analytics and query history accessible to admins
-    * Whitelabeling
-    * API key authentication
-    * Encryption of secrets
-    * Any many more! Checkout [our website](https://www.danswer.ai/) for the latest.
+- Onyx Community Edition (CE) is available freely under the MIT Expat license. This version has ALL the core features discussed above. This is the version of Onyx you will get if you follow the Deployment guide above.
+- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations. Specifically, this includes:
+  - Single Sign-On (SSO), with support for both SAML and OIDC
+  - Role-based access control
+  - Document permission inheritance from connected sources
+  - Usage analytics and query history accessible to admins
+  - Whitelabeling
+  - API key authentication
+  - Encryption of secrets
+  - And many more! Check out [our website](https://www.onyx.app/) for the latest.

-To try the Danswer Enterprise Edition: 
+To try the Onyx Enterprise Edition:

-  1. Checkout our [Cloud product](https://app.danswer.ai/signup).
-  2. For self-hosting, contact us at [founders@danswer.ai](mailto:founders@danswer.ai) or book a call with us on our [Cal](https://cal.com/team/danswer/founders).
+1. Check out our [Cloud product](https://cloud.onyx.app/signup).
+2. For self-hosting, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/danswer/founders).

 ## 💡 Contributing
+
 Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.

 ## ⭐Star History

-[![Star History Chart](https://api.star-history.com/svg?repos=danswer-ai/danswer&type=Date)](https://star-history.com/#danswer-ai/danswer&Date)
-
-## ✨Contributors
-
-<a href="https://github.com/danswer-ai/danswer/graphs/contributors">
-  <img alt="contributors" src="https://contrib.rocks/image?repo=danswer-ai/danswer"/>
-</a>
-
-<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
-    <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
-        ↑ Back to Top ↑
-    </a>
-</p>
+[![Star History Chart](https://api.star-history.com/svg?repos=onyx-dot-app/onyx&type=Date)](https://star-history.com/#onyx-dot-app/onyx&Date)
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -9,3 +9,4 @@ api_keys.py
 vespa-app.zip
 dynamic_config_storage/
 celerybeat-schedule*
+onyx/connectors/salesforce/data/
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,19 +1,21 @@
 FROM python:3.11.7-slim-bookworm

-LABEL com.danswer.maintainer="founders@danswer.ai"
-LABEL com.danswer.description="This image is the web/frontend container of Danswer which \
-contains code for both the Community and Enterprise editions of Danswer. If you do not \
+LABEL com.danswer.maintainer="founders@onyx.app"
+LABEL com.danswer.description="This image is the backend container of Onyx which \
+contains code for both the Community and Enterprise editions of Onyx. If you do not \
 have a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \
 Edition features outside of personal development or testing purposes. Please reach out to \
-founders@danswer.ai for more information. Please visit https://github.com/danswer-ai/danswer"
+founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"

-# Default DANSWER_VERSION, typically overriden during builds by GitHub Actions.
-ARG DANSWER_VERSION=0.8-dev
-ENV DANSWER_VERSION=${DANSWER_VERSION} \
-    DANSWER_RUNNING_IN_DOCKER="true"
+# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
+ARG ONYX_VERSION=0.8-dev
+# DO_NOT_TRACK is used to disable telemetry for Unstructured
+ENV ONYX_VERSION=${ONYX_VERSION} \
+    DANSWER_RUNNING_IN_DOCKER="true" \
+    DO_NOT_TRACK="true"


-RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
+RUN echo "ONYX_VERSION: ${ONYX_VERSION}"
 # Install system dependencies
 # cmake needed for psycopg (postgres)
 # libpq-dev needed for psycopg (postgres)
@@ -56,7 +58,7 @@ RUN pip install --no-cache-dir --upgrade \
 # Cleanup for CVEs and size reduction
 # https://github.com/tornadoweb/tornado/issues/3107
 # xserver-common and xvfb included by playwright installation but not needed after
-# perl-base is part of the base Python Debian image but not needed for Danswer functionality
+# perl-base is part of the base Python Debian image but not needed for Onyx functionality
 # perl-base could only be removed with --allow-remove-essential
 RUN apt-get update && \
    apt-get remove -y --allow-remove-essential \
@@ -92,7 +94,7 @@ COPY ./ee /app/ee
 COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

 # Set up application files
-COPY ./danswer /app/danswer
+COPY ./onyx /app/onyx
 COPY ./shared_configs /app/shared_configs
 COPY ./alembic /app/alembic
 COPY ./alembic_tenants /app/alembic_tenants
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -1,18 +1,18 @@
 FROM python:3.11.7-slim-bookworm

-LABEL com.danswer.maintainer="founders@danswer.ai"
-LABEL com.danswer.description="This image is for the Danswer model server which runs all of the \
-AI models for Danswer. This container and all the code is MIT Licensed and free for all to use. \
-You can find it at https://hub.docker.com/r/danswer/danswer-model-server. For more details, \
-visit https://github.com/danswer-ai/danswer."
+LABEL com.danswer.maintainer="founders@onyx.app"
+LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
+AI models for Onyx. This container and all the code is MIT Licensed and free for all to use. \
+You can find it at https://hub.docker.com/r/onyxdotapp/onyx-model-server. For more details, \
+visit https://github.com/onyx-dot-app/onyx."

-# Default DANSWER_VERSION, typically overriden during builds by GitHub Actions.
-ARG DANSWER_VERSION=0.8-dev
-ENV DANSWER_VERSION=${DANSWER_VERSION} \
+# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
+ARG ONYX_VERSION=0.8-dev
+ENV ONYX_VERSION=${ONYX_VERSION} \
    DANSWER_RUNNING_IN_DOCKER="true"


-RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
+RUN echo "ONYX_VERSION: ${ONYX_VERSION}"

 COPY ./requirements/model_server.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir --upgrade \
@@ -20,11 +20,11 @@ RUN pip install --no-cache-dir --upgrade \
        --timeout 30 \
        -r /tmp/requirements.txt

-RUN apt-get remove -y --allow-remove-essential perl-base && \
+RUN apt-get remove -y --allow-remove-essential perl-base && \
    apt-get autoremove -y

 # Pre-downloading models for setups with limited egress
-# Download tokenizers, distilbert for the Danswer model
+# Download tokenizers, distilbert for the Onyx model
 # Download model weights
 # Run Nomic to pull in the custom architecture and have it cached locally
 RUN python -c "from transformers import AutoTokenizer; \
@@ -38,18 +38,18 @@ from sentence_transformers import SentenceTransformer; \
 SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"

 # In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
-# running Danswer, don't overwrite it with the built in cache folder
+# running Onyx, don't overwrite it with the built in cache folder
 RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface

 WORKDIR /app

 # Utils used by model server
-COPY ./danswer/utils/logger.py /app/danswer/utils/logger.py
+COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py

 # Place to fetch version information
-COPY ./danswer/__init__.py /app/danswer/__init__.py
+COPY ./onyx/__init__.py /app/onyx/__init__.py

-# Shared between Danswer Backend and Model Server
+# Shared between Onyx Backend and Model Server
 COPY ./shared_configs /app/shared_configs

 # Model Server main code
--- a/backend/alembic/README.md
+++ b/backend/alembic/README.md
@@ -1,19 +1,22 @@
-<!-- DANSWER_METADATA={"link": "https://github.com/danswer-ai/danswer/blob/main/backend/alembic/README.md"} -->
+<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->

 # Alembic DB Migrations
-These files are for creating/updating the tables in the Relational DB (Postgres).
-Danswer migrations use a generic single-database configuration with an async dbapi.

-## To generate new migrations: 
-run from danswer/backend:
+These files are for creating/updating the tables in the Relational DB (Postgres).
+Onyx migrations use a generic single-database configuration with an async dbapi.
+
+## To generate new migrations:
+
+run from onyx/backend:
 `alembic revision --autogenerate -m <DESCRIPTION_OF_MIGRATION>`

 More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html

 ## Running migrations
+
 To run all un-applied migrations:
 `alembic upgrade head`

 To undo migrations:
-`alembic downgrade -X` 
+`alembic downgrade -X`
 where X is the number of migrations you want to undo from the current state
--- a/backend/alembic/env.py
+++ b/backend/alembic/env.py
@@ -1,39 +1,49 @@
+from typing import Any, Literal
+from onyx.db.engine import get_iam_auth_token
+from onyx.configs.app_configs import USE_IAM_AUTH
+from onyx.configs.app_configs import POSTGRES_HOST
+from onyx.configs.app_configs import POSTGRES_PORT
+from onyx.configs.app_configs import POSTGRES_USER
+from onyx.configs.app_configs import AWS_REGION_NAME
+from onyx.db.engine import build_connection_string
+from onyx.db.engine import get_all_tenant_ids
+from sqlalchemy import event
+from sqlalchemy import pool
+from sqlalchemy import text
 from sqlalchemy.engine.base import Connection
-from typing import Literal
+import os
+import ssl
 import asyncio
-from logging.config import fileConfig
 import logging
+from logging.config import fileConfig

 from alembic import context
-from sqlalchemy import pool
 from sqlalchemy.ext.asyncio import create_async_engine
-from sqlalchemy.sql import text
 from sqlalchemy.sql.schema import SchemaItem
-
-from shared_configs.configs import MULTI_TENANT
-from danswer.db.engine import build_connection_string
-from danswer.db.models import Base
+from onyx.configs.constants import SSL_CERT_FILE
+from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
+from onyx.db.models import Base
 from celery.backends.database.session import ResultModelBase  # type: ignore
-from danswer.db.engine import get_all_tenant_ids
-from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA

 # Alembic Config object
 config = context.config

-# Interpret the config file for Python logging.
 if config.config_file_name is not None and config.attributes.get(
    "configure_logger", True
 ):
    fileConfig(config.config_file_name)

-# Add your model's MetaData object here for 'autogenerate' support
 target_metadata = [Base.metadata, ResultModelBase.metadata]

 EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
-
-# Set up logging
 logger = logging.getLogger(__name__)

+ssl_context: ssl.SSLContext | None = None
+if USE_IAM_AUTH:
+    if not os.path.exists(SSL_CERT_FILE):
+        raise FileNotFoundError(f"Expected {SSL_CERT_FILE} when USE_IAM_AUTH is true.")
+    ssl_context = ssl.create_default_context(cafile=SSL_CERT_FILE)
+

 def include_object(
    object: SchemaItem,
@@ -49,20 +59,12 @@ def include_object(
    reflected: bool,
    compare_to: SchemaItem | None,
 ) -> bool:
-    """
-    Determines whether a database object should be included in migrations.
-    Excludes specified tables from migrations.
-    """
    if type_ == "table" and name in EXCLUDE_TABLES:
        return False
    return True


 def get_schema_options() -> tuple[str, bool, bool]:
-    """
-    Parses command-line options passed via '-x' in Alembic commands.
-    Recognizes 'schema', 'create_schema', and 'upgrade_all_tenants' options.
-    """
    x_args_raw = context.get_x_argument()
    x_args = {}
    for arg in x_args_raw:
@@ -90,16 +92,12 @@ def get_schema_options() -> tuple[str, bool, bool]:
 def do_run_migrations(
    connection: Connection, schema_name: str, create_schema: bool
 ) -> None:
-    """
-    Executes migrations in the specified schema.
-    """
    logger.info(f"About to migrate schema: {schema_name}")

    if create_schema:
        connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
        connection.execute(text("COMMIT"))

-    # Set search_path to the target schema
    connection.execute(text(f'SET search_path TO "{schema_name}"'))

    context.configure(
@@ -117,11 +115,25 @@ def do_run_migrations(
        context.run_migrations()


+def provide_iam_token_for_alembic(
+    dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
+) -> None:
+    if USE_IAM_AUTH:
+        # Database connection settings
+        region = AWS_REGION_NAME
+        host = POSTGRES_HOST
+        port = POSTGRES_PORT
+        user = POSTGRES_USER
+
+        # Get IAM authentication token
+        token = get_iam_auth_token(host, port, user, region)
+
+        # For Alembic / SQLAlchemy in this context, set SSL and password
+        cparams["password"] = token
+        cparams["ssl"] = ssl_context
+
+
 async def run_async_migrations() -> None:
-    """
-    Determines whether to run migrations for a single schema or all schemas,
-    and executes migrations accordingly.
-    """
    schema_name, create_schema, upgrade_all_tenants = get_schema_options()

    engine = create_async_engine(
@@ -129,10 +141,16 @@ async def run_async_migrations() -> None:
        poolclass=pool.NullPool,
    )

-    if upgrade_all_tenants:
-        # Run migrations for all tenant schemas sequentially
-        tenant_schemas = get_all_tenant_ids()
+    if USE_IAM_AUTH:

+        @event.listens_for(engine.sync_engine, "do_connect")
+        def event_provide_iam_token_for_alembic(
+            dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
+        ) -> None:
+            provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)
+
+    if upgrade_all_tenants:
+        tenant_schemas = get_all_tenant_ids()
        for schema in tenant_schemas:
            try:
                logger.info(f"Migrating schema: {schema}")
@@ -162,15 +180,20 @@ async def run_async_migrations() -> None:


 def run_migrations_offline() -> None:
-    """
-    Run migrations in 'offline' mode.
-    """
    schema_name, _, upgrade_all_tenants = get_schema_options()
    url = build_connection_string()

    if upgrade_all_tenants:
-        # Run offline migrations for all tenant schemas
        engine = create_async_engine(url)
+
+        if USE_IAM_AUTH:
+
+            @event.listens_for(engine.sync_engine, "do_connect")
+            def event_provide_iam_token_for_alembic_offline(
+                dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
+            ) -> None:
+                provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)
+
        tenant_schemas = get_all_tenant_ids()
        engine.sync_engine.dispose()

@@ -207,9 +230,6 @@ def run_migrations_offline() -> None:


 def run_migrations_online() -> None:
-    """
-    Runs migrations in 'online' mode using an asynchronous engine.
-    """
    asyncio.run(run_async_migrations())


--- a/backend/alembic/versions/027381bce97c_add_shortcut_option_for_users.py
+++ b/backend/alembic/versions/027381bce97c_add_shortcut_option_for_users.py
@@ -0,0 +1,29 @@
+"""add shortcut option for users
+
+Revision ID: 027381bce97c
+Revises: 6fc7886d665d
+Create Date: 2025-01-14 12:14:00.814390
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "027381bce97c"
+down_revision = "6fc7886d665d"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "user",
+        sa.Column(
+            "shortcut_enabled", sa.Boolean(), nullable=False, server_default="false"
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("user", "shortcut_enabled")
--- a/backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py
+++ b/backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py
@@ -11,7 +11,7 @@ from sqlalchemy.sql import table
 from sqlalchemy.dialects import postgresql
 import json

-from danswer.utils.encryption import encrypt_string_to_bytes
+from onyx.utils.encryption import encrypt_string_to_bytes

 # revision identifiers, used by Alembic.
 revision = "0a98909f2757"
--- a/backend/alembic/versions/0f7ff6d75b57_add_index_to_index_attempt_time_created.py
+++ b/backend/alembic/versions/0f7ff6d75b57_add_index_to_index_attempt_time_created.py
@@ -0,0 +1,36 @@
+"""add indexes to index_attempt.status and index_attempt.time_created
+
+Revision ID: 0f7ff6d75b57
+Revises: fec3db967bf7
+Create Date: 2025-01-10 14:01:14.067144
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "0f7ff6d75b57"
+down_revision = "fec3db967bf7"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        op.f("ix_index_attempt_status"),
+        "index_attempt",
+        ["status"],
+        unique=False,
+    )
+
+    op.create_index(
+        op.f("ix_index_attempt_time_created"),
+        "index_attempt",
+        ["time_created"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(op.f("ix_index_attempt_time_created"), table_name="index_attempt")
+
+    op.drop_index(op.f("ix_index_attempt_status"), table_name="index_attempt")
--- a/backend/alembic/versions/15326fcec57e_introduce_danswer_apis.py
+++ b/backend/alembic/versions/15326fcec57e_introduce_danswer_apis.py
@@ -1,4 +1,4 @@
-"""Introduce Danswer APIs
+"""Introduce Onyx APIs

 Revision ID: 15326fcec57e
 Revises: 77d07dffae64
@@ -8,7 +8,7 @@ Create Date: 2023-11-11 20:51:24.228999
 from alembic import op
 import sqlalchemy as sa

-from danswer.configs.constants import DocumentSource
+from onyx.configs.constants import DocumentSource

 # revision identifiers, used by Alembic.
 revision = "15326fcec57e"
--- a/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py
+++ b/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py
@@ -10,7 +10,7 @@ from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql

-from danswer.configs.chat_configs import NUM_POSTPROCESSED_RESULTS
+from onyx.configs.chat_configs import NUM_POSTPROCESSED_RESULTS

 # revision identifiers, used by Alembic.
 revision = "1f60f60c3401"
--- a/backend/alembic/versions/2955778aa44c_add_chunk_count_to_document.py
+++ b/backend/alembic/versions/2955778aa44c_add_chunk_count_to_document.py
@@ -0,0 +1,24 @@
+"""add chunk count to document
+
+Revision ID: 2955778aa44c
+Revises: c0aab6edb6dd
+Create Date: 2025-01-04 11:39:43.268612
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "2955778aa44c"
+down_revision = "c0aab6edb6dd"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column("document", sa.Column("chunk_count", sa.Integer(), nullable=True))
+
+
+def downgrade() -> None:
+    op.drop_column("document", "chunk_count")
--- a/backend/alembic/versions/35e518e0ddf4_properly_cascade.py
+++ b/backend/alembic/versions/35e518e0ddf4_properly_cascade.py
@@ -0,0 +1,121 @@
+"""properly_cascade
+
+Revision ID: 35e518e0ddf4
+Revises: 91a0a4d62b14
+Create Date: 2024-09-20 21:24:04.891018
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "35e518e0ddf4"
+down_revision = "91a0a4d62b14"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Recreate the chat_message -> chat_session FK so it uses ON DELETE CASCADE
+    op.drop_constraint(
+        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
+    )
+    op.create_foreign_key(
+        "chat_message_chat_session_id_fkey",
+        "chat_message",
+        "chat_session",
+        ["chat_session_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    # Drop both chat_message__search_doc FKs, then recreate them with ON DELETE CASCADE
+    op.drop_constraint(
+        "chat_message__search_doc_chat_message_id_fkey",
+        "chat_message__search_doc",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "chat_message__search_doc_search_doc_id_fkey",
+        "chat_message__search_doc",
+        type_="foreignkey",
+    )
+
+    op.create_foreign_key(
+        "chat_message__search_doc_chat_message_id_fkey",
+        "chat_message__search_doc",
+        "chat_message",
+        ["chat_message_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+    op.create_foreign_key(
+        "chat_message__search_doc_search_doc_id_fkey",
+        "chat_message__search_doc",
+        "search_doc",
+        ["search_doc_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    # Recreate the tool_call -> chat_message FK so it uses ON DELETE CASCADE
+    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
+    op.create_foreign_key(
+        "tool_call_message_id_fkey",
+        "tool_call",
+        "chat_message",
+        ["message_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+
+def downgrade() -> None:
+    # Recreate the chat_message -> chat_session FK without an ondelete rule
+    op.drop_constraint(
+        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
+    )
+    op.create_foreign_key(
+        "chat_message_chat_session_id_fkey",
+        "chat_message",
+        "chat_session",
+        ["chat_session_id"],
+        ["id"],
+    )
+
+    # Drop both chat_message__search_doc FKs, then recreate them without an ondelete rule
+    op.drop_constraint(
+        "chat_message__search_doc_chat_message_id_fkey",
+        "chat_message__search_doc",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "chat_message__search_doc_search_doc_id_fkey",
+        "chat_message__search_doc",
+        type_="foreignkey",
+    )
+
+    op.create_foreign_key(
+        "chat_message__search_doc_chat_message_id_fkey",
+        "chat_message__search_doc",
+        "chat_message",
+        ["chat_message_id"],
+        ["id"],
+    )
+    op.create_foreign_key(
+        "chat_message__search_doc_search_doc_id_fkey",
+        "chat_message__search_doc",
+        "search_doc",
+        ["search_doc_id"],
+        ["id"],
+    )
+
+    # Recreate the tool_call -> chat_message FK without an ondelete rule
+    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
+    op.create_foreign_key(
+        "tool_call_message_id_fkey",
+        "tool_call",
+        "chat_message",
+        ["message_id"],
+        ["id"],
+    )
--- a/backend/alembic/versions/369644546676_add_composite_index_for_index_attempt_.py
+++ b/backend/alembic/versions/369644546676_add_composite_index_for_index_attempt_.py
@@ -0,0 +1,35 @@
+"""add composite index for index attempt time updated
+
+Revision ID: 369644546676
+Revises: 2955778aa44c
+Create Date: 2025-01-08 15:38:17.224380
+
+"""
+from alembic import op
+from sqlalchemy import text
+
+# revision identifiers, used by Alembic.
+revision = "369644546676"
+down_revision = "2955778aa44c"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    """Create a non-unique composite index on ``index_attempt``.
+
+    The index covers ``connector_credential_pair_id``, ``search_settings_id``
+    and ``time_updated`` in descending order.
+    """
+    op.create_index(
+        "ix_index_attempt_ccpair_search_settings_time_updated",
+        "index_attempt",
+        [
+            "connector_credential_pair_id",
+            "search_settings_id",
+            # text() fragment so the DESC ordering is part of the index definition
+            text("time_updated DESC"),
+        ],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    """Drop the composite index on ``index_attempt`` created by ``upgrade``."""
+    op.drop_index(
+        "ix_index_attempt_ccpair_search_settings_time_updated",
+        table_name="index_attempt",
+    )
--- a/backend/alembic/versions/3c6531f32351_add_back_input_prompts.py
+++ b/backend/alembic/versions/3c6531f32351_add_back_input_prompts.py
@@ -0,0 +1,59 @@
+"""add back input prompts
+
+Revision ID: 3c6531f32351
+Revises: aeda5f2df4f6
+Create Date: 2025-01-13 12:49:51.705235
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import fastapi_users_db_sqlalchemy
+
+# revision identifiers, used by Alembic.
+revision = "3c6531f32351"
+down_revision = "aeda5f2df4f6"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the ``inputprompt`` and ``inputprompt__user`` tables.
+
+    ``inputprompt`` is created first because ``inputprompt__user`` has a
+    foreign key to ``inputprompt.id``.
+    """
+    # Prompt table; user_id is nullable and references user.id
+    op.create_table(
+        "inputprompt",
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column("prompt", sa.String(), nullable=False),
+        sa.Column("content", sa.String(), nullable=False),
+        sa.Column("active", sa.Boolean(), nullable=False),
+        sa.Column("is_public", sa.Boolean(), nullable=False),
+        sa.Column(
+            "user_id",
+            fastapi_users_db_sqlalchemy.generics.GUID(),
+            nullable=True,
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["user.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # Association table keyed by (input_prompt_id, user_id)
+    op.create_table(
+        "inputprompt__user",
+        sa.Column("input_prompt_id", sa.Integer(), nullable=False),
+        sa.Column(
+            "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False
+        ),
+        # NOTE(review): `default=` is a client-side SQLAlchemy default and is not
+        # rendered into CREATE TABLE; use `server_default` if a database-level
+        # default is intended for this NOT NULL column - confirm.
+        sa.Column("disabled", sa.Boolean(), nullable=False, default=False),
+        sa.ForeignKeyConstraint(
+            ["input_prompt_id"],
+            ["inputprompt.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["user.id"],
+        ),
+        sa.PrimaryKeyConstraint("input_prompt_id", "user_id"),
+    )
+
+
+def downgrade() -> None:
+    """Drop ``inputprompt__user`` and then ``inputprompt``."""
+    # The association table goes first: it holds a foreign key to inputprompt.id
+    op.drop_table("inputprompt__user")
+    op.drop_table("inputprompt")
--- a/backend/alembic/versions/46625e4745d4_remove_native_enum.py
+++ b/backend/alembic/versions/46625e4745d4_remove_native_enum.py
@@ -17,7 +17,7 @@ depends_on: None = None

 def upgrade() -> None:
    # At this point, we directly changed some previous migrations,
-    # https://github.com/danswer-ai/danswer/pull/637
+    # https://github.com/onyx-dot-app/onyx/pull/637
    # Due to using Postgres native Enums, it caused some complications for first time users.
    # To remove those complications, all Enums are only handled application side moving forward.
    # This migration exists to ensure that existing users don't run into upgrade issues.
--- a/backend/alembic/versions/47e5bef3a1d7_add_persona_categories.py
+++ b/backend/alembic/versions/47e5bef3a1d7_add_persona_categories.py
@@ -40,6 +40,6 @@ def upgrade() -> None:


 def downgrade() -> None:
-    op.drop_constraint("fk_persona_category", "persona", type_="foreignkey")
+    op.drop_constraint("persona_category_id_fkey", "persona", type_="foreignkey")
    op.drop_column("persona", "category_id")
    op.drop_table("persona_category")
--- a/backend/alembic/versions/4ee1287bd26a_add_multiple_slack_bot_support.py
+++ b/backend/alembic/versions/4ee1287bd26a_add_multiple_slack_bot_support.py
@@ -10,8 +10,8 @@ from typing import cast
 from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.orm import Session
-from danswer.key_value_store.factory import get_kv_store
-from danswer.db.models import SlackBot
+from onyx.key_value_store.factory import get_kv_store
+from onyx.db.models import SlackBot
 from sqlalchemy.dialects import postgresql

 # revision identifiers, used by Alembic.
--- a/backend/alembic/versions/54a74a0417fc_danswerbot_onyxbot.py
+++ b/backend/alembic/versions/54a74a0417fc_danswerbot_onyxbot.py
@@ -0,0 +1,23 @@
+"""danswerbot -> onyxbot
+
+Revision ID: 54a74a0417fc
+Revises: 94dc3d0236f8
+Create Date: 2024-12-11 18:05:05.490737
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "54a74a0417fc"
+down_revision = "94dc3d0236f8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Rename ``chat_session.danswerbot_flow`` to ``onyxbot_flow``."""
+    op.alter_column("chat_session", "danswerbot_flow", new_column_name="onyxbot_flow")
+
+
+def downgrade() -> None:
+    """Rename ``chat_session.onyxbot_flow`` back to ``danswerbot_flow``."""
+    op.alter_column("chat_session", "onyxbot_flow", new_column_name="danswerbot_flow")
--- a/backend/alembic/versions/570282d33c49_track_danswerbot_explicitly.py
+++ b/backend/alembic/versions/570282d33c49_track_danswerbot_explicitly.py
@@ -1,4 +1,4 @@
-"""Track Danswerbot Explicitly
+"""Track Onyxbot Explicitly

 Revision ID: 570282d33c49
 Revises: 7547d982db8f
--- a/backend/alembic/versions/6fc7886d665d_make_categories_labels_and_many_to_many.py
+++ b/backend/alembic/versions/6fc7886d665d_make_categories_labels_and_many_to_many.py
@@ -0,0 +1,80 @@
+"""make categories labels and many to many
+
+Revision ID: 6fc7886d665d
+Revises: 3c6531f32351
+Create Date: 2025-01-13 18:12:18.029112
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "6fc7886d665d"
+down_revision = "3c6531f32351"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Turn the single persona category into many-to-many persona labels.
+
+    Steps, in order: rename ``persona_category`` to ``persona_label``, create
+    the ``persona__persona_label`` association table, copy every non-NULL
+    ``persona.category_id`` into it, then drop ``persona.category_id``.
+    """
+    # Rename persona_category table to persona_label
+    op.rename_table("persona_category", "persona_label")
+
+    # Create the new association table
+    op.create_table(
+        "persona__persona_label",
+        sa.Column("persona_id", sa.Integer(), nullable=False),
+        sa.Column("persona_label_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["persona_id"],
+            ["persona.id"],
+        ),
+        # Only the label side cascades: deleting a label removes its links
+        sa.ForeignKeyConstraint(
+            ["persona_label_id"],
+            ["persona_label.id"],
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("persona_id", "persona_label_id"),
+    )
+
+    # Copy existing relationships to the new table
+    op.execute(
+        """
+        INSERT INTO persona__persona_label (persona_id, persona_label_id)
+        SELECT id, category_id FROM persona WHERE category_id IS NOT NULL
+    """
+    )
+
+    # Remove the old category_id column from persona table
+    # (must run after the copy above, which reads this column)
+    op.drop_column("persona", "category_id")
+
+
+def downgrade() -> None:
+    # Rename persona_label table back to persona_category
+    op.rename_table("persona_label", "persona_category")
+
+    # Add back the category_id column to persona table
+    op.add_column("persona", sa.Column("category_id", sa.Integer(), nullable=True))
+    op.create_foreign_key(
+        "persona_category_id_fkey",
+        "persona",
+        "persona_category",
+        ["category_id"],
+        ["id"],
+    )
+
+    # Copy the first label relationship back to the persona table
+    op.execute(
+        """
+        UPDATE persona
+        SET category_id = (
+            SELECT persona_label_id
+            FROM persona__persona_label
+            WHERE persona__persona_label.persona_id = persona.id
+            LIMIT 1
+        )
+    """
+    )
+
+    # Drop the association table
+    op.drop_table("persona__persona_label")
--- a/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py
+++ b/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py
@@ -9,7 +9,7 @@ import json
 from typing import cast
 from alembic import op
 import sqlalchemy as sa
-from danswer.key_value_store.factory import get_kv_store
+from onyx.key_value_store.factory import get_kv_store

 # revision identifiers, used by Alembic.
 revision = "703313b75876"
--- a/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py
+++ b/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py
@@ -8,9 +8,9 @@ Create Date: 2024-03-22 21:34:27.629444
 from alembic import op
 import sqlalchemy as sa

-from danswer.db.models import IndexModelStatus
-from danswer.context.search.enums import RecencyBiasSetting
-from danswer.context.search.enums import SearchType
+from onyx.db.models import IndexModelStatus
+from onyx.context.search.enums import RecencyBiasSetting
+from onyx.context.search.enums import SearchType

 # revision identifiers, used by Alembic.
 revision = "776b3bbe9092"
--- a/backend/alembic/versions/77d07dffae64_forcibly_remove_more_enum_types_from_.py
+++ b/backend/alembic/versions/77d07dffae64_forcibly_remove_more_enum_types_from_.py
@@ -18,7 +18,7 @@ depends_on: None = None

 def upgrade() -> None:
    # In a PR:
-    # https://github.com/danswer-ai/danswer/pull/397/files#diff-f05fb341f6373790b91852579631b64ca7645797a190837156a282b67e5b19c2
+    # https://github.com/onyx-dot-app/onyx/pull/397/files#diff-f05fb341f6373790b91852579631b64ca7645797a190837156a282b67e5b19c2
    # we directly changed some previous migrations. This caused some users to have native enums
    # while others wouldn't. This has caused some issues when adding new fields to these enums.
    # This migration manually changes the enum types to ensure that nobody uses native enums.
--- a/backend/alembic/versions/91a0a4d62b14_milestone.py
+++ b/backend/alembic/versions/91a0a4d62b14_milestone.py
@@ -0,0 +1,45 @@
+"""Milestone
+
+Revision ID: 91a0a4d62b14
+Revises: dab04867cd88
+Create Date: 2024-12-13 19:03:30.947551
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import fastapi_users_db_sqlalchemy
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "91a0a4d62b14"
+down_revision = "dab04867cd88"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the ``milestone`` table.
+
+    ``event_type`` carries a unique constraint, and the ``user_id`` foreign
+    key to ``user.id`` is declared with ``ON DELETE CASCADE``.
+    """
+    op.create_table(
+        "milestone",
+        sa.Column("id", sa.UUID(), nullable=False),
+        sa.Column("tenant_id", sa.String(), nullable=True),
+        sa.Column(
+            "user_id",
+            fastapi_users_db_sqlalchemy.generics.GUID(),
+            nullable=True,
+        ),
+        sa.Column("event_type", sa.String(), nullable=False),
+        # Database-side default: the server fills in now() on insert
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("event_tracker", postgresql.JSONB(), nullable=True),
+        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("event_type", name="uq_milestone_event_type"),
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``milestone`` table."""
+    op.drop_table("milestone")
--- a/backend/alembic/versions/91fd3b470d1a_remove_documentsource_from_tag.py
+++ b/backend/alembic/versions/91fd3b470d1a_remove_documentsource_from_tag.py
@@ -7,7 +7,7 @@ Create Date: 2024-03-21 12:05:23.956734
 """
 from alembic import op
 import sqlalchemy as sa
-from danswer.configs.constants import DocumentSource
+from onyx.configs.constants import DocumentSource

 # revision identifiers, used by Alembic.
 revision = "91fd3b470d1a"
--- a/backend/alembic/versions/949b4a92a401_remove_rt.py
+++ b/backend/alembic/versions/949b4a92a401_remove_rt.py
@@ -10,7 +10,7 @@ from sqlalchemy.orm import Session
 from sqlalchemy import text

 # Import your models and constants
-from danswer.db.models import (
+from onyx.db.models import (
    Connector,
    ConnectorCredentialPair,
    Credential,
--- a/backend/alembic/versions/94dc3d0236f8_make_document_set_description_optional.py
+++ b/backend/alembic/versions/94dc3d0236f8_make_document_set_description_optional.py
@@ -0,0 +1,30 @@
+"""make document set description optional
+
+Revision ID: 94dc3d0236f8
+Revises: bf7a81109301
+Create Date: 2024-12-11 11:26:10.616722
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "94dc3d0236f8"
+down_revision = "bf7a81109301"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Allow NULL in ``document_set.description``."""
+    # Make document_set.description column nullable
+    op.alter_column(
+        "document_set", "description", existing_type=sa.String(), nullable=True
+    )
+
+
+def downgrade() -> None:
+    """Set ``document_set.description`` back to NOT NULL."""
+    # Revert document_set.description column to non-nullable
+    # NOTE(review): no back-fill of NULL descriptions happens here, so this
+    # presumably fails if any row has a NULL description - confirm.
+    op.alter_column(
+        "document_set", "description", existing_type=sa.String(), nullable=False
+    )
--- a/backend/alembic/versions/97dbb53fa8c8_add_syncrecord.py
+++ b/backend/alembic/versions/97dbb53fa8c8_add_syncrecord.py
@@ -0,0 +1,72 @@
+"""Add SyncRecord
+
+Revision ID: 97dbb53fa8c8
+Revises: be2ab2aa50ee
+Create Date: 2025-01-11 19:39:50.426302
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "97dbb53fa8c8"
+down_revision = "be2ab2aa50ee"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the ``sync_record`` table and two composite indexes on it.
+
+    ``sync_type`` and ``sync_status`` are declared as non-native enums
+    (``native_enum=False``, ``length=40``).
+    """
+    op.create_table(
+        "sync_record",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("entity_id", sa.Integer(), nullable=False),
+        sa.Column(
+            "sync_type",
+            sa.Enum(
+                "DOCUMENT_SET",
+                "USER_GROUP",
+                "CONNECTOR_DELETION",
+                name="synctype",
+                native_enum=False,
+                length=40,
+            ),
+            nullable=False,
+        ),
+        sa.Column(
+            "sync_status",
+            sa.Enum(
+                "IN_PROGRESS",
+                "SUCCESS",
+                "FAILED",
+                "CANCELED",
+                name="syncstatus",
+                native_enum=False,
+                length=40,
+            ),
+            nullable=False,
+        ),
+        sa.Column("num_docs_synced", sa.Integer(), nullable=False),
+        sa.Column("sync_start_time", sa.DateTime(timezone=True), nullable=False),
+        # The only nullable column in the table
+        sa.Column("sync_end_time", sa.DateTime(timezone=True), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+    )
+
+    # Add index for fetch_latest_sync_record query
+    op.create_index(
+        "ix_sync_record_entity_id_sync_type_sync_start_time",
+        "sync_record",
+        ["entity_id", "sync_type", "sync_start_time"],
+    )
+
+    # Add index for cleanup_sync_records query
+    op.create_index(
+        "ix_sync_record_entity_id_sync_type_sync_status",
+        "sync_record",
+        ["entity_id", "sync_type", "sync_status"],
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("ix_sync_record_entity_id_sync_type_sync_status")
+    op.drop_index("ix_sync_record_entity_id_sync_type_sync_start_time")
+    op.drop_table("sync_record")
--- a/backend/alembic/versions/9aadf32dfeb4_add_user_files.py
+++ b/backend/alembic/versions/9aadf32dfeb4_add_user_files.py
@@ -0,0 +1,75 @@
+"""add user files
+
+Revision ID: 9aadf32dfeb4
+Revises: f1ca58b2f2ec
+Create Date: 2025-01-26 16:08:21.551022
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import datetime
+
+
+# revision identifiers, used by Alembic.
+revision = "9aadf32dfeb4"
+down_revision = "f1ca58b2f2ec"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create ``user_folder``, ``user_file`` and ``persona__user_file``.
+
+    The tables are created in dependency order: ``user_file`` references
+    ``user_folder.id`` and ``persona__user_file`` references ``user_file.id``.
+
+    NOTE(review): the ``default=`` arguments below are client-side SQLAlchemy
+    defaults and are not rendered into CREATE TABLE, so the columns presumably
+    get no database-level default; use ``server_default`` if one is intended.
+    """
+    # Create user_folder table without parent_id
+    op.create_table(
+        "user_folder",
+        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+        sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
+        sa.Column("name", sa.String(length=255), nullable=True),
+        sa.Column("description", sa.String(length=255), nullable=True),
+        sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
+        sa.Column("created_at", sa.DateTime(), default=datetime.datetime.utcnow),
+    )
+
+    # Create user_file table with folder_id instead of parent_folder_id
+    op.create_table(
+        "user_file",
+        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+        sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
+        sa.Column(
+            "folder_id",
+            sa.Integer(),
+            sa.ForeignKey("user_folder.id"),
+            nullable=True,
+        ),
+        sa.Column("file_type", sa.String(), nullable=True),
+        sa.Column("file_id", sa.String(length=255), nullable=False),
+        sa.Column("document_id", sa.String(length=255), nullable=False),
+        sa.Column("name", sa.String(length=255), nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(),
+            default=datetime.datetime.utcnow,
+        ),
+    )
+
+    # Create persona__user_file table
+    # (composite primary key over both foreign-key columns)
+    op.create_table(
+        "persona__user_file",
+        sa.Column(
+            "persona_id", sa.Integer(), sa.ForeignKey("persona.id"), primary_key=True
+        ),
+        sa.Column(
+            "user_file_id",
+            sa.Integer(),
+            sa.ForeignKey("user_file.id"),
+            primary_key=True,
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Drop the three user-file tables in reverse dependency order."""
+    # Drop the persona__user_file table
+    op.drop_table("persona__user_file")
+    # Drop the user_file table
+    op.drop_table("user_file")
+    # Drop the user_folder table
+    op.drop_table("user_folder")
--- a/backend/alembic/versions/9f696734098f_combine_search_and_chat.py
+++ b/backend/alembic/versions/9f696734098f_combine_search_and_chat.py
@@ -0,0 +1,36 @@
+"""Combine Search and Chat
+
+Revision ID: 9f696734098f
+Revises: a8c2065484e6
+Create Date: 2024-11-27 15:32:19.694972
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "9f696734098f"
+down_revision = "a8c2065484e6"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Make ``chat_session.description`` nullable and drop two columns.
+
+    The dropped columns are ``chat_session.one_shot`` and
+    ``slack_channel_config.response_type``; their data is not preserved.
+    """
+    op.alter_column("chat_session", "description", nullable=True)
+    op.drop_column("chat_session", "one_shot")
+    op.drop_column("slack_channel_config", "response_type")
+
+
+def downgrade() -> None:
+    """Restore the NOT NULL description and re-add the two dropped columns.
+
+    The re-added columns are filled from their server defaults (``false`` for
+    ``one_shot``, ``"citations"`` for ``response_type``), not from old values.
+    """
+    # Back-fill NULLs first so the NOT NULL constraint below can be applied
+    op.execute("UPDATE chat_session SET description = '' WHERE description IS NULL")
+    op.alter_column("chat_session", "description", nullable=False)
+    op.add_column(
+        "chat_session",
+        sa.Column("one_shot", sa.Boolean(), nullable=False, server_default=sa.false()),
+    )
+    op.add_column(
+        "slack_channel_config",
+        sa.Column(
+            "response_type", sa.String(), nullable=False, server_default="citations"
+        ),
+    )
--- a/backend/alembic/versions/aeda5f2df4f6_add_pinned_assistants.py
+++ b/backend/alembic/versions/aeda5f2df4f6_add_pinned_assistants.py
@@ -0,0 +1,27 @@
+"""add pinned assistants
+
+Revision ID: aeda5f2df4f6
+Revises: c5eae4a75a1b
+Create Date: 2025-01-09 16:04:10.770636
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "aeda5f2df4f6"
+down_revision = "c5eae4a75a1b"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add the nullable JSONB column ``user.pinned_assistants``.
+
+    Every row is then initialised with its current ``chosen_assistants`` value.
+    """
+    op.add_column(
+        "user", sa.Column("pinned_assistants", postgresql.JSONB(), nullable=True)
+    )
+    # "user" is double-quoted in the raw SQL because it is a reserved word
+    op.execute('UPDATE "user" SET pinned_assistants = chosen_assistants')
+
+
+def downgrade() -> None:
+    """Drop ``user.pinned_assistants``."""
+    op.drop_column("user", "pinned_assistants")
--- a/backend/alembic/versions/b156fa702355_chat_reworked.py
+++ b/backend/alembic/versions/b156fa702355_chat_reworked.py
@@ -10,7 +10,7 @@ from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 from sqlalchemy.dialects.postgresql import ENUM
-from danswer.configs.constants import DocumentSource
+from onyx.configs.constants import DocumentSource

 # revision identifiers, used by Alembic.
 revision = "b156fa702355"
--- a/backend/alembic/versions/be2ab2aa50ee_fix_capitalization.py
+++ b/backend/alembic/versions/be2ab2aa50ee_fix_capitalization.py
@@ -0,0 +1,38 @@
+"""fix_capitalization
+
+Revision ID: be2ab2aa50ee
+Revises: 369644546676
+Create Date: 2025-01-10 13:13:26.228960
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "be2ab2aa50ee"
+down_revision = "369644546676"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Lower-case every entry of ``document.external_user_group_ids``.
+
+    Only rows whose array is non-NULL and differs from its lower-cased form
+    are updated; those rows also get ``last_modified`` set to ``NOW()``.
+    """
+    op.execute(
+        """
+        UPDATE document
+        SET
+            external_user_group_ids = ARRAY(
+                SELECT LOWER(unnest(external_user_group_ids))
+            ),
+            last_modified = NOW()
+        WHERE
+            external_user_group_ids IS NOT NULL
+            AND external_user_group_ids::text[] <> ARRAY(
+                SELECT LOWER(unnest(external_user_group_ids))
+            )::text[]
+    """
+    )
+
+
+def downgrade() -> None:
+    """Intentional no-op: the original capitalization is not restored."""
+    # No way to cleanly persist the bad state through an upgrade/downgrade
+    # cycle, so we just pass
+    pass
--- a/backend/alembic/versions/bf7a81109301_delete_input_prompts.py
+++ b/backend/alembic/versions/bf7a81109301_delete_input_prompts.py
@@ -0,0 +1,57 @@
+"""delete_input_prompts
+
+Revision ID: bf7a81109301
+Revises: f7a894b06d02
+Create Date: 2024-12-09 12:00:49.884228
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import fastapi_users_db_sqlalchemy
+
+
+# revision identifiers, used by Alembic.
+revision = "bf7a81109301"
+down_revision = "f7a894b06d02"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Drop ``inputprompt__user`` and then ``inputprompt``."""
+    # The association table goes first, before the table it references
+    op.drop_table("inputprompt__user")
+    op.drop_table("inputprompt")
+
+
+def downgrade() -> None:
+    op.create_table(
+        "inputprompt",
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column("prompt", sa.String(), nullable=False),
+        sa.Column("content", sa.String(), nullable=False),
+        sa.Column("active", sa.Boolean(), nullable=False),
+        sa.Column("is_public", sa.Boolean(), nullable=False),
+        sa.Column(
+            "user_id",
+            fastapi_users_db_sqlalchemy.generics.GUID(),
+            nullable=True,
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["user.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_table(
+        "inputprompt__user",
+        sa.Column("input_prompt_id", sa.Integer(), nullable=False),
+        sa.Column("user_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["input_prompt_id"],
+            ["inputprompt.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["inputprompt.id"],
+        ),
+        sa.PrimaryKeyConstraint("input_prompt_id", "user_id"),
+    )
--- a/backend/alembic/versions/c0aab6edb6dd_delete_workspace.py
+++ b/backend/alembic/versions/c0aab6edb6dd_delete_workspace.py
@@ -0,0 +1,87 @@
+"""delete workspace
+
+Revision ID: c0aab6edb6dd
+Revises: 35e518e0ddf4
+Create Date: 2024-12-17 14:37:07.660631
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "c0aab6edb6dd"
+down_revision = "35e518e0ddf4"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Remove the ``workspace`` key from every Slack connector's config."""
+    # `- 'workspace'` deletes that key from the JSON value
+    # (assumes connector_specific_config is a jsonb column - TODO confirm)
+    op.execute(
+        """
+    UPDATE connector
+    SET connector_specific_config = connector_specific_config - 'workspace'
+    WHERE source = 'SLACK'
+    """
+    )
+
+
+def downgrade() -> None:
+    import json
+    from sqlalchemy import text
+    from slack_sdk import WebClient
+
+    conn = op.get_bind()
+
+    # Fetch all Slack credentials
+    creds_result = conn.execute(
+        text("SELECT id, credential_json FROM credential WHERE source = 'SLACK'")
+    )
+    all_slack_creds = creds_result.fetchall()
+    if not all_slack_creds:
+        return
+
+    for cred_row in all_slack_creds:
+        credential_id, credential_json = cred_row
+
+        credential_json = (
+            credential_json.tobytes().decode("utf-8")
+            if isinstance(credential_json, memoryview)
+            else credential_json.decode("utf-8")
+        )
+        credential_data = json.loads(credential_json)
+        slack_bot_token = credential_data.get("slack_bot_token")
+        if not slack_bot_token:
+            print(
+                f"No slack_bot_token found for credential {credential_id}. "
+                "Your Slack connector will not function until you upgrade and provide a valid token."
+            )
+            continue
+
+        client = WebClient(token=slack_bot_token)
+        try:
+            auth_response = client.auth_test()
+            workspace = auth_response["url"].split("//")[1].split(".")[0]
+
+            # Update only the connectors linked to this credential
+            # (and which are Slack connectors).
+            op.execute(
+                f"""
+                UPDATE connector AS c
+                SET connector_specific_config = jsonb_set(
+                    connector_specific_config,
+                    '{{workspace}}',
+                    to_jsonb('{workspace}'::text)
+                )
+                FROM connector_credential_pair AS ccp
+                WHERE ccp.connector_id = c.id
+                  AND c.source = 'SLACK'
+                  AND ccp.credential_id = {credential_id}
+            """
+            )
+        except Exception:
+            print(
+                f"We were unable to get the workspace url for your Slack Connector with id {credential_id}."
+            )
+            print("This connector will no longer work until you upgrade.")
+            continue
--- a/backend/alembic/versions/c5eae4a75a1b_add_chat_message__standard_answer_table.py
+++ b/backend/alembic/versions/c5eae4a75a1b_add_chat_message__standard_answer_table.py
@@ -0,0 +1,36 @@
+"""Add chat_message__standard_answer table
+
+Revision ID: c5eae4a75a1b
+Revises: 0f7ff6d75b57
+Create Date: 2025-01-15 14:08:49.688998
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "c5eae4a75a1b"
+down_revision = "0f7ff6d75b57"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the ``chat_message__standard_answer`` association table.
+
+    Both columns are foreign keys and together form the primary key.
+    """
+    op.create_table(
+        "chat_message__standard_answer",
+        sa.Column("chat_message_id", sa.Integer(), nullable=False),
+        sa.Column("standard_answer_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["chat_message_id"],
+            ["chat_message.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["standard_answer_id"],
+            ["standard_answer.id"],
+        ),
+        sa.PrimaryKeyConstraint("chat_message_id", "standard_answer_id"),
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``chat_message__standard_answer`` table."""
+    op.drop_table("chat_message__standard_answer")
--- a/backend/alembic/versions/c7bf5721733e_add_has_been_indexed_to_.py
+++ b/backend/alembic/versions/c7bf5721733e_add_has_been_indexed_to_.py
@@ -0,0 +1,48 @@
+"""Add has_been_indexed to DocumentByConnectorCredentialPair
+
+Revision ID: c7bf5721733e
+Revises: 027381bce97c
+Create Date: 2025-01-13 12:39:05.831693
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "c7bf5721733e"
+down_revision = "027381bce97c"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add a NOT NULL ``has_been_indexed`` flag plus a composite index.
+
+    The column is added as nullable, set to TRUE on every existing row, and
+    only then altered to NOT NULL. A non-unique index over ``connector_id``,
+    ``credential_id`` and ``has_been_indexed`` is created afterwards.
+    """
+    # assume all existing rows have been indexed, no better approach
+    op.add_column(
+        "document_by_connector_credential_pair",
+        sa.Column("has_been_indexed", sa.Boolean(), nullable=True),
+    )
+    op.execute(
+        "UPDATE document_by_connector_credential_pair SET has_been_indexed = TRUE"
+    )
+    # Safe to tighten now: the UPDATE above left no NULLs in the column
+    op.alter_column(
+        "document_by_connector_credential_pair",
+        "has_been_indexed",
+        nullable=False,
+    )
+
+    # Add index to optimize get_document_counts_for_cc_pairs query pattern
+    op.create_index(
+        "idx_document_cc_pair_counts",
+        "document_by_connector_credential_pair",
+        ["connector_id", "credential_id", "has_been_indexed"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``idx_document_cc_pair_counts`` index, then ``has_been_indexed``."""
+    # Remove the index first before removing the column
+    op.drop_index(
+        "idx_document_cc_pair_counts",
+        table_name="document_by_connector_credential_pair",
+    )
+    op.drop_column("document_by_connector_credential_pair", "has_been_indexed")
--- a/backend/alembic/versions/dab04867cd88_add_composite_index_to_document_by_.py
+++ b/backend/alembic/versions/dab04867cd88_add_composite_index_to_document_by_.py
@@ -0,0 +1,32 @@
+"""Add composite index to document_by_connector_credential_pair
+
+Revision ID: dab04867cd88
+Revises: 54a74a0417fc
+Create Date: 2024-12-13 22:43:20.119990
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "dab04867cd88"
+down_revision = "54a74a0417fc"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create a non-unique composite index on the document/cc-pair table."""
+    # Composite index on (connector_id, credential_id)
+    op.create_index(
+        "idx_document_cc_pair_connector_credential",
+        "document_by_connector_credential_pair",
+        ["connector_id", "credential_id"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``idx_document_cc_pair_connector_credential`` index."""
+    op.drop_index(
+        "idx_document_cc_pair_connector_credential",
+        table_name="document_by_connector_credential_pair",
+    )
--- a/backend/alembic/versions/dba7f71618f5_danswer_custom_tool_flow.py
+++ b/backend/alembic/versions/dba7f71618f5_danswer_custom_tool_flow.py
@@ -1,4 +1,4 @@
-"""Danswer Custom Tool Flow
+"""Onyx Custom Tool Flow

 Revision ID: dba7f71618f5
 Revises: d5645c915d0e
--- a/backend/alembic/versions/dbaa756c2ccf_embedding_models.py
+++ b/backend/alembic/versions/dbaa756c2ccf_embedding_models.py
@@ -9,12 +9,12 @@ from alembic import op
 import sqlalchemy as sa
 from sqlalchemy import table, column, String, Integer, Boolean

-from danswer.db.search_settings import (
+from onyx.db.search_settings import (
    get_new_default_embedding_model,
    get_old_default_embedding_model,
    user_has_overridden_embedding_model,
 )
-from danswer.db.models import IndexModelStatus
+from onyx.db.models import IndexModelStatus

 # revision identifiers, used by Alembic.
 revision = "dbaa756c2ccf"
--- a/backend/alembic/versions/e50154680a5c_no_source_enum.py
+++ b/backend/alembic/versions/e50154680a5c_no_source_enum.py
@@ -8,7 +8,7 @@ Create Date: 2024-03-14 18:06:08.523106
 from alembic import op
 import sqlalchemy as sa

-from danswer.configs.constants import DocumentSource
+from onyx.configs.constants import DocumentSource

 # revision identifiers, used by Alembic.
 revision = "e50154680a5c"
--- a/backend/alembic/versions/f1ca58b2f2ec_add_passthrough_auth_to_tool.py
+++ b/backend/alembic/versions/f1ca58b2f2ec_add_passthrough_auth_to_tool.py
@@ -0,0 +1,33 @@
+"""add passthrough auth to tool
+
+Revision ID: f1ca58b2f2ec
+Revises: c7bf5721733e
+Create Date: 2024-03-19
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = "f1ca58b2f2ec"
+down_revision: Union[str, None] = "c7bf5721733e"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add the NOT NULL boolean column ``tool.passthrough_auth``."""
+    # Add passthrough_auth column to tool table with default value of False
+    # (a server-side default, so existing rows satisfy NOT NULL)
+    op.add_column(
+        "tool",
+        sa.Column(
+            "passthrough_auth", sa.Boolean(), nullable=False, server_default=sa.false()
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Drop ``tool.passthrough_auth``."""
+    # Remove passthrough_auth column from tool table
+    op.drop_column("tool", "passthrough_auth")
--- a/backend/alembic/versions/f7a894b06d02_non_nullbale_slack_bot_id_in_channel_.py
+++ b/backend/alembic/versions/f7a894b06d02_non_nullbale_slack_bot_id_in_channel_.py
@@ -0,0 +1,40 @@
+"""non-nullable slack bot id in channel config
+
+Revision ID: f7a894b06d02
+Revises: 9f696734098f
+Create Date: 2024-12-06 12:55:42.845723
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "f7a894b06d02"
+down_revision = "9f696734098f"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Delete all rows with null slack_bot_id (destructive: downgrade() does not restore them)
+    op.execute("DELETE FROM slack_channel_config WHERE slack_bot_id IS NULL")
+
+    # Make slack_bot_id non-nullable; safe now that no NULL values remain
+    op.alter_column(
+        "slack_channel_config",
+        "slack_bot_id",
+        existing_type=sa.Integer(),
+        nullable=False,
+    )
+
+
+def downgrade() -> None:
+    # Make slack_bot_id nullable again; rows deleted by upgrade() are not restored
+    op.alter_column(
+        "slack_channel_config",
+        "slack_bot_id",
+        existing_type=sa.Integer(),
+        nullable=True,
+    )
--- a/backend/alembic/versions/fec3db967bf7_add_time_updated_to_usergroup_and_.py
+++ b/backend/alembic/versions/fec3db967bf7_add_time_updated_to_usergroup_and_.py
@@ -0,0 +1,41 @@
+"""Add time_last_modified_by_user to UserGroup and DocumentSet
+
+Revision ID: fec3db967bf7
+Revises: 97dbb53fa8c8
+Create Date: 2025-01-12 15:49:02.289100
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "fec3db967bf7"
+down_revision = "97dbb53fa8c8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "document_set",
+        sa.Column(
+            "time_last_modified_by_user",
+            sa.DateTime(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+    )
+    op.add_column(
+        "user_group",
+        sa.Column(
+            "time_last_modified_by_user",
+            sa.DateTime(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("user_group", "time_last_modified_by_user")
+    op.drop_column("document_set", "time_last_modified_by_user")
--- a/backend/alembic_tenants/README.md
+++ b/backend/alembic_tenants/README.md
@@ -1,3 +1,3 @@
 These files are for public table migrations when operating with multi tenancy.

-If you are not a Danswer developer, you can ignore this directory entirely.
+If you are not an Onyx developer, you can ignore this directory entirely.
--- a/backend/alembic_tenants/env.py
+++ b/backend/alembic_tenants/env.py
@@ -8,8 +8,8 @@ from sqlalchemy.ext.asyncio import create_async_engine
 from sqlalchemy.schema import SchemaItem

 from alembic import context
-from danswer.db.engine import build_connection_string
-from danswer.db.models import PublicBase
+from onyx.db.engine import build_connection_string
+from onyx.db.models import PublicBase

 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
--- a/backend/alembic_tenants/versions/a4f6ee863c47_mapping_for_anonymous_user_path.py
+++ b/backend/alembic_tenants/versions/a4f6ee863c47_mapping_for_anonymous_user_path.py
@@ -0,0 +1,31 @@
+"""mapping for anonymous user path
+
+Revision ID: a4f6ee863c47
+Revises: 14a83a331951
+Create Date: 2025-01-04 14:16:58.697451
+
+"""
+import sqlalchemy as sa
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "a4f6ee863c47"
+down_revision = "14a83a331951"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "tenant_anonymous_user_path",
+        sa.Column("tenant_id", sa.String(), primary_key=True, nullable=False),
+        sa.Column("anonymous_user_path", sa.String(), nullable=False),
+        sa.PrimaryKeyConstraint("tenant_id"),
+        sa.UniqueConstraint("anonymous_user_path"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("tenant_anonymous_user_path")
--- a/backend/danswer/init.py
+++ b/backend/danswer/init.py
@@ -1,3 +0,0 @@
-import os
-
-__version__ = os.environ.get("DANSWER_VERSION", "") or "Development"
--- a/backend/danswer/background/celery/apps/beat.py
+++ b/backend/danswer/background/celery/apps/beat.py
@@ -1,172 +0,0 @@
-from datetime import timedelta
-from typing import Any
-
-from celery import Celery
-from celery import signals
-from celery.beat import PersistentScheduler  # type: ignore
-from celery.signals import beat_init
-
-import danswer.background.celery.apps.app_base as app_base
-from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME
-from danswer.db.engine import get_all_tenant_ids
-from danswer.db.engine import SqlEngine
-from danswer.utils.logger import setup_logger
-from danswer.utils.variable_functionality import fetch_versioned_implementation
-from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST
-from shared_configs.configs import MULTI_TENANT
-
-logger = setup_logger(__name__)
-
-celery_app = Celery(__name__)
-celery_app.config_from_object("danswer.background.celery.configs.beat")
-
-
-class DynamicTenantScheduler(PersistentScheduler):
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        logger.info("Initializing DynamicTenantScheduler")
-        super().__init__(*args, **kwargs)
-        self._reload_interval = timedelta(minutes=2)
-        self._last_reload = self.app.now() - self._reload_interval
-        # Let the parent class handle store initialization
-        self.setup_schedule()
-        self._update_tenant_tasks()
-        logger.info(f"Set reload interval to {self._reload_interval}")
-
-    def setup_schedule(self) -> None:
-        logger.info("Setting up initial schedule")
-        super().setup_schedule()
-        logger.info("Initial schedule setup complete")
-
-    def tick(self) -> float:
-        retval = super().tick()
-        now = self.app.now()
-        if (
-            self._last_reload is None
-            or (now - self._last_reload) > self._reload_interval
-        ):
-            logger.info("Reload interval reached, initiating tenant task update")
-            self._update_tenant_tasks()
-            self._last_reload = now
-            logger.info("Tenant task update completed, reset reload timer")
-        return retval
-
-    def _update_tenant_tasks(self) -> None:
-        logger.info("Starting tenant task update process")
-        try:
-            logger.info("Fetching all tenant IDs")
-            tenant_ids = get_all_tenant_ids()
-            logger.info(f"Found {len(tenant_ids)} tenants")
-
-            logger.info("Fetching tasks to schedule")
-            tasks_to_schedule = fetch_versioned_implementation(
-                "danswer.background.celery.tasks.beat_schedule", "get_tasks_to_schedule"
-            )
-
-            new_beat_schedule: dict[str, dict[str, Any]] = {}
-
-            current_schedule = self.schedule.items()
-
-            existing_tenants = set()
-            for task_name, _ in current_schedule:
-                if "-" in task_name:
-                    existing_tenants.add(task_name.split("-")[-1])
-            logger.info(f"Found {len(existing_tenants)} existing tenants in schedule")
-
-            for tenant_id in tenant_ids:
-                if (
-                    IGNORED_SYNCING_TENANT_LIST
-                    and tenant_id in IGNORED_SYNCING_TENANT_LIST
-                ):
-                    logger.info(
-                        f"Skipping tenant {tenant_id} as it is in the ignored syncing list"
-                    )
-                    continue
-
-                if tenant_id not in existing_tenants:
-                    logger.info(f"Processing new tenant: {tenant_id}")
-
-                for task in tasks_to_schedule():
-                    task_name = f"{task['name']}-{tenant_id}"
-                    logger.debug(f"Creating task configuration for {task_name}")
-                    new_task = {
-                        "task": task["task"],
-                        "schedule": task["schedule"],
-                        "kwargs": {"tenant_id": tenant_id},
-                    }
-                    if options := task.get("options"):
-                        logger.debug(f"Adding options to task {task_name}: {options}")
-                        new_task["options"] = options
-                    new_beat_schedule[task_name] = new_task
-
-            if self._should_update_schedule(current_schedule, new_beat_schedule):
-                logger.info(
-                    "Schedule update required",
-                    extra={
-                        "new_tasks": len(new_beat_schedule),
-                        "current_tasks": len(current_schedule),
-                    },
-                )
-
-                # Create schedule entries
-                entries = {}
-                for name, entry in new_beat_schedule.items():
-                    entries[name] = self.Entry(
-                        name=name,
-                        app=self.app,
-                        task=entry["task"],
-                        schedule=entry["schedule"],
-                        options=entry.get("options", {}),
-                        kwargs=entry.get("kwargs", {}),
-                    )
-
-                # Update the schedule using the scheduler's methods
-                self.schedule.clear()
-                self.schedule.update(entries)
-
-                # Ensure changes are persisted
-                self.sync()
-
-                logger.info("Schedule update completed successfully")
-            else:
-                logger.info("Schedule is up to date, no changes needed")
-
-        except (AttributeError, KeyError):
-            logger.exception("Failed to process task configuration")
-        except Exception:
-            logger.exception("Unexpected error updating tenant tasks")
-
-    def _should_update_schedule(
-        self, current_schedule: dict, new_schedule: dict
-    ) -> bool:
-        """Compare schedules to determine if an update is needed."""
-        logger.debug("Comparing current and new schedules")
-        current_tasks = set(name for name, _ in current_schedule)
-        new_tasks = set(new_schedule.keys())
-        needs_update = current_tasks != new_tasks
-        logger.debug(f"Schedule update needed: {needs_update}")
-        return needs_update
-
-
-@beat_init.connect
-def on_beat_init(sender: Any, **kwargs: Any) -> None:
-    logger.info("beat_init signal received.")
-
-    # Celery beat shouldn't touch the db at all. But just setting a low minimum here.
-    SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME)
-    SqlEngine.init_engine(pool_size=2, max_overflow=0)
-
-    # Startup checks are not needed in multi-tenant case
-    if MULTI_TENANT:
-        return
-
-    app_base.wait_for_redis(sender, **kwargs)
-
-
-@signals.setup_logging.connect
-def on_setup_logging(
-    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
-) -> None:
-    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)
-
-
-celery_app.conf.beat_scheduler = DynamicTenantScheduler
--- a/backend/danswer/background/celery/celery_redis.py
+++ b/backend/danswer/background/celery/celery_redis.py
@@ -1,25 +0,0 @@
-# These are helper objects for tracking the keys we need to write in redis
-from typing import cast
-
-from redis import Redis
-
-from danswer.background.celery.configs.base import CELERY_SEPARATOR
-from danswer.configs.constants import DanswerCeleryPriority
-
-
-def celery_get_queue_length(queue: str, r: Redis) -> int:
-    """This is a redis specific way to get the length of a celery queue.
-    It is priority aware and knows how to count across the multiple redis lists
-    used to implement task prioritization.
-    This operation is not atomic."""
-    total_length = 0
-    for i in range(len(DanswerCeleryPriority)):
-        queue_name = queue
-        if i > 0:
-            queue_name += CELERY_SEPARATOR
-            queue_name += str(i)
-
-        length = r.llen(queue_name)
-        total_length += cast(int, length)
-
-    return total_length
--- a/backend/danswer/background/celery/tasks/beat_schedule.py
+++ b/backend/danswer/background/celery/tasks/beat_schedule.py
@@ -1,61 +0,0 @@
-from datetime import timedelta
-from typing import Any
-
-from danswer.configs.constants import DanswerCeleryPriority
-from danswer.configs.constants import DanswerCeleryTask
-
-
-tasks_to_schedule = [
-    {
-        "name": "check-for-vespa-sync",
-        "task": DanswerCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
-        "schedule": timedelta(seconds=20),
-        "options": {"priority": DanswerCeleryPriority.HIGH},
-    },
-    {
-        "name": "check-for-connector-deletion",
-        "task": DanswerCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
-        "schedule": timedelta(seconds=20),
-        "options": {"priority": DanswerCeleryPriority.HIGH},
-    },
-    {
-        "name": "check-for-indexing",
-        "task": DanswerCeleryTask.CHECK_FOR_INDEXING,
-        "schedule": timedelta(seconds=15),
-        "options": {"priority": DanswerCeleryPriority.HIGH},
-    },
-    {
-        "name": "check-for-prune",
-        "task": DanswerCeleryTask.CHECK_FOR_PRUNING,
-        "schedule": timedelta(seconds=15),
-        "options": {"priority": DanswerCeleryPriority.HIGH},
-    },
-    {
-        "name": "kombu-message-cleanup",
-        "task": DanswerCeleryTask.KOMBU_MESSAGE_CLEANUP_TASK,
-        "schedule": timedelta(seconds=3600),
-        "options": {"priority": DanswerCeleryPriority.LOWEST},
-    },
-    {
-        "name": "monitor-vespa-sync",
-        "task": DanswerCeleryTask.MONITOR_VESPA_SYNC,
-        "schedule": timedelta(seconds=5),
-        "options": {"priority": DanswerCeleryPriority.HIGH},
-    },
-    {
-        "name": "check-for-doc-permissions-sync",
-        "task": DanswerCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,
-        "schedule": timedelta(seconds=30),
-        "options": {"priority": DanswerCeleryPriority.HIGH},
-    },
-    {
-        "name": "check-for-external-group-sync",
-        "task": DanswerCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,
-        "schedule": timedelta(seconds=20),
-        "options": {"priority": DanswerCeleryPriority.HIGH},
-    },
-]
-
-
-def get_tasks_to_schedule() -> list[dict[str, Any]]:
-    return tasks_to_schedule
--- a/backend/danswer/background/celery/versioned_apps/primary.py
+++ b/backend/danswer/background/celery/versioned_apps/primary.py
@@ -1,10 +0,0 @@
-"""Factory stub for running celery worker / celery beat."""
-from celery import Celery
-
-from danswer.utils.variable_functionality import fetch_versioned_implementation
-from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable
-
-set_is_ee_based_on_env_variable()
-app: Celery = fetch_versioned_implementation(
-    "danswer.background.celery.apps.primary", "celery_app"
-)
--- a/backend/danswer/background/indexing/run_indexing.py
+++ b/backend/danswer/background/indexing/run_indexing.py
@@ -1,464 +0,0 @@
-import time
-import traceback
-from datetime import datetime
-from datetime import timedelta
-from datetime import timezone
-
-from sqlalchemy.orm import Session
-
-from danswer.background.indexing.checkpointing import get_time_windows_for_index_attempt
-from danswer.background.indexing.tracer import DanswerTracer
-from danswer.configs.app_configs import INDEXING_SIZE_WARNING_THRESHOLD
-from danswer.configs.app_configs import INDEXING_TRACER_INTERVAL
-from danswer.configs.app_configs import POLL_CONNECTOR_OFFSET
-from danswer.connectors.connector_runner import ConnectorRunner
-from danswer.connectors.factory import instantiate_connector
-from danswer.connectors.models import IndexAttemptMetadata
-from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id
-from danswer.db.connector_credential_pair import get_last_successful_attempt_time
-from danswer.db.connector_credential_pair import update_connector_credential_pair
-from danswer.db.engine import get_session_with_tenant
-from danswer.db.enums import ConnectorCredentialPairStatus
-from danswer.db.index_attempt import mark_attempt_canceled
-from danswer.db.index_attempt import mark_attempt_failed
-from danswer.db.index_attempt import mark_attempt_partially_succeeded
-from danswer.db.index_attempt import mark_attempt_succeeded
-from danswer.db.index_attempt import transition_attempt_to_in_progress
-from danswer.db.index_attempt import update_docs_indexed
-from danswer.db.models import IndexAttempt
-from danswer.db.models import IndexingStatus
-from danswer.db.models import IndexModelStatus
-from danswer.document_index.factory import get_default_document_index
-from danswer.indexing.embedder import DefaultIndexingEmbedder
-from danswer.indexing.indexing_heartbeat import IndexingHeartbeatInterface
-from danswer.indexing.indexing_pipeline import build_indexing_pipeline
-from danswer.utils.logger import setup_logger
-from danswer.utils.logger import TaskAttemptSingleton
-from danswer.utils.variable_functionality import global_version
-
-logger = setup_logger()
-
-INDEXING_TRACER_NUM_PRINT_ENTRIES = 5
-
-
-def _get_connector_runner(
-    db_session: Session,
-    attempt: IndexAttempt,
-    start_time: datetime,
-    end_time: datetime,
-    tenant_id: str | None,
-) -> ConnectorRunner:
-    """
-    NOTE: `start_time` and `end_time` are only used for poll connectors
-
-    Returns an iterator of document batches and whether the returned documents
-    are the complete list of existing documents of the connector. If the task
-    of type LOAD_STATE, the list will be considered complete and otherwise incomplete.
-    """
-    task = attempt.connector_credential_pair.connector.input_type
-
-    try:
-        runnable_connector = instantiate_connector(
-            db_session=db_session,
-            source=attempt.connector_credential_pair.connector.source,
-            input_type=task,
-            connector_specific_config=attempt.connector_credential_pair.connector.connector_specific_config,
-            credential=attempt.connector_credential_pair.credential,
-            tenant_id=tenant_id,
-        )
-    except Exception as e:
-        logger.exception(f"Unable to instantiate connector due to {e}")
-        # since we failed to even instantiate the connector, we pause the CCPair since
-        # it will never succeed
-
-        cc_pair = get_connector_credential_pair_from_id(
-            attempt.connector_credential_pair.id, db_session
-        )
-        if cc_pair and cc_pair.status == ConnectorCredentialPairStatus.ACTIVE:
-            update_connector_credential_pair(
-                db_session=db_session,
-                connector_id=attempt.connector_credential_pair.connector.id,
-                credential_id=attempt.connector_credential_pair.credential.id,
-                status=ConnectorCredentialPairStatus.PAUSED,
-            )
-        raise e
-
-    return ConnectorRunner(
-        connector=runnable_connector, time_range=(start_time, end_time)
-    )
-
-
-class ConnectorStopSignal(Exception):
-    """A custom exception used to signal a stop in processing."""
-
-
-def _run_indexing(
-    db_session: Session,
-    index_attempt: IndexAttempt,
-    tenant_id: str | None,
-    callback: IndexingHeartbeatInterface | None = None,
-) -> None:
-    """
-    1. Get documents which are either new or updated from specified application
-    2. Embed and index these documents into the chosen datastore (vespa)
-    3. Updates Postgres to record the indexed documents + the outcome of this run
-
-    TODO: do not change index attempt statuses here ... instead, set signals in redis
-    and allow the monitor function to clean them up
-    """
-    start_time = time.time()
-
-    if index_attempt.search_settings is None:
-        raise ValueError(
-            "Search settings must be set for indexing. This should not be possible."
-        )
-
-    search_settings = index_attempt.search_settings
-
-    index_name = search_settings.index_name
-
-    # Only update cc-pair status for primary index jobs
-    # Secondary index syncs at the end when swapping
-    is_primary = search_settings.status == IndexModelStatus.PRESENT
-
-    # Indexing is only done into one index at a time
-    document_index = get_default_document_index(
-        primary_index_name=index_name, secondary_index_name=None
-    )
-
-    embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
-        search_settings=search_settings,
-        callback=callback,
-    )
-
-    indexing_pipeline = build_indexing_pipeline(
-        attempt_id=index_attempt.id,
-        embedder=embedding_model,
-        document_index=document_index,
-        ignore_time_skip=(
-            index_attempt.from_beginning
-            or (search_settings.status == IndexModelStatus.FUTURE)
-        ),
-        db_session=db_session,
-        tenant_id=tenant_id,
-        callback=callback,
-    )
-
-    db_cc_pair = index_attempt.connector_credential_pair
-    db_connector = index_attempt.connector_credential_pair.connector
-    db_credential = index_attempt.connector_credential_pair.credential
-    earliest_index_time = (
-        db_connector.indexing_start.timestamp() if db_connector.indexing_start else 0
-    )
-
-    last_successful_index_time = (
-        earliest_index_time
-        if index_attempt.from_beginning
-        else get_last_successful_attempt_time(
-            connector_id=db_connector.id,
-            credential_id=db_credential.id,
-            earliest_index=earliest_index_time,
-            search_settings=index_attempt.search_settings,
-            db_session=db_session,
-        )
-    )
-
-    if INDEXING_TRACER_INTERVAL > 0:
-        logger.debug(f"Memory tracer starting: interval={INDEXING_TRACER_INTERVAL}")
-        tracer = DanswerTracer()
-        tracer.start()
-        tracer.snap()
-
-    index_attempt_md = IndexAttemptMetadata(
-        connector_id=db_connector.id,
-        credential_id=db_credential.id,
-    )
-
-    batch_num = 0
-    net_doc_change = 0
-    document_count = 0
-    chunk_count = 0
-    run_end_dt = None
-    for ind, (window_start, window_end) in enumerate(
-        get_time_windows_for_index_attempt(
-            last_successful_run=datetime.fromtimestamp(
-                last_successful_index_time, tz=timezone.utc
-            ),
-            source_type=db_connector.source,
-        )
-    ):
-        try:
-            window_start = max(
-                window_start - timedelta(minutes=POLL_CONNECTOR_OFFSET),
-                datetime(1970, 1, 1, tzinfo=timezone.utc),
-            )
-
-            connector_runner = _get_connector_runner(
-                db_session=db_session,
-                attempt=index_attempt,
-                start_time=window_start,
-                end_time=window_end,
-                tenant_id=tenant_id,
-            )
-
-            all_connector_doc_ids: set[str] = set()
-
-            tracer_counter = 0
-            if INDEXING_TRACER_INTERVAL > 0:
-                tracer.snap()
-            for doc_batch in connector_runner.run():
-                # Check if connector is disabled mid run and stop if so unless it's the secondary
-                # index being built. We want to populate it even for paused connectors
-                # Often paused connectors are sources that aren't updated frequently but the
-                # contents still need to be initially pulled.
-                if callback:
-                    if callback.should_stop():
-                        raise ConnectorStopSignal("Connector stop signal detected")
-
-                # TODO: should we move this into the above callback instead?
-                db_session.refresh(db_cc_pair)
-                if (
-                    (
-                        db_cc_pair.status == ConnectorCredentialPairStatus.PAUSED
-                        and search_settings.status != IndexModelStatus.FUTURE
-                    )
-                    # if it's deleting, we don't care if this is a secondary index
-                    or db_cc_pair.status == ConnectorCredentialPairStatus.DELETING
-                ):
-                    # let the `except` block handle this
-                    raise RuntimeError("Connector was disabled mid run")
-
-                db_session.refresh(index_attempt)
-                if index_attempt.status != IndexingStatus.IN_PROGRESS:
-                    # Likely due to user manually disabling it or model swap
-                    raise RuntimeError(
-                        f"Index Attempt was canceled, status is {index_attempt.status}"
-                    )
-
-                batch_description = []
-                for doc in doc_batch:
-                    batch_description.append(doc.to_short_descriptor())
-
-                    doc_size = 0
-                    for section in doc.sections:
-                        doc_size += len(section.text)
-
-                    if doc_size > INDEXING_SIZE_WARNING_THRESHOLD:
-                        logger.warning(
-                            f"Document size: doc='{doc.to_short_descriptor()}' "
-                            f"size={doc_size} "
-                            f"threshold={INDEXING_SIZE_WARNING_THRESHOLD}"
-                        )
-
-                logger.debug(f"Indexing batch of documents: {batch_description}")
-
-                index_attempt_md.batch_num = batch_num + 1  # use 1-index for this
-
-                # real work happens here!
-                new_docs, total_batch_chunks = indexing_pipeline(
-                    document_batch=doc_batch,
-                    index_attempt_metadata=index_attempt_md,
-                )
-
-                batch_num += 1
-                net_doc_change += new_docs
-                chunk_count += total_batch_chunks
-                document_count += len(doc_batch)
-                all_connector_doc_ids.update(doc.id for doc in doc_batch)
-
-                # commit transaction so that the `update` below begins
-                # with a brand new transaction. Postgres uses the start
-                # of the transactions when computing `NOW()`, so if we have
-                # a long running transaction, the `time_updated` field will
-                # be inaccurate
-                db_session.commit()
-
-                if callback:
-                    callback.progress("_run_indexing", len(doc_batch))
-
-                # This new value is updated every batch, so UI can refresh per batch update
-                update_docs_indexed(
-                    db_session=db_session,
-                    index_attempt=index_attempt,
-                    total_docs_indexed=document_count,
-                    new_docs_indexed=net_doc_change,
-                    docs_removed_from_index=0,
-                )
-
-                tracer_counter += 1
-                if (
-                    INDEXING_TRACER_INTERVAL > 0
-                    and tracer_counter % INDEXING_TRACER_INTERVAL == 0
-                ):
-                    logger.debug(
-                        f"Running trace comparison for batch {tracer_counter}. interval={INDEXING_TRACER_INTERVAL}"
-                    )
-                    tracer.snap()
-                    tracer.log_previous_diff(INDEXING_TRACER_NUM_PRINT_ENTRIES)
-
-            run_end_dt = window_end
-            if is_primary:
-                update_connector_credential_pair(
-                    db_session=db_session,
-                    connector_id=db_connector.id,
-                    credential_id=db_credential.id,
-                    net_docs=net_doc_change,
-                    run_dt=run_end_dt,
-                )
-        except Exception as e:
-            logger.exception(
-                f"Connector run exceptioned after elapsed time: {time.time() - start_time} seconds"
-            )
-
-            if isinstance(e, ConnectorStopSignal):
-                mark_attempt_canceled(
-                    index_attempt.id,
-                    db_session,
-                    reason=str(e),
-                )
-
-                if is_primary:
-                    update_connector_credential_pair(
-                        db_session=db_session,
-                        connector_id=db_connector.id,
-                        credential_id=db_credential.id,
-                        net_docs=net_doc_change,
-                    )
-
-                if INDEXING_TRACER_INTERVAL > 0:
-                    tracer.stop()
-                raise e
-            else:
-                # Only mark the attempt as a complete failure if this is the first indexing window.
-                # Otherwise, some progress was made - the next run will not start from the beginning.
-                # In this case, it is not accurate to mark it as a failure. When the next run begins,
-                # if that fails immediately, it will be marked as a failure.
-                #
-                # NOTE: if the connector is manually disabled, we should mark it as a failure regardless
-                # to give better clarity in the UI, as the next run will never happen.
-                if (
-                    ind == 0
-                    or not db_cc_pair.status.is_active()
-                    or index_attempt.status != IndexingStatus.IN_PROGRESS
-                ):
-                    mark_attempt_failed(
-                        index_attempt.id,
-                        db_session,
-                        failure_reason=str(e),
-                        full_exception_trace=traceback.format_exc(),
-                    )
-
-                    if is_primary:
-                        update_connector_credential_pair(
-                            db_session=db_session,
-                            connector_id=db_connector.id,
-                            credential_id=db_credential.id,
-                            net_docs=net_doc_change,
-                        )
-
-                    if INDEXING_TRACER_INTERVAL > 0:
-                        tracer.stop()
-                    raise e
-
-            # break => similar to success case. As mentioned above, if the next run fails for the same
-            # reason it will then be marked as a failure
-            break
-
-    if INDEXING_TRACER_INTERVAL > 0:
-        logger.debug(
-            f"Running trace comparison between start and end of indexing. {tracer_counter} batches processed."
-        )
-        tracer.snap()
-        tracer.log_first_diff(INDEXING_TRACER_NUM_PRINT_ENTRIES)
-        tracer.stop()
-        logger.debug("Memory tracer stopped.")
-
-    if (
-        index_attempt_md.num_exceptions > 0
-        and index_attempt_md.num_exceptions >= batch_num
-    ):
-        mark_attempt_failed(
-            index_attempt.id,
-            db_session,
-            failure_reason="All batches exceptioned.",
-        )
-        if is_primary:
-            update_connector_credential_pair(
-                db_session=db_session,
-                connector_id=index_attempt.connector_credential_pair.connector.id,
-                credential_id=index_attempt.connector_credential_pair.credential.id,
-            )
-        raise Exception(
-            f"Connector failed - All batches exceptioned: batches={batch_num}"
-        )
-
-    elapsed_time = time.time() - start_time
-
-    if index_attempt_md.num_exceptions == 0:
-        mark_attempt_succeeded(index_attempt, db_session)
-        logger.info(
-            f"Connector succeeded: "
-            f"docs={document_count} chunks={chunk_count} elapsed={elapsed_time:.2f}s"
-        )
-    else:
-        mark_attempt_partially_succeeded(index_attempt, db_session)
-        logger.info(
-            f"Connector completed with some errors: "
-            f"exceptions={index_attempt_md.num_exceptions} "
-            f"batches={batch_num} "
-            f"docs={document_count} "
-            f"chunks={chunk_count} "
-            f"elapsed={elapsed_time:.2f}s"
-        )
-
-    if is_primary:
-        update_connector_credential_pair(
-            db_session=db_session,
-            connector_id=db_connector.id,
-            credential_id=db_credential.id,
-            run_dt=run_end_dt,
-        )
-
-
-def run_indexing_entrypoint(
-    index_attempt_id: int,
-    tenant_id: str | None,
-    connector_credential_pair_id: int,
-    is_ee: bool = False,
-    callback: IndexingHeartbeatInterface | None = None,
-) -> None:
-    try:
-        if is_ee:
-            global_version.set_ee()
-
-        # set the indexing attempt ID so that all log messages from this process
-        # will have it added as a prefix
-        TaskAttemptSingleton.set_cc_and_index_id(
-            index_attempt_id, connector_credential_pair_id
-        )
-        with get_session_with_tenant(tenant_id) as db_session:
-            attempt = transition_attempt_to_in_progress(index_attempt_id, db_session)
-
-            tenant_str = ""
-            if tenant_id is not None:
-                tenant_str = f" for tenant {tenant_id}"
-
-            logger.info(
-                f"Indexing starting{tenant_str}: "
-                f"connector='{attempt.connector_credential_pair.connector.name}' "
-                f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
-                f"credentials='{attempt.connector_credential_pair.connector_id}'"
-            )
-
-            _run_indexing(db_session, attempt, tenant_id, callback)
-
-            logger.info(
-                f"Indexing finished{tenant_str}: "
-                f"connector='{attempt.connector_credential_pair.connector.name}' "
-                f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' "
-                f"credentials='{attempt.connector_credential_pair.connector_id}'"
-            )
-    except Exception as e:
-        logger.exception(
-            f"Indexing job with ID '{index_attempt_id}' for tenant {tenant_id} failed due to {e}"
-        )
--- a/backend/danswer/chat/models.py
+++ b/backend/danswer/chat/models.py
@@ -1,186 +0,0 @@
-from collections.abc import Iterator
-from datetime import datetime
-from enum import Enum
-from typing import Any
-
-from pydantic import BaseModel
-
-from danswer.configs.constants import DocumentSource
-from danswer.context.search.enums import QueryFlow
-from danswer.context.search.enums import SearchType
-from danswer.context.search.models import RetrievalDocs
-from danswer.context.search.models import SearchResponse
-from danswer.tools.tool_implementations.custom.base_tool_types import ToolResultType
-
-
-class LlmDoc(BaseModel):
-    """This contains the minimal set information for the LLM portion including citations"""
-
-    document_id: str
-    content: str
-    blurb: str
-    semantic_identifier: str
-    source_type: DocumentSource
-    metadata: dict[str, str | list[str]]
-    updated_at: datetime | None
-    link: str | None
-    source_links: dict[int, str] | None
-    match_highlights: list[str] | None
-
-
-# First chunk of info for streaming QA
-class QADocsResponse(RetrievalDocs):
-    rephrased_query: str | None = None
-    predicted_flow: QueryFlow | None
-    predicted_search: SearchType | None
-    applied_source_filters: list[DocumentSource] | None
-    applied_time_cutoff: datetime | None
-    recency_bias_multiplier: float
-
-    def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
-        initial_dict = super().model_dump(mode="json", *args, **kwargs)  # type: ignore
-        initial_dict["applied_time_cutoff"] = (
-            self.applied_time_cutoff.isoformat() if self.applied_time_cutoff else None
-        )
-
-        return initial_dict
-
-
-class StreamStopReason(Enum):
-    CONTEXT_LENGTH = "context_length"
-    CANCELLED = "cancelled"
-
-
-class StreamStopInfo(BaseModel):
-    stop_reason: StreamStopReason
-
-    def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
-        data = super().model_dump(mode="json", *args, **kwargs)  # type: ignore
-        data["stop_reason"] = self.stop_reason.name
-        return data
-
-
-class LLMRelevanceFilterResponse(BaseModel):
-    llm_selected_doc_indices: list[int]
-
-
-class FinalUsedContextDocsResponse(BaseModel):
-    final_context_docs: list[LlmDoc]
-
-
-class RelevanceAnalysis(BaseModel):
-    relevant: bool
-    content: str | None = None
-
-
-class SectionRelevancePiece(RelevanceAnalysis):
-    """LLM analysis mapped to an Inference Section"""
-
-    document_id: str
-    chunk_id: int  # ID of the center chunk for a given inference section
-
-
-class DocumentRelevance(BaseModel):
-    """Contains all relevance information for a given search"""
-
-    relevance_summaries: dict[str, RelevanceAnalysis]
-
-
-class DanswerAnswerPiece(BaseModel):
-    # A small piece of a complete answer. Used for streaming back answers.
-    answer_piece: str | None  # if None, specifies the end of an Answer
-
-
-# An intermediate representation of citations, later translated into
-# a mapping of the citation [n] number to SearchDoc
-class CitationInfo(BaseModel):
-    citation_num: int
-    document_id: str
-
-
-class AllCitations(BaseModel):
-    citations: list[CitationInfo]
-
-
-# This is a mapping of the citation number to the document index within
-# the result search doc set
-class MessageSpecificCitations(BaseModel):
-    citation_map: dict[int, int]
-
-
-class MessageResponseIDInfo(BaseModel):
-    user_message_id: int | None
-    reserved_assistant_message_id: int
-
-
-class StreamingError(BaseModel):
-    error: str
-    stack_trace: str | None = None
-
-
-class DanswerQuote(BaseModel):
-    # This is during inference so everything is a string by this point
-    quote: str
-    document_id: str
-    link: str | None
-    source_type: str
-    semantic_identifier: str
-    blurb: str
-
-
-class DanswerQuotes(BaseModel):
-    quotes: list[DanswerQuote]
-
-
-class DanswerContext(BaseModel):
-    content: str
-    document_id: str
-    semantic_identifier: str
-    blurb: str
-
-
-class DanswerContexts(BaseModel):
-    contexts: list[DanswerContext]
-
-
-class DanswerAnswer(BaseModel):
-    answer: str | None
-
-
-class QAResponse(SearchResponse, DanswerAnswer):
-    quotes: list[DanswerQuote] | None
-    contexts: list[DanswerContexts] | None
-    predicted_flow: QueryFlow
-    predicted_search: SearchType
-    eval_res_valid: bool | None = None
-    llm_selected_doc_indices: list[int] | None = None
-    error_msg: str | None = None
-
-
-class FileChatDisplay(BaseModel):
-    file_ids: list[str]
-
-
-class CustomToolResponse(BaseModel):
-    response: ToolResultType
-    tool_name: str
-
-
-AnswerQuestionPossibleReturn = (
-    DanswerAnswerPiece
-    | DanswerQuotes
-    | CitationInfo
-    | DanswerContexts
-    | FileChatDisplay
-    | CustomToolResponse
-    | StreamingError
-    | StreamStopInfo
-)
-
-
-AnswerQuestionStreamReturn = Iterator[AnswerQuestionPossibleReturn]
-
-
-class LLMMetricsContainer(BaseModel):
-    prompt_tokens: int
-    response_tokens: int
--- a/backend/danswer/connectors/google_utils/google_auth.py
+++ b/backend/danswer/connectors/google_utils/google_auth.py
@@ -1,107 +0,0 @@
-import json
-from typing import cast
-
-from google.auth.transport.requests import Request  # type: ignore
-from google.oauth2.credentials import Credentials as OAuthCredentials  # type: ignore
-from google.oauth2.service_account import Credentials as ServiceAccountCredentials  # type: ignore
-
-from danswer.configs.constants import DocumentSource
-from danswer.connectors.google_utils.shared_constants import (
-    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
-)
-from danswer.connectors.google_utils.shared_constants import (
-    DB_CREDENTIALS_DICT_TOKEN_KEY,
-)
-from danswer.connectors.google_utils.shared_constants import (
-    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
-)
-from danswer.connectors.google_utils.shared_constants import (
-    GOOGLE_SCOPES,
-)
-from danswer.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def get_google_oauth_creds(
-    token_json_str: str, source: DocumentSource
-) -> OAuthCredentials | None:
-    creds_json = json.loads(token_json_str)
-    creds = OAuthCredentials.from_authorized_user_info(
-        info=creds_json,
-        scopes=GOOGLE_SCOPES[source],
-    )
-    if creds.valid:
-        return creds
-
-    if creds.expired and creds.refresh_token:
-        try:
-            creds.refresh(Request())
-            if creds.valid:
-                logger.notice("Refreshed Google Drive tokens.")
-                return creds
-        except Exception:
-            logger.exception("Failed to refresh google drive access token due to:")
-            return None
-
-    return None
-
-
-def get_google_creds(
-    credentials: dict[str, str],
-    source: DocumentSource,
-) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]:
-    """Checks for two different types of credentials.
-    (1) A credential which holds a token acquired via a user going thorough
-    the Google OAuth flow.
-    (2) A credential which holds a service account key JSON file, which
-    can then be used to impersonate any user in the workspace.
-    """
-    oauth_creds = None
-    service_creds = None
-    new_creds_dict = None
-    if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:
-        # OAUTH
-        access_token_json_str = cast(str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY])
-        oauth_creds = get_google_oauth_creds(
-            token_json_str=access_token_json_str, source=source
-        )
-
-        # tell caller to update token stored in DB if it has changed
-        # (e.g. the token has been refreshed)
-        new_creds_json_str = oauth_creds.to_json() if oauth_creds else ""
-        if new_creds_json_str != access_token_json_str:
-            new_creds_dict = {
-                DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str,
-                DB_CREDENTIALS_PRIMARY_ADMIN_KEY: credentials[
-                    DB_CREDENTIALS_PRIMARY_ADMIN_KEY
-                ],
-            }
-    elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:
-        # SERVICE ACCOUNT
-        service_account_key_json_str = credentials[
-            DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
-        ]
-        service_account_key = json.loads(service_account_key_json_str)
-
-        service_creds = ServiceAccountCredentials.from_service_account_info(
-            service_account_key, scopes=GOOGLE_SCOPES[source]
-        )
-
-        if not service_creds.valid or not service_creds.expired:
-            service_creds.refresh(Request())
-
-        if not service_creds.valid:
-            raise PermissionError(
-                f"Unable to access {source} - service account credentials are invalid."
-            )
-
-    creds: ServiceAccountCredentials | OAuthCredentials | None = (
-        oauth_creds or service_creds
-    )
-    if creds is None:
-        raise PermissionError(
-            f"Unable to access {source} - unknown credential structure."
-        )
-
-    return creds, new_creds_dict
--- a/backend/danswer/connectors/salesforce/connector.py
+++ b/backend/danswer/connectors/salesforce/connector.py
@@ -1,289 +0,0 @@
-import os
-from collections.abc import Iterator
-from datetime import datetime
-from datetime import timezone
-from typing import Any
-
-from simple_salesforce import Salesforce
-from simple_salesforce import SFType
-
-from danswer.configs.app_configs import INDEX_BATCH_SIZE
-from danswer.configs.constants import DocumentSource
-from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
-from danswer.connectors.interfaces import GenerateDocumentsOutput
-from danswer.connectors.interfaces import GenerateSlimDocumentOutput
-from danswer.connectors.interfaces import LoadConnector
-from danswer.connectors.interfaces import PollConnector
-from danswer.connectors.interfaces import SecondsSinceUnixEpoch
-from danswer.connectors.interfaces import SlimConnector
-from danswer.connectors.models import BasicExpertInfo
-from danswer.connectors.models import ConnectorMissingCredentialError
-from danswer.connectors.models import Document
-from danswer.connectors.models import Section
-from danswer.connectors.models import SlimDocument
-from danswer.connectors.salesforce.utils import extract_dict_text
-from danswer.utils.logger import setup_logger
-
-
-# TODO: this connector does not work well at large scales
-# the large query against a large Salesforce instance has been reported to take 1.5 hours.
-# Additionally it seems to eat up more memory over time if the connection is long running (again a scale issue).
-
-
-DEFAULT_PARENT_OBJECT_TYPES = ["Account"]
-MAX_QUERY_LENGTH = 10000  # max query length is 20,000 characters
-ID_PREFIX = "SALESFORCE_"
-
-logger = setup_logger()
-
-
-class SalesforceConnector(LoadConnector, PollConnector, SlimConnector):
-    def __init__(
-        self,
-        batch_size: int = INDEX_BATCH_SIZE,
-        requested_objects: list[str] = [],
-    ) -> None:
-        self.batch_size = batch_size
-        self.sf_client: Salesforce | None = None
-        self.parent_object_list = (
-            [obj.capitalize() for obj in requested_objects]
-            if requested_objects
-            else DEFAULT_PARENT_OBJECT_TYPES
-        )
-
-    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        self.sf_client = Salesforce(
-            username=credentials["sf_username"],
-            password=credentials["sf_password"],
-            security_token=credentials["sf_security_token"],
-        )
-
-        return None
-
-    def _get_sf_type_object_json(self, type_name: str) -> Any:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-        sf_object = SFType(
-            type_name, self.sf_client.session_id, self.sf_client.sf_instance
-        )
-        return sf_object.describe()
-
-    def _get_name_from_id(self, id: str) -> str:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-        try:
-            user_object_info = self.sf_client.query(
-                f"SELECT Name FROM User WHERE Id = '{id}'"
-            )
-            name = user_object_info.get("Records", [{}])[0].get("Name", "Null User")
-            return name
-        except Exception:
-            logger.warning(f"Couldnt find name for object id: {id}")
-            return "Null User"
-
-    def _convert_object_instance_to_document(
-        self, object_dict: dict[str, Any]
-    ) -> Document:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-
-        salesforce_id = object_dict["Id"]
-        danswer_salesforce_id = f"{ID_PREFIX}{salesforce_id}"
-        extracted_link = f"https://{self.sf_client.sf_instance}/{salesforce_id}"
-        extracted_doc_updated_at = time_str_to_utc(object_dict["LastModifiedDate"])
-        extracted_object_text = extract_dict_text(object_dict)
-        extracted_semantic_identifier = object_dict.get("Name", "Unknown Object")
-        extracted_primary_owners = [
-            BasicExpertInfo(
-                display_name=self._get_name_from_id(object_dict["LastModifiedById"])
-            )
-        ]
-
-        doc = Document(
-            id=danswer_salesforce_id,
-            sections=[Section(link=extracted_link, text=extracted_object_text)],
-            source=DocumentSource.SALESFORCE,
-            semantic_identifier=extracted_semantic_identifier,
-            doc_updated_at=extracted_doc_updated_at,
-            primary_owners=extracted_primary_owners,
-            metadata={},
-        )
-        return doc
-
-    def _is_valid_child_object(self, child_relationship: dict) -> bool:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-
-        if not child_relationship["childSObject"]:
-            return False
-        if not child_relationship["relationshipName"]:
-            return False
-
-        sf_type = child_relationship["childSObject"]
-        object_description = self._get_sf_type_object_json(sf_type)
-        if not object_description["queryable"]:
-            return False
-
-        try:
-            query = f"SELECT Count() FROM {sf_type} LIMIT 1"
-            result = self.sf_client.query(query)
-            if result["totalSize"] == 0:
-                return False
-        except Exception as e:
-            logger.warning(f"Object type {sf_type} doesn't support query: {e}")
-            return False
-
-        if child_relationship["field"]:
-            if child_relationship["field"] == "RelatedToId":
-                return False
-        else:
-            return False
-
-        return True
-
-    def _get_all_children_of_sf_type(self, sf_type: str) -> list[dict]:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-
-        object_description = self._get_sf_type_object_json(sf_type)
-
-        children_objects: list[dict] = []
-        for child_relationship in object_description["childRelationships"]:
-            if self._is_valid_child_object(child_relationship):
-                children_objects.append(
-                    {
-                        "relationship_name": child_relationship["relationshipName"],
-                        "object_type": child_relationship["childSObject"],
-                    }
-                )
-        return children_objects
-
-    def _get_all_fields_for_sf_type(self, sf_type: str) -> list[str]:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-
-        object_description = self._get_sf_type_object_json(sf_type)
-
-        fields = [
-            field.get("name")
-            for field in object_description["fields"]
-            if field.get("type", "base64") != "base64"
-        ]
-
-        return fields
-
-    def _generate_query_per_parent_type(self, parent_sf_type: str) -> Iterator[str]:
-        """
-        This function takes in an object_type and generates query(s) designed to grab
-        information associated to objects of that type.
-        It does that by getting all the fields of the parent object type.
-        Then it gets all the child objects of that object type and all the fields of
-        those children as well.
-        """
-        parent_fields = self._get_all_fields_for_sf_type(parent_sf_type)
-        child_sf_types = self._get_all_children_of_sf_type(parent_sf_type)
-
-        query = f"SELECT {', '.join(parent_fields)}"
-        for child_object_dict in child_sf_types:
-            fields = self._get_all_fields_for_sf_type(child_object_dict["object_type"])
-            query_addition = f", \n(SELECT {', '.join(fields)} FROM {child_object_dict['relationship_name']})"
-
-            if len(query_addition) + len(query) > MAX_QUERY_LENGTH:
-                query += f"\n FROM {parent_sf_type}"
-                yield query
-                query = "SELECT Id" + query_addition
-            else:
-                query += query_addition
-
-        query += f"\n FROM {parent_sf_type}"
-
-        yield query
-
-    def _fetch_from_salesforce(
-        self,
-        start: datetime | None = None,
-        end: datetime | None = None,
-    ) -> GenerateDocumentsOutput:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-
-        doc_batch: list[Document] = []
-        for parent_object_type in self.parent_object_list:
-            logger.debug(f"Processing: {parent_object_type}")
-
-            query_results: dict = {}
-            for query in self._generate_query_per_parent_type(parent_object_type):
-                if start is not None and end is not None:
-                    if start and start.tzinfo is None:
-                        start = start.replace(tzinfo=timezone.utc)
-                    if end and end.tzinfo is None:
-                        end = end.replace(tzinfo=timezone.utc)
-                    query += f" WHERE LastModifiedDate > {start.isoformat()} AND LastModifiedDate < {end.isoformat()}"
-
-                query_result = self.sf_client.query_all(query)
-
-                for record_dict in query_result["records"]:
-                    query_results.setdefault(record_dict["Id"], {}).update(record_dict)
-
-            logger.info(
-                f"Number of {parent_object_type} Objects processed: {len(query_results)}"
-            )
-
-            for combined_object_dict in query_results.values():
-                doc_batch.append(
-                    self._convert_object_instance_to_document(combined_object_dict)
-                )
-
-                if len(doc_batch) > self.batch_size:
-                    yield doc_batch
-                    doc_batch = []
-        yield doc_batch
-
-    def load_from_state(self) -> GenerateDocumentsOutput:
-        return self._fetch_from_salesforce()
-
-    def poll_source(
-        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
-    ) -> GenerateDocumentsOutput:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-        start_datetime = datetime.utcfromtimestamp(start)
-        end_datetime = datetime.utcfromtimestamp(end)
-        return self._fetch_from_salesforce(start=start_datetime, end=end_datetime)
-
-    def retrieve_all_slim_documents(
-        self,
-        start: SecondsSinceUnixEpoch | None = None,
-        end: SecondsSinceUnixEpoch | None = None,
-    ) -> GenerateSlimDocumentOutput:
-        if self.sf_client is None:
-            raise ConnectorMissingCredentialError("Salesforce")
-        doc_metadata_list: list[SlimDocument] = []
-        for parent_object_type in self.parent_object_list:
-            query = f"SELECT Id FROM {parent_object_type}"
-            query_result = self.sf_client.query_all(query)
-            doc_metadata_list.extend(
-                SlimDocument(
-                    id=f"{ID_PREFIX}{instance_dict.get('Id', '')}",
-                    perm_sync_data={},
-                )
-                for instance_dict in query_result["records"]
-            )
-
-        yield doc_metadata_list
-
-
-if __name__ == "__main__":
-    connector = SalesforceConnector(
-        requested_objects=os.environ["REQUESTED_OBJECTS"].split(",")
-    )
-
-    connector.load_credentials(
-        {
-            "sf_username": os.environ["SF_USERNAME"],
-            "sf_password": os.environ["SF_PASSWORD"],
-            "sf_security_token": os.environ["SF_SECURITY_TOKEN"],
-        }
-    )
-    document_batches = connector.load_from_state()
-    print(next(document_batches))
--- a/backend/danswer/connectors/salesforce/utils.py
+++ b/backend/danswer/connectors/salesforce/utils.py
@@ -1,66 +0,0 @@
-import re
-from typing import Union
-
-SF_JSON_FILTER = r"Id$|Date$|stamp$|url$"
-
-
-def _clean_salesforce_dict(data: Union[dict, list]) -> Union[dict, list]:
-    if isinstance(data, dict):
-        if "records" in data.keys():
-            data = data["records"]
-    if isinstance(data, dict):
-        if "attributes" in data.keys():
-            if isinstance(data["attributes"], dict):
-                data.update(data.pop("attributes"))
-
-    if isinstance(data, dict):
-        filtered_dict = {}
-        for key, value in data.items():
-            if not re.search(SF_JSON_FILTER, key, re.IGNORECASE):
-                if "__c" in key:  # remove the custom object indicator for display
-                    key = key[:-3]
-                if isinstance(value, (dict, list)):
-                    filtered_value = _clean_salesforce_dict(value)
-                    if filtered_value:  # Only add non-empty dictionaries or lists
-                        filtered_dict[key] = filtered_value
-                elif value is not None:
-                    filtered_dict[key] = value
-        return filtered_dict
-    elif isinstance(data, list):
-        filtered_list = []
-        for item in data:
-            if isinstance(item, (dict, list)):
-                filtered_item = _clean_salesforce_dict(item)
-                if filtered_item:  # Only add non-empty dictionaries or lists
-                    filtered_list.append(filtered_item)
-            elif item is not None:
-                filtered_list.append(filtered_item)
-        return filtered_list
-    else:
-        return data
-
-
-def _json_to_natural_language(data: Union[dict, list], indent: int = 0) -> str:
-    result = []
-    indent_str = " " * indent
-
-    if isinstance(data, dict):
-        for key, value in data.items():
-            if isinstance(value, (dict, list)):
-                result.append(f"{indent_str}{key}:")
-                result.append(_json_to_natural_language(value, indent + 2))
-            else:
-                result.append(f"{indent_str}{key}: {value}")
-    elif isinstance(data, list):
-        for item in data:
-            result.append(_json_to_natural_language(item, indent))
-    else:
-        result.append(f"{indent_str}{data}")
-
-    return "\n".join(result)
-
-
-def extract_dict_text(raw_dict: dict) -> str:
-    processed_dict = _clean_salesforce_dict(raw_dict)
-    natural_language_dict = _json_to_natural_language(processed_dict)
-    return natural_language_dict
--- a/backend/danswer/connectors/slack/load_connector.py
+++ b/backend/danswer/connectors/slack/load_connector.py
@@ -1,140 +0,0 @@
-import json
-import os
-from datetime import datetime
-from datetime import timezone
-from pathlib import Path
-from typing import Any
-from typing import cast
-
-from danswer.configs.app_configs import INDEX_BATCH_SIZE
-from danswer.configs.constants import DocumentSource
-from danswer.connectors.interfaces import GenerateDocumentsOutput
-from danswer.connectors.interfaces import LoadConnector
-from danswer.connectors.models import Document
-from danswer.connectors.models import Section
-from danswer.connectors.slack.connector import filter_channels
-from danswer.connectors.slack.utils import get_message_link
-from danswer.utils.logger import setup_logger
-
-
-logger = setup_logger()
-
-
-def get_event_time(event: dict[str, Any]) -> datetime | None:
-    ts = event.get("ts")
-    if not ts:
-        return None
-    return datetime.fromtimestamp(float(ts), tz=timezone.utc)
-
-
-class SlackLoadConnector(LoadConnector):
-    # WARNING: DEPRECATED, DO NOT USE
-    def __init__(
-        self,
-        workspace: str,
-        export_path_str: str,
-        channels: list[str] | None = None,
-        # if specified, will treat the specified channel strings as
-        # regexes, and will only index channels that fully match the regexes
-        channel_regex_enabled: bool = False,
-        batch_size: int = INDEX_BATCH_SIZE,
-    ) -> None:
-        self.workspace = workspace
-        self.channels = channels
-        self.channel_regex_enabled = channel_regex_enabled
-        self.export_path_str = export_path_str
-        self.batch_size = batch_size
-
-    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        if credentials:
-            logger.warning("Unexpected credentials provided for Slack Load Connector")
-        return None
-
-    @staticmethod
-    def _process_batch_event(
-        slack_event: dict[str, Any],
-        channel: dict[str, Any],
-        matching_doc: Document | None,
-        workspace: str,
-    ) -> Document | None:
-        if (
-            slack_event["type"] == "message"
-            and slack_event.get("subtype") != "channel_join"
-        ):
-            if matching_doc:
-                return Document(
-                    id=matching_doc.id,
-                    sections=matching_doc.sections
-                    + [
-                        Section(
-                            link=get_message_link(
-                                event=slack_event,
-                                workspace=workspace,
-                                channel_id=channel["id"],
-                            ),
-                            text=slack_event["text"],
-                        )
-                    ],
-                    source=matching_doc.source,
-                    semantic_identifier=matching_doc.semantic_identifier,
-                    title="",  # slack docs don't really have a "title"
-                    doc_updated_at=get_event_time(slack_event),
-                    metadata=matching_doc.metadata,
-                )
-
-            return Document(
-                id=slack_event["ts"],
-                sections=[
-                    Section(
-                        link=get_message_link(
-                            event=slack_event,
-                            workspace=workspace,
-                            channel_id=channel["id"],
-                        ),
-                        text=slack_event["text"],
-                    )
-                ],
-                source=DocumentSource.SLACK,
-                semantic_identifier=channel["name"],
-                title="",  # slack docs don't really have a "title"
-                doc_updated_at=get_event_time(slack_event),
-                metadata={},
-            )
-
-        return None
-
-    def load_from_state(self) -> GenerateDocumentsOutput:
-        export_path = Path(self.export_path_str)
-
-        with open(export_path / "channels.json") as f:
-            all_channels = json.load(f)
-
-        filtered_channels = filter_channels(
-            all_channels, self.channels, self.channel_regex_enabled
-        )
-
-        document_batch: dict[str, Document] = {}
-        for channel_info in filtered_channels:
-            channel_dir_path = export_path / cast(str, channel_info["name"])
-            channel_file_paths = [
-                channel_dir_path / file_name
-                for file_name in os.listdir(channel_dir_path)
-            ]
-            for path in channel_file_paths:
-                with open(path) as f:
-                    events = cast(list[dict[str, Any]], json.load(f))
-                for slack_event in events:
-                    doc = self._process_batch_event(
-                        slack_event=slack_event,
-                        channel=channel_info,
-                        matching_doc=document_batch.get(
-                            slack_event.get("thread_ts", "")
-                        ),
-                        workspace=self.workspace,
-                    )
-                    if doc:
-                        document_batch[doc.id] = doc
-                        if len(document_batch) >= self.batch_size:
-                            yield list(document_batch.values())
-
-        yield list(document_batch.values())
--- a/backend/danswer/danswerbot/slack/handlers/utils.py
+++ b/backend/danswer/danswerbot/slack/handlers/utils.py
@@ -1,19 +0,0 @@
-from slack_sdk import WebClient
-
-from danswer.danswerbot.slack.utils import respond_in_thread
-
-
-def send_team_member_message(
-    client: WebClient,
-    channel: str,
-    thread_ts: str,
-) -> None:
-    respond_in_thread(
-        client=client,
-        channel=channel,
-        text=(
-            "👋 Hi, we've just gathered and forwarded the relevant "
-            + "information to the team. They'll get back to you shortly!"
-        ),
-        thread_ts=thread_ts,
-    )
--- a/backend/danswer/danswerbot/slack/icons.py
+++ b/backend/danswer/danswerbot/slack/icons.py
@@ -1,58 +0,0 @@
-from danswer.configs.constants import DocumentSource
-
-
-def source_to_github_img_link(source: DocumentSource) -> str | None:
-    # TODO: store these images somewhere better
-    if source == DocumentSource.WEB.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/Web.png"
-    if source == DocumentSource.FILE.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/File.png"
-    if source == DocumentSource.GOOGLE_SITES.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleSites.png"
-    if source == DocumentSource.SLACK.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Slack.png"
-    if source == DocumentSource.GMAIL.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gmail.png"
-    if source == DocumentSource.GOOGLE_DRIVE.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleDrive.png"
-    if source == DocumentSource.GITHUB.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Github.png"
-    if source == DocumentSource.GITLAB.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gitlab.png"
-    if source == DocumentSource.CONFLUENCE.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/Confluence.png"
-    if source == DocumentSource.JIRA.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/Jira.png"
-    if source == DocumentSource.NOTION.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Notion.png"
-    if source == DocumentSource.ZENDESK.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/Zendesk.png"
-    if source == DocumentSource.GONG.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gong.png"
-    if source == DocumentSource.LINEAR.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Linear.png"
-    if source == DocumentSource.PRODUCTBOARD.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Productboard.webp"
-    if source == DocumentSource.SLAB.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/SlabLogo.png"
-    if source == DocumentSource.ZULIP.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Zulip.png"
-    if source == DocumentSource.GURU.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/Guru.png"
-    if source == DocumentSource.HUBSPOT.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/HubSpot.png"
-    if source == DocumentSource.DOCUMENT360.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Document360.png"
-    if source == DocumentSource.BOOKSTACK.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Bookstack.png"
-    if source == DocumentSource.LOOPIO.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Loopio.png"
-    if source == DocumentSource.SHAREPOINT.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Sharepoint.png"
-    if source == DocumentSource.REQUESTTRACKER.value:
-        # just use file icon for now
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/File.png"
-    if source == DocumentSource.INGESTION_API.value:
-        return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/File.png"
-
-    return "https://raw.githubusercontent.com/danswer-ai/danswer/main/backend/slackbot_images/File.png"
--- a/backend/danswer/danswerbot/slack/models.py
+++ b/backend/danswer/danswerbot/slack/models.py
@@ -1,15 +0,0 @@
-from pydantic import BaseModel
-
-from danswer.one_shot_answer.models import ThreadMessage
-
-
-class SlackMessageInfo(BaseModel):
-    thread_messages: list[ThreadMessage]
-    channel_to_respond: str
-    msg_to_respond: str | None
-    thread_to_respond: str | None
-    sender: str | None
-    email: str | None
-    bypass_filters: bool  # User has tagged @DanswerBot
-    is_bot_msg: bool  # User is using /DanswerBot
-    is_bot_dm: bool  # User is direct messaging to DanswerBot
--- a/backend/danswer/document_index/document_index_utils.py
+++ b/backend/danswer/document_index/document_index_utils.py
@@ -1,60 +0,0 @@
-import math
-import uuid
-
-from sqlalchemy.orm import Session
-
-from danswer.context.search.models import InferenceChunk
-from danswer.db.search_settings import get_current_search_settings
-from danswer.db.search_settings import get_secondary_search_settings
-from danswer.indexing.models import IndexChunk
-
-
-DEFAULT_BATCH_SIZE = 30
-DEFAULT_INDEX_NAME = "danswer_chunk"
-
-
-def get_both_index_names(db_session: Session) -> tuple[str, str | None]:
-    search_settings = get_current_search_settings(db_session)
-
-    search_settings_new = get_secondary_search_settings(db_session)
-    if not search_settings_new:
-        return search_settings.index_name, None
-
-    return search_settings.index_name, search_settings_new.index_name
-
-
-def translate_boost_count_to_multiplier(boost: int) -> float:
-    """Mapping boost integer values to a multiplier according to a sigmoid curve
-    Piecewise such that at many downvotes, its 0.5x the score and with many upvotes
-    it is 2x the score. This should be in line with the Vespa calculation."""
-    # 3 in the equation below stretches it out to hit asymptotes slower
-    if boost < 0:
-        # 0.5 + sigmoid -> range of 0.5 to 1
-        return 0.5 + (1 / (1 + math.exp(-1 * boost / 3)))
-
-    # 2 x sigmoid -> range of 1 to 2
-    return 2 / (1 + math.exp(-1 * boost / 3))
-
-
-def get_uuid_from_chunk(
-    chunk: IndexChunk | InferenceChunk, mini_chunk_ind: int = 0
-) -> uuid.UUID:
-    doc_str = (
-        chunk.document_id
-        if isinstance(chunk, InferenceChunk)
-        else chunk.source_document.id
-    )
-    # Web parsing URL duplicate catching
-    if doc_str and doc_str[-1] == "/":
-        doc_str = doc_str[:-1]
-    unique_identifier_string = "_".join(
-        [doc_str, str(chunk.chunk_id), str(mini_chunk_ind)]
-    )
-    if chunk.large_chunk_reference_ids:
-        unique_identifier_string += "_large" + "_".join(
-            [
-                str(referenced_chunk_id)
-                for referenced_chunk_id in chunk.large_chunk_reference_ids
-            ]
-        )
-    return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)
--- a/backend/danswer/document_index/vespa/deletion.py
+++ b/backend/danswer/document_index/vespa/deletion.py
@@ -1,65 +0,0 @@
-import concurrent.futures
-
-import httpx
-from retry import retry
-
-from danswer.document_index.vespa.chunk_retrieval import (
-    get_all_vespa_ids_for_document_id,
-)
-from danswer.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
-from danswer.document_index.vespa_constants import NUM_THREADS
-from danswer.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-CONTENT_SUMMARY = "content_summary"
-
-
-@retry(tries=3, delay=1, backoff=2)
-def _delete_vespa_doc_chunks(
-    document_id: str, index_name: str, http_client: httpx.Client
-) -> None:
-    doc_chunk_ids = get_all_vespa_ids_for_document_id(
-        document_id=document_id,
-        index_name=index_name,
-        get_large_chunks=True,
-    )
-
-    for chunk_id in doc_chunk_ids:
-        try:
-            res = http_client.delete(
-                f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}"
-            )
-            res.raise_for_status()
-        except httpx.HTTPStatusError as e:
-            logger.error(f"Failed to delete chunk, details: {e.response.text}")
-            raise
-
-
-def delete_vespa_docs(
-    document_ids: list[str],
-    index_name: str,
-    http_client: httpx.Client,
-    executor: concurrent.futures.ThreadPoolExecutor | None = None,
-) -> None:
-    external_executor = True
-
-    if not executor:
-        external_executor = False
-        executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)
-
-    try:
-        doc_deletion_future = {
-            executor.submit(
-                _delete_vespa_doc_chunks, doc_id, index_name, http_client
-            ): doc_id
-            for doc_id in document_ids
-        }
-        for future in concurrent.futures.as_completed(doc_deletion_future):
-            # Will raise exception if the deletion raised an exception
-            future.result()
-
-    finally:
-        if not external_executor:
-            executor.shutdown(wait=True)
--- a/backend/danswer/file_store/utils.py
+++ b/backend/danswer/file_store/utils.py
@@ -1,85 +0,0 @@
-from collections.abc import Callable
-from io import BytesIO
-from typing import Any
-from typing import cast
-from uuid import uuid4
-
-import requests
-from sqlalchemy.orm import Session
-
-from danswer.configs.constants import FileOrigin
-from danswer.db.engine import get_session_with_tenant
-from danswer.db.models import ChatMessage
-from danswer.file_store.file_store import get_default_file_store
-from danswer.file_store.models import FileDescriptor
-from danswer.file_store.models import InMemoryChatFile
-from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel
-from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
-
-
-def load_chat_file(
-    file_descriptor: FileDescriptor, db_session: Session
-) -> InMemoryChatFile:
-    file_io = get_default_file_store(db_session).read_file(
-        file_descriptor["id"], mode="b"
-    )
-    return InMemoryChatFile(
-        file_id=file_descriptor["id"],
-        content=file_io.read(),
-        file_type=file_descriptor["type"],
-        filename=file_descriptor.get("name"),
-    )
-
-
-def load_all_chat_files(
-    chat_messages: list[ChatMessage],
-    file_descriptors: list[FileDescriptor],
-    db_session: Session,
-) -> list[InMemoryChatFile]:
-    file_descriptors_for_history: list[FileDescriptor] = []
-    for chat_message in chat_messages:
-        if chat_message.files:
-            file_descriptors_for_history.extend(chat_message.files)
-
-    files = cast(
-        list[InMemoryChatFile],
-        run_functions_tuples_in_parallel(
-            [
-                (load_chat_file, (file, db_session))
-                for file in file_descriptors + file_descriptors_for_history
-            ]
-        ),
-    )
-    return files
-
-
-def save_file_from_url(url: str, tenant_id: str) -> str:
-    """NOTE: using multiple sessions here, since this is often called
-    using multithreading. In practice, sharing a session has resulted in
-    weird errors."""
-    with get_session_with_tenant(tenant_id) as db_session:
-        response = requests.get(url)
-        response.raise_for_status()
-
-        unique_id = str(uuid4())
-
-        file_io = BytesIO(response.content)
-        file_store = get_default_file_store(db_session)
-        file_store.save_file(
-            file_name=unique_id,
-            content=file_io,
-            display_name="GeneratedImage",
-            file_origin=FileOrigin.CHAT_IMAGE_GEN,
-            file_type="image/png;base64",
-        )
-        return unique_id
-
-
-def save_files_from_urls(urls: list[str]) -> list[str]:
-    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
-
-    funcs: list[tuple[Callable[..., Any], tuple[Any, ...]]] = [
-        (save_file_from_url, (url, tenant_id)) for url in urls
-    ]
-    # Must pass in tenant_id here, since this is called by multithreading
-    return run_functions_tuples_in_parallel(funcs)
--- a/backend/danswer/key_value_store/factory.py
+++ b/backend/danswer/key_value_store/factory.py
@@ -1,8 +0,0 @@
-from danswer.key_value_store.interface import KeyValueStore
-from danswer.key_value_store.store import PgRedisKVStore
-
-
-def get_kv_store() -> KeyValueStore:
-    # In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in
-    # It's read from the global thread level variable
-    return PgRedisKVStore()
--- a/backend/danswer/llm/answering/llm_response_handler.py
+++ b/backend/danswer/llm/answering/llm_response_handler.py
@@ -1,84 +0,0 @@
-from collections.abc import Callable
-from collections.abc import Generator
-from collections.abc import Iterator
-from typing import TYPE_CHECKING
-
-from langchain_core.messages import BaseMessage
-from pydantic.v1 import BaseModel as BaseModel__v1
-
-from danswer.chat.models import CitationInfo
-from danswer.chat.models import DanswerAnswerPiece
-from danswer.chat.models import DanswerQuotes
-from danswer.chat.models import StreamStopInfo
-from danswer.chat.models import StreamStopReason
-from danswer.file_store.models import InMemoryChatFile
-from danswer.llm.answering.prompts.build import AnswerPromptBuilder
-from danswer.tools.force import ForceUseTool
-from danswer.tools.models import ToolCallFinalResult
-from danswer.tools.models import ToolCallKickoff
-from danswer.tools.models import ToolResponse
-from danswer.tools.tool import Tool
-
-
-if TYPE_CHECKING:
-    from danswer.llm.answering.stream_processing.answer_response_handler import (
-        AnswerResponseHandler,
-    )
-    from danswer.llm.answering.tool.tool_response_handler import ToolResponseHandler
-
-
-ResponsePart = (
-    DanswerAnswerPiece
-    | CitationInfo
-    | DanswerQuotes
-    | ToolCallKickoff
-    | ToolResponse
-    | ToolCallFinalResult
-    | StreamStopInfo
-)
-
-
-class LLMCall(BaseModel__v1):
-    prompt_builder: AnswerPromptBuilder
-    tools: list[Tool]
-    force_use_tool: ForceUseTool
-    files: list[InMemoryChatFile]
-    tool_call_info: list[ToolCallKickoff | ToolResponse | ToolCallFinalResult]
-    using_tool_calling_llm: bool
-
-    class Config:
-        arbitrary_types_allowed = True
-
-
-class LLMResponseHandlerManager:
-    def __init__(
-        self,
-        tool_handler: "ToolResponseHandler",
-        answer_handler: "AnswerResponseHandler",
-        is_cancelled: Callable[[], bool],
-    ):
-        self.tool_handler = tool_handler
-        self.answer_handler = answer_handler
-        self.is_cancelled = is_cancelled
-
-    def handle_llm_response(
-        self,
-        stream: Iterator[BaseMessage],
-    ) -> Generator[ResponsePart, None, None]:
-        all_messages: list[BaseMessage] = []
-        for message in stream:
-            if self.is_cancelled():
-                yield StreamStopInfo(stop_reason=StreamStopReason.CANCELLED)
-                return
-            # tool handler doesn't do anything until the full message is received
-            # NOTE: still need to run list() to get this to run
-            list(self.tool_handler.handle_response_part(message, all_messages))
-            yield from self.answer_handler.handle_response_part(message, all_messages)
-            all_messages.append(message)
-
-        # potentially give back all info on the selected tool call + its result
-        yield from self.tool_handler.handle_response_part(None, all_messages)
-        yield from self.answer_handler.handle_response_part(None, all_messages)
-
-    def next_llm_call(self, llm_call: LLMCall) -> LLMCall | None:
-        return self.tool_handler.next_llm_call(llm_call)
--- a/backend/danswer/llm/answering/models.py
+++ b/backend/danswer/llm/answering/models.py
@@ -1,163 +0,0 @@
-from collections.abc import Callable
-from collections.abc import Iterator
-from typing import TYPE_CHECKING
-
-from langchain.schema.messages import AIMessage
-from langchain.schema.messages import BaseMessage
-from langchain.schema.messages import HumanMessage
-from langchain.schema.messages import SystemMessage
-from pydantic import BaseModel
-from pydantic import ConfigDict
-from pydantic import Field
-from pydantic import model_validator
-
-from danswer.chat.models import AnswerQuestionStreamReturn
-from danswer.configs.constants import MessageType
-from danswer.file_store.models import InMemoryChatFile
-from danswer.llm.override_models import PromptOverride
-from danswer.llm.utils import build_content_with_imgs
-from danswer.tools.models import ToolCallFinalResult
-
-if TYPE_CHECKING:
-    from danswer.db.models import ChatMessage
-    from danswer.db.models import Prompt
-
-
-StreamProcessor = Callable[[Iterator[str]], AnswerQuestionStreamReturn]
-
-
-class PreviousMessage(BaseModel):
-    """Simplified version of `ChatMessage`"""
-
-    message: str
-    token_count: int
-    message_type: MessageType
-    files: list[InMemoryChatFile]
-    tool_call: ToolCallFinalResult | None
-
-    @classmethod
-    def from_chat_message(
-        cls, chat_message: "ChatMessage", available_files: list[InMemoryChatFile]
-    ) -> "PreviousMessage":
-        message_file_ids = (
-            [file["id"] for file in chat_message.files] if chat_message.files else []
-        )
-        return cls(
-            message=chat_message.message,
-            token_count=chat_message.token_count,
-            message_type=chat_message.message_type,
-            files=[
-                file
-                for file in available_files
-                if str(file.file_id) in message_file_ids
-            ],
-            tool_call=ToolCallFinalResult(
-                tool_name=chat_message.tool_call.tool_name,
-                tool_args=chat_message.tool_call.tool_arguments,
-                tool_result=chat_message.tool_call.tool_result,
-            )
-            if chat_message.tool_call
-            else None,
-        )
-
-    def to_langchain_msg(self) -> BaseMessage:
-        content = build_content_with_imgs(self.message, self.files)
-        if self.message_type == MessageType.USER:
-            return HumanMessage(content=content)
-        elif self.message_type == MessageType.ASSISTANT:
-            return AIMessage(content=content)
-        else:
-            return SystemMessage(content=content)
-
-
-class DocumentPruningConfig(BaseModel):
-    max_chunks: int | None = None
-    max_window_percentage: float | None = None
-    max_tokens: int | None = None
-    # different pruning behavior is expected when the
-    # user manually selects documents they want to chat with
-    # e.g. we don't want to truncate each document to be no more
-    # than one chunk long
-    is_manually_selected_docs: bool = False
-    # If user specifies to include additional context Chunks for each match, then different pruning
-    # is used. As many Sections as possible are included, and the last Section is truncated
-    # If this is false, all of the Sections are truncated if they are longer than the expected Chunk size.
-    # Sections are often expected to be longer than the maximum Chunk size but Chunks should not be.
-    use_sections: bool = True
-    # If using tools, then we need to consider the tool length
-    tool_num_tokens: int = 0
-    # If using a tool message to represent the docs, then we have to JSON serialize
-    # the document content, which adds to the token count.
-    using_tool_message: bool = False
-
-
-class ContextualPruningConfig(DocumentPruningConfig):
-    num_chunk_multiple: int
-
-    @classmethod
-    def from_doc_pruning_config(
-        cls, num_chunk_multiple: int, doc_pruning_config: DocumentPruningConfig
-    ) -> "ContextualPruningConfig":
-        return cls(num_chunk_multiple=num_chunk_multiple, **doc_pruning_config.dict())
-
-
-class CitationConfig(BaseModel):
-    all_docs_useful: bool = False
-
-
-class QuotesConfig(BaseModel):
-    pass
-
-
-class AnswerStyleConfig(BaseModel):
-    citation_config: CitationConfig | None = None
-    quotes_config: QuotesConfig | None = None
-    document_pruning_config: DocumentPruningConfig = Field(
-        default_factory=DocumentPruningConfig
-    )
-    # forces the LLM to return a structured response, see
-    # https://platform.openai.com/docs/guides/structured-outputs/introduction
-    # right now, only used by the simple chat API
-    structured_response_format: dict | None = None
-
-    @model_validator(mode="after")
-    def check_quotes_and_citation(self) -> "AnswerStyleConfig":
-        if self.citation_config is None and self.quotes_config is None:
-            raise ValueError(
-                "One of `citation_config` or `quotes_config` must be provided"
-            )
-
-        if self.citation_config is not None and self.quotes_config is not None:
-            raise ValueError(
-                "Only one of `citation_config` or `quotes_config` must be provided"
-            )
-
-        return self
-
-
-class PromptConfig(BaseModel):
-    """Final representation of the Prompt configuration passed
-    into the `Answer` object."""
-
-    system_prompt: str
-    task_prompt: str
-    datetime_aware: bool
-    include_citations: bool
-
-    @classmethod
-    def from_model(
-        cls, model: "Prompt", prompt_override: PromptOverride | None = None
-    ) -> "PromptConfig":
-        override_system_prompt = (
-            prompt_override.system_prompt if prompt_override else None
-        )
-        override_task_prompt = prompt_override.task_prompt if prompt_override else None
-
-        return cls(
-            system_prompt=override_system_prompt or model.system_prompt,
-            task_prompt=override_task_prompt or model.task_prompt,
-            datetime_aware=model.datetime_aware,
-            include_citations=model.include_citations,
-        )
-
-    model_config = ConfigDict(frozen=True)
--- a/backend/danswer/llm/answering/prompts/utils.py
+++ b/backend/danswer/llm/answering/prompts/utils.py
@@ -1,20 +0,0 @@
-from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT
-from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT_WITHOUT_CONTEXT
-
-
-def build_dummy_prompt(
-    system_prompt: str, task_prompt: str, retrieval_disabled: bool
-) -> str:
-    if retrieval_disabled:
-        return PARAMATERIZED_PROMPT_WITHOUT_CONTEXT.format(
-            user_query="<USER_QUERY>",
-            system_prompt=system_prompt,
-            task_prompt=task_prompt,
-        ).strip()
-
-    return PARAMATERIZED_PROMPT.format(
-        context_docs_str="<CONTEXT_DOCS>",
-        user_query="<USER_QUERY>",
-        system_prompt=system_prompt,
-        task_prompt=task_prompt,
-    ).strip()
--- a/backend/danswer/llm/answering/stream_processing/answer_response_handler.py
+++ b/backend/danswer/llm/answering/stream_processing/answer_response_handler.py
@@ -1,97 +0,0 @@
-import abc
-from collections.abc import Generator
-
-from langchain_core.messages import BaseMessage
-
-from danswer.chat.models import CitationInfo
-from danswer.chat.models import LlmDoc
-from danswer.llm.answering.llm_response_handler import ResponsePart
-from danswer.llm.answering.stream_processing.citation_processing import (
-    CitationProcessor,
-)
-from danswer.llm.answering.stream_processing.quotes_processing import (
-    QuotesProcessor,
-)
-from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping
-from danswer.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-class AnswerResponseHandler(abc.ABC):
-    @abc.abstractmethod
-    def handle_response_part(
-        self,
-        response_item: BaseMessage | None,
-        previous_response_items: list[BaseMessage],
-    ) -> Generator[ResponsePart, None, None]:
-        raise NotImplementedError
-
-
-class DummyAnswerResponseHandler(AnswerResponseHandler):
-    def handle_response_part(
-        self,
-        response_item: BaseMessage | None,
-        previous_response_items: list[BaseMessage],
-    ) -> Generator[ResponsePart, None, None]:
-        # This is a dummy handler that returns nothing
-        yield from []
-
-
-class CitationResponseHandler(AnswerResponseHandler):
-    def __init__(
-        self, context_docs: list[LlmDoc], doc_id_to_rank_map: DocumentIdOrderMapping
-    ):
-        self.context_docs = context_docs
-        self.doc_id_to_rank_map = doc_id_to_rank_map
-        self.citation_processor = CitationProcessor(
-            context_docs=self.context_docs,
-            doc_id_to_rank_map=self.doc_id_to_rank_map,
-        )
-        self.processed_text = ""
-        self.citations: list[CitationInfo] = []
-
-        # TODO remove this after citation issue is resolved
-        logger.debug(f"Document to ranking map {self.doc_id_to_rank_map}")
-
-    def handle_response_part(
-        self,
-        response_item: BaseMessage | None,
-        previous_response_items: list[BaseMessage],
-    ) -> Generator[ResponsePart, None, None]:
-        if response_item is None:
-            return
-
-        content = (
-            response_item.content if isinstance(response_item.content, str) else ""
-        )
-
-        # Process the new content through the citation processor
-        yield from self.citation_processor.process_token(content)
-
-
-class QuotesResponseHandler(AnswerResponseHandler):
-    def __init__(
-        self,
-        context_docs: list[LlmDoc],
-        is_json_prompt: bool = True,
-    ):
-        self.quotes_processor = QuotesProcessor(
-            context_docs=context_docs,
-            is_json_prompt=is_json_prompt,
-        )
-
-    def handle_response_part(
-        self,
-        response_item: BaseMessage | None,
-        previous_response_items: list[BaseMessage],
-    ) -> Generator[ResponsePart, None, None]:
-        if response_item is None:
-            yield from self.quotes_processor.process_token(None)
-            return
-
-        content = (
-            response_item.content if isinstance(response_item.content, str) else ""
-        )
-
-        yield from self.quotes_processor.process_token(content)
--- a/backend/danswer/one_shot_answer/answer_question.py
+++ b/backend/danswer/one_shot_answer/answer_question.py
@@ -1,456 +0,0 @@
-from collections.abc import Callable
-from collections.abc import Iterator
-from typing import cast
-
-from sqlalchemy.orm import Session
-
-from danswer.chat.chat_utils import reorganize_citations
-from danswer.chat.models import CitationInfo
-from danswer.chat.models import DanswerAnswerPiece
-from danswer.chat.models import DanswerContexts
-from danswer.chat.models import DanswerQuotes
-from danswer.chat.models import DocumentRelevance
-from danswer.chat.models import LLMRelevanceFilterResponse
-from danswer.chat.models import QADocsResponse
-from danswer.chat.models import RelevanceAnalysis
-from danswer.chat.models import StreamingError
-from danswer.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
-from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
-from danswer.configs.chat_configs import QA_TIMEOUT
-from danswer.configs.constants import MessageType
-from danswer.context.search.enums import LLMEvaluationType
-from danswer.context.search.models import RerankMetricsContainer
-from danswer.context.search.models import RetrievalMetricsContainer
-from danswer.context.search.utils import chunks_or_sections_to_search_docs
-from danswer.context.search.utils import dedupe_documents
-from danswer.db.chat import create_chat_session
-from danswer.db.chat import create_db_search_doc
-from danswer.db.chat import create_new_chat_message
-from danswer.db.chat import get_or_create_root_message
-from danswer.db.chat import translate_db_message_to_chat_message_detail
-from danswer.db.chat import translate_db_search_doc_to_server_search_doc
-from danswer.db.chat import update_search_docs_table_with_relevance
-from danswer.db.engine import get_session_context_manager
-from danswer.db.models import Persona
-from danswer.db.models import User
-from danswer.db.persona import get_prompt_by_id
-from danswer.llm.answering.answer import Answer
-from danswer.llm.answering.models import AnswerStyleConfig
-from danswer.llm.answering.models import CitationConfig
-from danswer.llm.answering.models import DocumentPruningConfig
-from danswer.llm.answering.models import PromptConfig
-from danswer.llm.answering.models import QuotesConfig
-from danswer.llm.factory import get_llms_for_persona
-from danswer.llm.factory import get_main_llm_from_tuple
-from danswer.natural_language_processing.utils import get_tokenizer
-from danswer.one_shot_answer.models import DirectQARequest
-from danswer.one_shot_answer.models import OneShotQAResponse
-from danswer.one_shot_answer.models import QueryRephrase
-from danswer.one_shot_answer.qa_utils import combine_message_thread
-from danswer.one_shot_answer.qa_utils import slackify_message_thread
-from danswer.secondary_llm_flows.answer_validation import get_answer_validity
-from danswer.secondary_llm_flows.query_expansion import thread_based_query_rephrase
-from danswer.server.query_and_chat.models import ChatMessageDetail
-from danswer.server.utils import get_json_line
-from danswer.tools.force import ForceUseTool
-from danswer.tools.models import ToolResponse
-from danswer.tools.tool_implementations.search.search_tool import SEARCH_DOC_CONTENT_ID
-from danswer.tools.tool_implementations.search.search_tool import (
-    SEARCH_RESPONSE_SUMMARY_ID,
-)
-from danswer.tools.tool_implementations.search.search_tool import SearchResponseSummary
-from danswer.tools.tool_implementations.search.search_tool import SearchTool
-from danswer.tools.tool_implementations.search.search_tool import (
-    SECTION_RELEVANCE_LIST_ID,
-)
-from danswer.tools.tool_runner import ToolCallKickoff
-from danswer.utils.logger import setup_logger
-from danswer.utils.long_term_log import LongTermLogger
-from danswer.utils.timing import log_generator_function_time
-from danswer.utils.variable_functionality import fetch_ee_implementation_or_noop
-
-logger = setup_logger()
-
-AnswerObjectIterator = Iterator[
-    QueryRephrase
-    | QADocsResponse
-    | LLMRelevanceFilterResponse
-    | DanswerAnswerPiece
-    | DanswerQuotes
-    | DanswerContexts
-    | StreamingError
-    | ChatMessageDetail
-    | CitationInfo
-    | ToolCallKickoff
-    | DocumentRelevance
-]
-
-
-def stream_answer_objects(
-    query_req: DirectQARequest,
-    user: User | None,
-    # These need to be passed in because in Web UI one shot flow,
-    # we can have much more document as there is no history.
-    # For Slack flow, we need to save more tokens for the thread context
-    max_document_tokens: int | None,
-    max_history_tokens: int | None,
-    db_session: Session,
-    # Needed to translate persona num_chunks to tokens to the LLM
-    default_num_chunks: float = MAX_CHUNKS_FED_TO_CHAT,
-    timeout: int = QA_TIMEOUT,
-    bypass_acl: bool = False,
-    use_citations: bool = False,
-    danswerbot_flow: bool = False,
-    retrieval_metrics_callback: (
-        Callable[[RetrievalMetricsContainer], None] | None
-    ) = None,
-    rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
-) -> AnswerObjectIterator:
-    """Streams in order:
-    1. [always] Retrieved documents, stops flow if nothing is found
-    2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on
-    3. [always] A set of streamed DanswerAnswerPiece and DanswerQuotes at the end
-                or an error anywhere along the line if something fails
-    4. [always] Details on the final AI response message that is created
-    """
-    user_id = user.id if user is not None else None
-    query_msg = query_req.messages[-1]
-    history = query_req.messages[:-1]
-
-    chat_session = create_chat_session(
-        db_session=db_session,
-        description="",  # One shot queries don't need naming as it's never displayed
-        user_id=user_id,
-        persona_id=query_req.persona_id,
-        one_shot=True,
-        danswerbot_flow=danswerbot_flow,
-    )
-
-    # permanent "log" store, used primarily for debugging
-    long_term_logger = LongTermLogger(
-        metadata={"user_id": str(user_id), "chat_session_id": str(chat_session.id)}
-    )
-
-    temporary_persona: Persona | None = None
-
-    if query_req.persona_config is not None:
-        temporary_persona = fetch_ee_implementation_or_noop(
-            "danswer.server.query_and_chat.utils", "create_temporary_persona", None
-        )(db_session=db_session, persona_config=query_req.persona_config, user=user)
-
-    persona = temporary_persona if temporary_persona else chat_session.persona
-
-    try:
-        llm, fast_llm = get_llms_for_persona(
-            persona=persona, long_term_logger=long_term_logger
-        )
-    except ValueError as e:
-        logger.error(
-            f"Failed to initialize LLMs for persona '{persona.name}': {str(e)}"
-        )
-        if "No LLM provider" in str(e):
-            raise ValueError(
-                "Please configure a Generative AI model to use this feature."
-            ) from e
-        raise ValueError(
-            "Failed to initialize the AI model. Please check your configuration and try again."
-        ) from e
-
-    llm_tokenizer = get_tokenizer(
-        model_name=llm.config.model_name,
-        provider_type=llm.config.model_provider,
-    )
-
-    # Create a chat session which will just store the root message, the query, and the AI response
-    root_message = get_or_create_root_message(
-        chat_session_id=chat_session.id, db_session=db_session
-    )
-
-    history_str = combine_message_thread(
-        messages=history,
-        max_tokens=max_history_tokens,
-        llm_tokenizer=llm_tokenizer,
-    )
-
-    rephrased_query = query_req.query_override or thread_based_query_rephrase(
-        user_query=query_msg.message,
-        history_str=history_str,
-    )
-
-    # Given back ahead of the documents for latency reasons
-    # In chat flow it's given back along with the documents
-    yield QueryRephrase(rephrased_query=rephrased_query)
-
-    prompt = None
-    if query_req.prompt_id is not None:
-        # NOTE: let the user access any prompt as long as the Persona is shared
-        # with them
-        prompt = get_prompt_by_id(
-            prompt_id=query_req.prompt_id, user=None, db_session=db_session
-        )
-    if prompt is None:
-        if not persona.prompts:
-            raise RuntimeError(
-                "Persona does not have any prompts - this should never happen"
-            )
-        prompt = persona.prompts[0]
-
-    user_message_str = query_msg.message
-    # For this endpoint, we only save one user message to the chat session
-    # However, for slackbot, we want to include the history of the entire thread
-    if danswerbot_flow:
-        # Right now, we only support bringing over citations and search docs
-        # from the last message in the thread, not the entire thread
-        # in the future, we may want to retrieve the entire thread
-        user_message_str = slackify_message_thread(query_req.messages)
-
-    # Create the first User query message
-    new_user_message = create_new_chat_message(
-        chat_session_id=chat_session.id,
-        parent_message=root_message,
-        prompt_id=query_req.prompt_id,
-        message=user_message_str,
-        token_count=len(llm_tokenizer.encode(user_message_str)),
-        message_type=MessageType.USER,
-        db_session=db_session,
-        commit=True,
-    )
-
-    prompt_config = PromptConfig.from_model(prompt)
-    document_pruning_config = DocumentPruningConfig(
-        max_chunks=int(
-            persona.num_chunks if persona.num_chunks is not None else default_num_chunks
-        ),
-        max_tokens=max_document_tokens,
-    )
-
-    answer_config = AnswerStyleConfig(
-        citation_config=CitationConfig() if use_citations else None,
-        quotes_config=QuotesConfig() if not use_citations else None,
-        document_pruning_config=document_pruning_config,
-    )
-
-    search_tool = SearchTool(
-        db_session=db_session,
-        user=user,
-        evaluation_type=(
-            LLMEvaluationType.SKIP
-            if DISABLE_LLM_DOC_RELEVANCE
-            else query_req.evaluation_type
-        ),
-        persona=persona,
-        retrieval_options=query_req.retrieval_options,
-        prompt_config=prompt_config,
-        llm=llm,
-        fast_llm=fast_llm,
-        pruning_config=document_pruning_config,
-        answer_style_config=answer_config,
-        bypass_acl=bypass_acl,
-        chunks_above=query_req.chunks_above,
-        chunks_below=query_req.chunks_below,
-        full_doc=query_req.full_doc,
-    )
-
-    answer = Answer(
-        question=query_msg.message,
-        answer_style_config=answer_config,
-        prompt_config=PromptConfig.from_model(prompt),
-        llm=get_main_llm_from_tuple(
-            get_llms_for_persona(persona=persona, long_term_logger=long_term_logger)
-        ),
-        single_message_history=history_str,
-        tools=[search_tool] if search_tool else [],
-        force_use_tool=(
-            ForceUseTool(
-                tool_name=search_tool.name,
-                args={"query": rephrased_query},
-                force_use=True,
-            )
-        ),
-        # for now, don't use tool calling for this flow, as we haven't
-        # tested quotes with tool calling too much yet
-        skip_explicit_tool_calling=True,
-        return_contexts=query_req.return_contexts,
-        skip_gen_ai_answer_generation=query_req.skip_gen_ai_answer_generation,
-    )
-    # won't be any FileChatDisplay responses since that tool is never passed in
-    for packet in cast(AnswerObjectIterator, answer.processed_streamed_output):
-        # for one-shot flow, don't currently do anything with these
-        if isinstance(packet, ToolResponse):
-            # (likely fine that it comes after the initial creation of the search docs)
-            if packet.id == SEARCH_RESPONSE_SUMMARY_ID:
-                search_response_summary = cast(SearchResponseSummary, packet.response)
-
-                top_docs = chunks_or_sections_to_search_docs(
-                    search_response_summary.top_sections
-                )
-
-                # Deduping happens at the last step to avoid harming quality by dropping content early on
-                deduped_docs = top_docs
-                if query_req.retrieval_options.dedupe_docs:
-                    deduped_docs, dropped_inds = dedupe_documents(top_docs)
-
-                reference_db_search_docs = [
-                    create_db_search_doc(server_search_doc=doc, db_session=db_session)
-                    for doc in deduped_docs
-                ]
-
-                response_docs = [
-                    translate_db_search_doc_to_server_search_doc(db_search_doc)
-                    for db_search_doc in reference_db_search_docs
-                ]
-
-                initial_response = QADocsResponse(
-                    rephrased_query=rephrased_query,
-                    top_documents=response_docs,
-                    predicted_flow=search_response_summary.predicted_flow,
-                    predicted_search=search_response_summary.predicted_search,
-                    applied_source_filters=search_response_summary.final_filters.source_type,
-                    applied_time_cutoff=search_response_summary.final_filters.time_cutoff,
-                    recency_bias_multiplier=search_response_summary.recency_bias_multiplier,
-                )
-
-                yield initial_response
-
-            elif packet.id == SEARCH_DOC_CONTENT_ID:
-                yield packet.response
-
-            elif packet.id == SECTION_RELEVANCE_LIST_ID:
-                document_based_response = {}
-
-                if packet.response is not None:
-                    for evaluation in packet.response:
-                        document_based_response[
-                            evaluation.document_id
-                        ] = RelevanceAnalysis(
-                            relevant=evaluation.relevant, content=evaluation.content
-                        )
-
-                evaluation_response = DocumentRelevance(
-                    relevance_summaries=document_based_response
-                )
-                if reference_db_search_docs is not None:
-                    update_search_docs_table_with_relevance(
-                        db_session=db_session,
-                        reference_db_search_docs=reference_db_search_docs,
-                        relevance_summary=evaluation_response,
-                    )
-                yield evaluation_response
-
-        else:
-            yield packet
-
-    # Saving Gen AI answer and responding with message info
-    gen_ai_response_message = create_new_chat_message(
-        chat_session_id=chat_session.id,
-        parent_message=new_user_message,
-        prompt_id=query_req.prompt_id,
-        message=answer.llm_answer,
-        token_count=len(llm_tokenizer.encode(answer.llm_answer)),
-        message_type=MessageType.ASSISTANT,
-        error=None,
-        reference_docs=reference_db_search_docs,
-        db_session=db_session,
-        commit=True,
-    )
-
-    msg_detail_response = translate_db_message_to_chat_message_detail(
-        gen_ai_response_message
-    )
-    yield msg_detail_response
-
-
-@log_generator_function_time()
-def stream_search_answer(
-    query_req: DirectQARequest,
-    user: User | None,
-    max_document_tokens: int | None,
-    max_history_tokens: int | None,
-) -> Iterator[str]:
-    with get_session_context_manager() as session:
-        objects = stream_answer_objects(
-            query_req=query_req,
-            user=user,
-            max_document_tokens=max_document_tokens,
-            max_history_tokens=max_history_tokens,
-            db_session=session,
-        )
-        for obj in objects:
-            yield get_json_line(obj.model_dump())
-
-
-def get_search_answer(
-    query_req: DirectQARequest,
-    user: User | None,
-    max_document_tokens: int | None,
-    max_history_tokens: int | None,
-    db_session: Session,
-    answer_generation_timeout: int = QA_TIMEOUT,
-    enable_reflexion: bool = False,
-    bypass_acl: bool = False,
-    use_citations: bool = False,
-    danswerbot_flow: bool = False,
-    retrieval_metrics_callback: (
-        Callable[[RetrievalMetricsContainer], None] | None
-    ) = None,
-    rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
-) -> OneShotQAResponse:
-    """Collects the streamed one shot answer responses into a single object"""
-    qa_response = OneShotQAResponse()
-
-    results = stream_answer_objects(
-        query_req=query_req,
-        user=user,
-        max_document_tokens=max_document_tokens,
-        max_history_tokens=max_history_tokens,
-        db_session=db_session,
-        bypass_acl=bypass_acl,
-        use_citations=use_citations,
-        danswerbot_flow=danswerbot_flow,
-        timeout=answer_generation_timeout,
-        retrieval_metrics_callback=retrieval_metrics_callback,
-        rerank_metrics_callback=rerank_metrics_callback,
-    )
-
-    answer = ""
-    for packet in results:
-        if isinstance(packet, QueryRephrase):
-            qa_response.rephrase = packet.rephrased_query
-        if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece:
-            answer += packet.answer_piece
-        elif isinstance(packet, QADocsResponse):
-            qa_response.docs = packet
-        elif isinstance(packet, LLMRelevanceFilterResponse):
-            qa_response.llm_selected_doc_indices = packet.llm_selected_doc_indices
-        elif isinstance(packet, DanswerQuotes):
-            qa_response.quotes = packet
-        elif isinstance(packet, CitationInfo):
-            if qa_response.citations:
-                qa_response.citations.append(packet)
-            else:
-                qa_response.citations = [packet]
-        elif isinstance(packet, DanswerContexts):
-            qa_response.contexts = packet
-        elif isinstance(packet, StreamingError):
-            qa_response.error_msg = packet.error
-        elif isinstance(packet, ChatMessageDetail):
-            qa_response.chat_message_id = packet.message_id
-
-    if answer:
-        qa_response.answer = answer
-
-    if enable_reflexion:
-        # Because follow up messages are explicitly tagged, we don't need to verify the answer
-        if len(query_req.messages) == 1:
-            first_query = query_req.messages[0].message
-            qa_response.answer_valid = get_answer_validity(first_query, answer)
-        else:
-            qa_response.answer_valid = True
-
-    if use_citations and qa_response.answer and qa_response.citations:
-        # Reorganize citation nums to be in the same order as the answer
-        qa_response.answer, qa_response.citations = reorganize_citations(
-            qa_response.answer, qa_response.citations
-        )
-
-    return qa_response
--- a/backend/danswer/one_shot_answer/models.py
+++ b/backend/danswer/one_shot_answer/models.py
@@ -1,114 +0,0 @@
-from typing import Any
-
-from pydantic import BaseModel
-from pydantic import Field
-from pydantic import model_validator
-
-from danswer.chat.models import CitationInfo
-from danswer.chat.models import DanswerContexts
-from danswer.chat.models import DanswerQuotes
-from danswer.chat.models import QADocsResponse
-from danswer.configs.constants import MessageType
-from danswer.context.search.enums import LLMEvaluationType
-from danswer.context.search.enums import RecencyBiasSetting
-from danswer.context.search.enums import SearchType
-from danswer.context.search.models import ChunkContext
-from danswer.context.search.models import RerankingDetails
-from danswer.context.search.models import RetrievalDetails
-
-
-class QueryRephrase(BaseModel):
-    rephrased_query: str
-
-
-class ThreadMessage(BaseModel):
-    message: str
-    sender: str | None = None
-    role: MessageType = MessageType.USER
-
-
-class PromptConfig(BaseModel):
-    name: str
-    description: str = ""
-    system_prompt: str
-    task_prompt: str = ""
-    include_citations: bool = True
-    datetime_aware: bool = True
-
-
-class ToolConfig(BaseModel):
-    id: int
-
-
-class PersonaConfig(BaseModel):
-    name: str
-    description: str
-    search_type: SearchType = SearchType.SEMANTIC
-    num_chunks: float | None = None
-    llm_relevance_filter: bool = False
-    llm_filter_extraction: bool = False
-    recency_bias: RecencyBiasSetting = RecencyBiasSetting.AUTO
-    llm_model_provider_override: str | None = None
-    llm_model_version_override: str | None = None
-
-    prompts: list[PromptConfig] = Field(default_factory=list)
-    prompt_ids: list[int] = Field(default_factory=list)
-
-    document_set_ids: list[int] = Field(default_factory=list)
-    tools: list[ToolConfig] = Field(default_factory=list)
-    tool_ids: list[int] = Field(default_factory=list)
-    custom_tools_openapi: list[dict[str, Any]] = Field(default_factory=list)
-
-
-class DirectQARequest(ChunkContext):
-    persona_config: PersonaConfig | None = None
-    persona_id: int | None = None
-
-    messages: list[ThreadMessage]
-    prompt_id: int | None = None
-    multilingual_query_expansion: list[str] | None = None
-    retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails)
-    rerank_settings: RerankingDetails | None = None
-    evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED
-
-    chain_of_thought: bool = False
-    return_contexts: bool = False
-
-    # allows the caller to specify the exact search query they want to use
-    # can be used if the message sent to the LLM / query should not be the same
-    # will also disable Thread-based Rewording if specified
-    query_override: str | None = None
-
-    # If True, skips generative an AI response to the search query
-    skip_gen_ai_answer_generation: bool = False
-
-    @model_validator(mode="after")
-    def check_persona_fields(self) -> "DirectQARequest":
-        if (self.persona_config is None) == (self.persona_id is None):
-            raise ValueError("Exactly one of persona_config or persona_id must be set")
-        return self
-
-    @model_validator(mode="after")
-    def check_chain_of_thought_and_prompt_id(self) -> "DirectQARequest":
-        if self.chain_of_thought and self.prompt_id is not None:
-            raise ValueError(
-                "If chain_of_thought is True, prompt_id must be None"
-                "The chain of thought prompt is only for question "
-                "answering and does not accept customizing."
-            )
-
-        return self
-
-
-class OneShotQAResponse(BaseModel):
-    # This is built piece by piece, any of these can be None as the flow could break
-    answer: str | None = None
-    rephrase: str | None = None
-    quotes: DanswerQuotes | None = None
-    citations: list[CitationInfo] | None = None
-    docs: QADocsResponse | None = None
-    llm_selected_doc_indices: list[int] | None = None
-    error_msg: str | None = None
-    answer_valid: bool = True  # Reflexion result, default True if Reflexion not run
-    chat_message_id: int | None = None
-    contexts: DanswerContexts | None = None
--- a/backend/danswer/one_shot_answer/qa_utils.py
+++ b/backend/danswer/one_shot_answer/qa_utils.py
@@ -1,81 +0,0 @@
-from collections.abc import Generator
-
-from danswer.configs.constants import MessageType
-from danswer.natural_language_processing.utils import BaseTokenizer
-from danswer.one_shot_answer.models import ThreadMessage
-from danswer.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def simulate_streaming_response(model_out: str) -> Generator[str, None, None]:
-    """Mock streaming by generating the passed in model output, character by character"""
-    for token in model_out:
-        yield token
-
-
-def combine_message_thread(
-    messages: list[ThreadMessage],
-    max_tokens: int | None,
-    llm_tokenizer: BaseTokenizer,
-) -> str:
-    """Used to create a single combined message context from threads"""
-    if not messages:
-        return ""
-
-    message_strs: list[str] = []
-    total_token_count = 0
-
-    for message in reversed(messages):
-        if message.role == MessageType.USER:
-            role_str = message.role.value.upper()
-            if message.sender:
-                role_str += " " + message.sender
-            else:
-                # Since other messages might have the user identifying information
-                # better to use Unknown for symmetry
-                role_str += " Unknown"
-        else:
-            role_str = message.role.value.upper()
-
-        msg_str = f"{role_str}:\n{message.message}"
-        message_token_count = len(llm_tokenizer.encode(msg_str))
-
-        if (
-            max_tokens is not None
-            and total_token_count + message_token_count > max_tokens
-        ):
-            break
-
-        message_strs.insert(0, msg_str)
-        total_token_count += message_token_count
-
-    return "\n\n".join(message_strs)
-
-
-def slackify_message(message: ThreadMessage) -> str:
-    if message.role != MessageType.USER:
-        return message.message
-
-    return f"{message.sender or 'Unknown User'} said in Slack:\n{message.message}"
-
-
-def slackify_message_thread(messages: list[ThreadMessage]) -> str:
-    if not messages:
-        return ""
-
-    message_strs: list[str] = []
-    for message in messages:
-        if message.role == MessageType.USER:
-            message_text = (
-                f"{message.sender or 'Unknown User'} said in Slack:\n{message.message}"
-            )
-        elif message.role == MessageType.ASSISTANT:
-            message_text = f"DanswerBot said in Slack:\n{message.message}"
-        else:
-            message_text = (
-                f"{message.role.value.upper()} said in Slack:\n{message.message}"
-            )
-        message_strs.append(message_text)
-
-    return "\n\n".join(message_strs)
--- a/backend/danswer/seeding/initial_docs.json
+++ b/backend/danswer/seeding/initial_docs.json
--- a/backend/danswer/seeding/initial_docs_cohere.json
+++ b/backend/danswer/seeding/initial_docs_cohere.json
@@ -1,44 +0,0 @@
-[
-  {
-    "url": "https://docs.danswer.dev/more/use_cases/overview",
-    "title": "Use Cases Overview",
-    "content": "How to leverage Danswer in your organization\n\nDanswer Overview\nDanswer is the AI Assistant connected to your organization's docs, apps, and people. Danswer makes Generative AI more versatile for work by enabling new types of questions like \"What is the most common feature request we've heard from customers this month\". Whereas other AI systems have no context of your team and are generally unhelpful with work related questions, Danswer makes it possible to ask these questions in natural language and get back answers in seconds.\n\nDanswer can connect to +30 different tools and the use cases are not limited to the ones in the following pages. The highlighted use cases are for inspiration and come from feedback gathered from our users and customers.\n\n\nCommon Getting Started Questions:\n\nWhy are these docs connected in my Danswer deployment?\nAnswer: This is just an example of how connectors work in Danswer. You can connect up your own team's knowledge and you will be able to ask questions unique to your organization. Danswer will keep all of the knowledge up to date and in sync with your connected applications.\n\nIs my data being sent anywhere when I connect it up to Danswer?\nAnswer: No! Danswer is built with data security as our highest priority. We open sourced it so our users can know exactly what is going on with their data. By default all of the document processing happens within Danswer. The only time it is sent outward is for the GenAI call to generate answers.\n\nWhere is the feature for auto sync-ing document level access permissions from all connected sources?\nAnswer: This falls under the Enterprise Edition set of Danswer features built on top of the MIT/community edition. If you are on Danswer Cloud, you have access to them by default. If you're running it yourself, reach out to the Danswer team to receive access.",
-    "chunk_ind": 0
-  },
-  {
-    "url": "https://docs.danswer.dev/more/use_cases/enterprise_search",
-    "title": "Enterprise Search",
-    "content": "Value of Enterprise Search with Danswer\n\nWhat is Enterprise Search and why is it Important?\nAn Enterprise Search system gives team members a single place to access all of the disparate knowledge of an organization. Critical information is saved across a host of channels like call transcripts with prospects, engineering design docs, IT runbooks, customer support email exchanges, project management tickets, and more. As fast moving teams scale up, information gets spread out and more disorganized.\n\nSince it quickly becomes infeasible to check across every source, decisions get made on incomplete information, employee satisfaction decreases, and the most valuable members of your team are tied up with constant distractions as junior teammates are unable to unblock themselves. Danswer solves this problem by letting anyone on the team access all of the knowledge across your organization in a permissioned and secure way. Users can ask questions in natural language and get back answers and documents across all of the connected sources instantly.\n\nWhat's the real cost?\nA typical knowledge worker spends over 2 hours a week on search, but more than that, the cost of incomplete or incorrect information can be extremely high. Customer support/success that isn't able to find the reference to similar cases could cause hours or even days of delay leading to lower customer satisfaction or in the worst case - churn. An account exec not realizing that a prospect had previously mentioned a specific need could lead to lost deals. An engineer not realizing a similar feature had previously been built could result in weeks of wasted development time and tech debt with duplicate implementation. With a lack of knowledge, your whole organization is navigating in the dark - inefficient and mistake prone.",
-    "chunk_ind": 0
-  },
-  {
-    "url": "https://docs.danswer.dev/more/use_cases/enterprise_search",
-    "title": "Enterprise Search",
-    "content": "More than Search\nWhen analyzing the entire corpus of knowledge within your company is as easy as asking a question in a search bar, your entire team can stay informed and up to date. Danswer also makes it trivial to identify where knowledge is well documented and where it is lacking. Team members who are centers of knowledge can begin to effectively document their expertise since it is no longer being thrown into a black hole. All of this allows the organization to achieve higher efficiency and drive business outcomes.\n\nWith Generative AI, the entire user experience has evolved as well. For example, instead of just finding similar cases for your customer support team to reference, Danswer breaks down the issue and explains it so that even the most junior members can understand it. This in turn lets them give the most holistic and technically accurate response possible to your customers. On the other end, even the super stars of your sales team will not be able to review 10 hours of transcripts before hopping on that critical call, but Danswer can easily parse through it in mere seconds and give crucial context to help your team close.",
-    "chunk_ind": 0
-  },
-  {
-    "url": "https://docs.danswer.dev/more/use_cases/ai_platform",
-    "title": "AI Platform",
-    "content": "Build AI Agents powered by the knowledge and workflows specific to your organization.\n\nBeyond Answers\nAgents enabled by generative AI and reasoning capable models are helping teams to automate their work. Danswer is helping teams make it happen. Danswer provides out of the box user chat sessions, attaching custom tools, handling LLM reasoning, code execution, data analysis, referencing internal knowledge, and much more.\n\nDanswer as a platform is not a no-code agent builder. We are made by developers for developers and this gives your team the full flexibility and power to create agents not constrained by blocks and simple logic paths.\n\nFlexibility and Extensibility\nDanswer is open source and completely whitebox. This not only gives transparency to what happens within the system but also means that your team can directly modify the source code to suit your unique needs.",
-    "chunk_ind": 0
-  },
-  {
-    "url": "https://docs.danswer.dev/more/use_cases/customer_support",
-    "title": "Customer Support",
-    "content": "Help your customer support team instantly answer any question across your entire product.\n\nAI Enabled Support\nCustomer support agents have one of the highest breadth jobs. They field requests that cover the entire surface area of the product and need to help your users find success on extremely short timelines. Because they're not the same people who designed or built the system, they often lack the depth of understanding needed - resulting in delays and escalations to other teams. Modern teams are leveraging AI to help their CS team optimize the speed and quality of these critical customer-facing interactions.\n\nThe Importance of Context\nThere are two critical components of AI copilots for customer support. The first is that the AI system needs to be connected with as much information as possible (not just support tools like Zendesk or Intercom) and that the knowledge needs to be as fresh as possible. Sometimes a fix might even be in places rarely checked by CS such as pull requests in a code repository. The second critical component is the ability of the AI system to break down difficult concepts and convoluted processes into more digestible descriptions and for your team members to be able to chat back and forth with the system to build a better understanding.\n\nDanswer takes care of both of these. The system connects up to over 30+ different applications and the knowledge is pulled in constantly so that the information access is always up to date.",
-    "chunk_ind": 0
-  },
-  {
-    "url": "https://docs.danswer.dev/more/use_cases/sales",
-    "title": "Sales",
-    "content": "Keep your team up to date on every conversation and update so they can close.\n\nRecall Every Detail\nBeing able to instantly revisit every detail of any call without reading transcripts is helping Sales teams provide more tailored pitches, build stronger relationships, and close more deals. Instead of searching and reading through hours of transcripts in preparation for a call, your team can now ask Danswer \"What specific features was ACME interested in seeing for the demo\". Since your team doesn't have time to read every transcript prior to a call, Danswer provides a more thorough summary because it can instantly parse hundreds of pages and distill out the relevant information. Even for fast lookups it becomes much more convenient - for example to brush up on connection building topics by asking \"What rapport building topic did we chat about in the last call with ACME\".\n\nKnow Every Product Update\nIt is impossible for Sales teams to keep up with every product update. Because of this, when a prospect has a question that the Sales team does not know, they have no choice but to rely on the Product and Engineering orgs to get an authoritative answer. Not only is this distracting to the other teams, it also slows down the time to respond to the prospect (and as we know, time is the biggest killer of deals). With Danswer, it is even possible to get answers live on call because of how fast accessing information becomes. A question like \"Have we shipped the Microsoft AD integration yet?\" can now be answered in seconds meaning that prospects can get answers while on the call instead of asynchronously and sales cycles are reduced as a result.",
-    "chunk_ind": 0
-  },
-  {
-    "url": "https://docs.danswer.dev/more/use_cases/operations",
-    "title": "Operations",
-    "content": "Double the productivity of your Ops teams like IT, HR, etc.\n\nAutomatically Resolve Tickets\nModern teams are leveraging AI to auto-resolve up to 50% of tickets. Whether it is an employee asking about benefits details or how to set up the VPN for remote work, Danswer can help your team help themselves. This frees up your team to do the real impactful work of landing star candidates or improving your internal processes.\n\nAI Aided Onboarding\nOne of the periods where your team needs the most help is when they're just ramping up. Instead of feeling lost in dozens of new tools, Danswer gives them a single place where they can ask about anything in natural language. Whether it's how to set up their work environment or what their onboarding goals are, Danswer can walk them through every step with the help of Generative AI. This lets your team feel more empowered and gives time back to the more seasoned members of your team to focus on moving the needle.",
-    "chunk_ind": 0
-  }
-]
--- a/backend/danswer/server/features/prompt/api.py
+++ b/backend/danswer/server/features/prompt/api.py
@@ -1,152 +0,0 @@
-from fastapi import APIRouter
-from fastapi import Depends
-from fastapi import HTTPException
-from sqlalchemy.orm import Session
-from starlette import status
-
-from danswer.auth.users import current_user
-from danswer.db.engine import get_session
-from danswer.db.models import User
-from danswer.db.persona import get_personas_by_ids
-from danswer.db.persona import get_prompt_by_id
-from danswer.db.persona import get_prompts
-from danswer.db.persona import mark_prompt_as_deleted
-from danswer.db.persona import upsert_prompt
-from danswer.server.features.prompt.models import CreatePromptRequest
-from danswer.server.features.prompt.models import PromptSnapshot
-from danswer.utils.logger import setup_logger
-
-
-# Note: As prompts are fairly innocuous/harmless, there are no protections
-# to prevent users from messing with prompts of other users.
-
-logger = setup_logger()
-
-basic_router = APIRouter(prefix="/prompt")
-
-
-def create_update_prompt(
-    prompt_id: int | None,
-    create_prompt_request: CreatePromptRequest,
-    user: User | None,
-    db_session: Session,
-) -> PromptSnapshot:
-    personas = (
-        list(
-            get_personas_by_ids(
-                persona_ids=create_prompt_request.persona_ids,
-                db_session=db_session,
-            )
-        )
-        if create_prompt_request.persona_ids
-        else []
-    )
-
-    prompt = upsert_prompt(
-        prompt_id=prompt_id,
-        user=user,
-        name=create_prompt_request.name,
-        description=create_prompt_request.description,
-        system_prompt=create_prompt_request.system_prompt,
-        task_prompt=create_prompt_request.task_prompt,
-        include_citations=create_prompt_request.include_citations,
-        datetime_aware=create_prompt_request.datetime_aware,
-        personas=personas,
-        db_session=db_session,
-    )
-    return PromptSnapshot.from_model(prompt)
-
-
-@basic_router.post("")
-def create_prompt(
-    create_prompt_request: CreatePromptRequest,
-    user: User | None = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> PromptSnapshot:
-    try:
-        return create_update_prompt(
-            prompt_id=None,
-            create_prompt_request=create_prompt_request,
-            user=user,
-            db_session=db_session,
-        )
-    except ValueError as ve:
-        logger.exception(ve)
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Failed to create Persona, invalid info.",
-        )
-    except Exception as e:
-        logger.exception(e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An unexpected error occurred. Please try again later.",
-        )
-
-
-@basic_router.patch("/{prompt_id}")
-def update_prompt(
-    prompt_id: int,
-    update_prompt_request: CreatePromptRequest,
-    user: User | None = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> PromptSnapshot:
-    try:
-        return create_update_prompt(
-            prompt_id=prompt_id,
-            create_prompt_request=update_prompt_request,
-            user=user,
-            db_session=db_session,
-        )
-    except ValueError as ve:
-        logger.exception(ve)
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Failed to create Persona, invalid info.",
-        )
-    except Exception as e:
-        logger.exception(e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="An unexpected error occurred. Please try again later.",
-        )
-
-
-@basic_router.delete("/{prompt_id}")
-def delete_prompt(
-    prompt_id: int,
-    user: User | None = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> None:
-    mark_prompt_as_deleted(
-        prompt_id=prompt_id,
-        user=user,
-        db_session=db_session,
-    )
-
-
-@basic_router.get("")
-def list_prompts(
-    user: User | None = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> list[PromptSnapshot]:
-    user_id = user.id if user is not None else None
-    return [
-        PromptSnapshot.from_model(prompt)
-        for prompt in get_prompts(user_id=user_id, db_session=db_session)
-    ]
-
-
-@basic_router.get("/{prompt_id}")
-def get_prompt(
-    prompt_id: int,
-    user: User | None = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> PromptSnapshot:
-    return PromptSnapshot.from_model(
-        get_prompt_by_id(
-            prompt_id=prompt_id,
-            user=user,
-            db_session=db_session,
-        )
-    )
--- a/backend/danswer/server/features/prompt/models.py
+++ b/backend/danswer/server/features/prompt/models.py
@@ -1,41 +0,0 @@
-from pydantic import BaseModel
-
-from danswer.db.models import Prompt
-
-
-class CreatePromptRequest(BaseModel):
-    name: str
-    description: str
-    system_prompt: str
-    task_prompt: str
-    include_citations: bool = False
-    datetime_aware: bool = False
-    persona_ids: list[int] | None = None
-
-
-class PromptSnapshot(BaseModel):
-    id: int
-    name: str
-    description: str
-    system_prompt: str
-    task_prompt: str
-    include_citations: bool
-    datetime_aware: bool
-    default_prompt: bool
-    # Not including persona info, not needed
-
-    @classmethod
-    def from_model(cls, prompt: Prompt) -> "PromptSnapshot":
-        if prompt.deleted:
-            raise ValueError("Prompt has been deleted")
-
-        return PromptSnapshot(
-            id=prompt.id,
-            name=prompt.name,
-            description=prompt.description,
-            system_prompt=prompt.system_prompt,
-            task_prompt=prompt.task_prompt,
-            include_citations=prompt.include_citations,
-            datetime_aware=prompt.datetime_aware,
-            default_prompt=prompt.default_prompt,
-        )
--- a/backend/danswer/server/manage/get_state.py
+++ b/backend/danswer/server/manage/get_state.py
@@ -1,27 +0,0 @@
-from fastapi import APIRouter
-
-from danswer import __version__
-from danswer.auth.users import user_needs_to_be_verified
-from danswer.configs.app_configs import AUTH_TYPE
-from danswer.server.manage.models import AuthTypeResponse
-from danswer.server.manage.models import VersionResponse
-from danswer.server.models import StatusResponse
-
-router = APIRouter()
-
-
-@router.get("/health")
-def healthcheck() -> StatusResponse:
-    return StatusResponse(success=True, message="ok")
-
-
-@router.get("/auth/type")
-def get_auth_type() -> AuthTypeResponse:
-    return AuthTypeResponse(
-        auth_type=AUTH_TYPE, requires_verification=user_needs_to_be_verified()
-    )
-
-
-@router.get("/version")
-def get_version() -> VersionResponse:
-    return VersionResponse(backend_version=__version__)
--- a/backend/danswer/server/settings/store.py
+++ b/backend/danswer/server/settings/store.py
@@ -1,21 +0,0 @@
-from typing import cast
-
-from danswer.configs.constants import KV_SETTINGS_KEY
-from danswer.key_value_store.factory import get_kv_store
-from danswer.key_value_store.interface import KvKeyNotFoundError
-from danswer.server.settings.models import Settings
-
-
-def load_settings() -> Settings:
-    dynamic_config_store = get_kv_store()
-    try:
-        settings = Settings(**cast(dict, dynamic_config_store.load(KV_SETTINGS_KEY)))
-    except KvKeyNotFoundError:
-        settings = Settings()
-        dynamic_config_store.store(KV_SETTINGS_KEY, settings.model_dump())
-
-    return settings
-
-
-def store_settings(settings: Settings) -> None:
-    get_kv_store().store(KV_SETTINGS_KEY, settings.model_dump())
--- a/backend/danswer/server/utils.py
+++ b/backend/danswer/server/utils.py
@@ -1,84 +0,0 @@
-import json
-import smtplib
-from datetime import datetime
-from email.mime.multipart import MIMEMultipart
-from email.mime.text import MIMEText
-from textwrap import dedent
-from typing import Any
-
-from danswer.configs.app_configs import SMTP_PASS
-from danswer.configs.app_configs import SMTP_PORT
-from danswer.configs.app_configs import SMTP_SERVER
-from danswer.configs.app_configs import SMTP_USER
-from danswer.configs.app_configs import WEB_DOMAIN
-from danswer.db.models import User
-
-
-class DateTimeEncoder(json.JSONEncoder):
-    """Custom JSON encoder that converts datetime objects to ISO format strings."""
-
-    def default(self, obj: Any) -> Any:
-        if isinstance(obj, datetime):
-            return obj.isoformat()
-        return super().default(obj)
-
-
-def get_json_line(
-    json_dict: dict[str, Any], encoder: type[json.JSONEncoder] = DateTimeEncoder
-) -> str:
-    """
-    Convert a dictionary to a JSON string with datetime handling, and add a newline.
-
-    Args:
-        json_dict: The dictionary to be converted to JSON.
-        encoder: JSON encoder class to use, defaults to DateTimeEncoder.
-
-    Returns:
-        A JSON string representation of the input dictionary with a newline character.
-    """
-    return json.dumps(json_dict, cls=encoder) + "\n"
-
-
-def mask_string(sensitive_str: str) -> str:
-    return "****...**" + sensitive_str[-4:]
-
-
-def mask_credential_dict(credential_dict: dict[str, Any]) -> dict[str, str]:
-    masked_creds = {}
-    for key, val in credential_dict.items():
-        if not isinstance(val, str):
-            raise ValueError(
-                f"Unable to mask credentials of type other than string, cannot process request."
-                f"Recieved type: {type(val)}"
-            )
-
-        masked_creds[key] = mask_string(val)
-    return masked_creds
-
-
-def send_user_email_invite(user_email: str, current_user: User) -> None:
-    msg = MIMEMultipart()
-    msg["Subject"] = "Invitation to Join Danswer Workspace"
-    msg["From"] = current_user.email
-    msg["To"] = user_email
-
-    email_body = dedent(
-        f"""\
-        Hello,
-
-        You have been invited to join a workspace on Danswer.
-
-        To join the workspace, please visit the following link:
-
-        {WEB_DOMAIN}/auth/login
-
-        Best regards,
-        The Danswer Team
-    """
-    )
-
-    msg.attach(MIMEText(email_body, "plain"))
-    with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as smtp_server:
-        smtp_server.starttls()
-        smtp_server.login(SMTP_USER, SMTP_PASS)
-        smtp_server.send_message(msg)
--- a/backend/danswer/tools/tool_implementations/images/prompt.py
+++ b/backend/danswer/tools/tool_implementations/images/prompt.py
@@ -1,21 +0,0 @@
-from langchain_core.messages import HumanMessage
-
-from danswer.llm.utils import build_content_with_imgs
-
-
-IMG_GENERATION_SUMMARY_PROMPT = """
-You have just created the attached images in response to the following query: "{query}".
-
-Can you please summarize them in a sentence or two? Do NOT include image urls or bulleted lists.
-"""
-
-
-def build_image_generation_user_prompt(
-    query: str, img_urls: list[str] | None = None
-) -> HumanMessage:
-    return HumanMessage(
-        content=build_content_with_imgs(
-            message=IMG_GENERATION_SUMMARY_PROMPT.format(query=query).strip(),
-            img_urls=img_urls,
-        )
-    )
--- a/Show More
+++ b/Show More