Compare commits


1 commit

Author     SHA1        Message  Date
pablonyx   9523df353d  update   2025-04-03 12:45:25 -07:00
1121 changed files with 14011 additions and 53190 deletions


@@ -25,10 +25,6 @@ inputs:
tags:
description: 'Image tags'
required: true
no-cache:
description: 'Read from cache'
required: false
default: 'false'
cache-from:
description: 'Cache sources'
required: false
@@ -59,7 +55,6 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
@@ -82,7 +77,6 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
@@ -105,7 +99,6 @@ runs:
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
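Reviewer note: the runs: steps above appear to forward these inputs to Docker's build-push action, which itself wraps docker buildx. A roughly equivalent raw invocation — a sketch only, with placeholder prefix/region/bucket values (the real ones come from the RUNS_ON_* variables in the workflows below) — would be:

  # placeholders: prefix, region, and bucket are supplied by the calling workflow
  docker buildx build ./backend \
    --file ./backend/Dockerfile \
    --tag onyxdotapp/onyx-backend:test \
    --load \
    --cache-from type=s3,prefix=cache/onyx/backend/,region=us-east-1,bucket=build-cache \
    --cache-to type=s3,prefix=cache/onyx/backend/,region=us-east-1,bucket=build-cache,mode=max
    # mode=max also caches intermediate layers, not just the final image layers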


@@ -7,47 +7,18 @@ on:
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build-and-push:
# TODO: investigate a matrix build like the web container
# See https://runs-on.com/runners/linux/
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
- run-id=${{ github.run_id }}
- tag=platform-${{ matrix.platform }}
strategy:
fail-fast: false
matrix:
platform:
- linux/amd64
- linux/arm64
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Prepare
run: |
platform=${{ matrix.platform }}
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Checkout code
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -63,80 +34,18 @@ jobs:
sudo apt-get install -y build-essential
- name: Backend Image Docker Build and Push
id: build
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile
platforms: ${{ matrix.platform }}
platforms: linux/amd64,linux/arm64
push: true
tags: |
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
build-args: |
ONYX_VERSION=${{ github.ref_name }}
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Export digest
run: |
mkdir -p /tmp/digests
digest="${{ steps.build.outputs.digest }}"
touch "/tmp/digests/${digest#sha256:}"
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build-and-push
steps:
# Needed for trivyignore
- name: Checkout
uses: actions/checkout@v4
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: backend-digests-*-${{ github.run_id }}
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create manifest list and push
working-directory: /tmp/digests
run: |
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
$(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
# trivy has their own rate limiting issues causing this action to flake
# we worked around it by hardcoding to different db repos in env
# can re-enable when they figure it out
@@ -147,8 +56,6 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
# To run locally: trivy image --severity HIGH,CRITICAL onyxdotapp/onyx-backend
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
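Reviewer note: in the matrix version being removed, each platform job pushes its image by digest only, and the merge job stitches those digests into one multi-arch tag. Expanded with hypothetical digests, the imagetools command above amounts to:

  # tags come from $DOCKER_METADATA_OUTPUT_JSON; the digests are the per-platform build outputs
  docker buildx imagetools create \
    -t onyxdotapp/onyx-backend:v1.2.3 \
    onyxdotapp/onyx-backend@sha256:aaaa... \
    onyxdotapp/onyx-backend@sha256:bbbb...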


@@ -4,12 +4,12 @@ name: Build and Push Cloud Web Image on Tag
on:
push:
tags:
- "*cloud*"
- "*"
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
DEPLOYMENT: cloud
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
jobs:
build:
runs-on:
@@ -38,10 +38,9 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -54,7 +53,7 @@ jobs:
- name: Build and push by digest
id: build
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./web
file: ./web/Dockerfile
@@ -71,12 +70,10 @@ jobs:
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
@@ -87,7 +84,7 @@ jobs:
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
name: digests-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
@@ -101,7 +98,7 @@ jobs:
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: cloudweb-digests-*-${{ github.run_id }}
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
@@ -112,10 +109,6 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -143,8 +136,6 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"
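Reviewer note: the TRIVY_DB_REPOSITORY overrides pin the vulnerability databases to public ECR mirrors to dodge Docker Hub rate limits (per the comment in the backend workflow). A local reproduction of this scan, with a placeholder tag, looks like:

  TRIVY_DB_REPOSITORY=public.ecr.aws/aquasecurity/trivy-db:2 \
  TRIVY_JAVA_DB_REPOSITORY=public.ecr.aws/aquasecurity/trivy-java-db:1 \
  trivy image --severity CRITICAL,HIGH docker.io/onyxdotapp/onyx-web-server-cloud:v1.2.3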


@@ -7,13 +7,10 @@ on:
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DOCKER_BUILDKIT: 1
BUILDKIT_PROGRESS: plain
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
# don't tag cloud images with "latest"
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
jobs:
# Bypassing this for now as the idea of not building is glitching
@@ -54,8 +51,6 @@ jobs:
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
env:
PLATFORM_PAIR: linux-amd64
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -80,7 +75,7 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push AMD64
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile.model_server
@@ -91,17 +86,12 @@ jobs:
DANSWER_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache: true
build-arm64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
env:
PLATFORM_PAIR: linux-arm64
steps:
- name: Checkout code
uses: actions/checkout@v4
@@ -126,7 +116,7 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and Push ARM64
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./backend
file: ./backend/Dockerfile.model_server
@@ -137,8 +127,6 @@ jobs:
DANSWER_VERSION=${{ github.ref_name }}
outputs: type=registry
provenance: false
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
merge-and-scan:
needs: [build-amd64, build-arm64, check_model_server_changes]
@@ -168,8 +156,6 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"
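Reviewer note: with amd64 and arm64 built and pushed by separate jobs here, the merge-and-scan job produces the final multi-arch tag. A quick local check that a tag really carries both platforms, with a placeholder tag (this mirrors the inspect step in the backend workflow):

  docker buildx imagetools inspect onyxdotapp/onyx-model-server:v1.2.3
  # output lists one manifest per platform, e.g. linux/amd64 and linux/arm64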


@@ -8,25 +8,9 @@ on:
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
DEPLOYMENT: standalone
jobs:
precheck:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
outputs:
should-run: ${{ steps.set-output.outputs.should-run }}
steps:
- name: Check if tag contains "cloud"
id: set-output
run: |
if [[ "${{ github.ref_name }}" == *cloud* ]]; then
echo "should-run=false" >> "$GITHUB_OUTPUT"
else
echo "should-run=true" >> "$GITHUB_OUTPUT"
fi
build:
needs: precheck
if: needs.precheck.outputs.should-run == 'true'
runs-on:
- runs-on
- runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
@@ -53,11 +37,9 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -70,7 +52,7 @@ jobs:
- name: Build and push by digest
id: build
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: ./web
file: ./web/Dockerfile
@@ -80,13 +62,11 @@ jobs:
ONYX_VERSION=${{ github.ref_name }}
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# no-cache needed due to weird interactions with the builds for different platforms
# NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
- name: Export digest
run: |
mkdir -p /tmp/digests
@@ -96,22 +76,21 @@ jobs:
- name: Upload digest
uses: actions/upload-artifact@v4
with:
name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
name: digests-${{ env.PLATFORM_PAIR }}
path: /tmp/digests/*
if-no-files-found: error
retention-days: 1
merge:
runs-on: ubuntu-latest
needs:
- build
if: needs.precheck.outputs.should-run == 'true'
runs-on: ubuntu-latest
steps:
- name: Download digests
uses: actions/download-artifact@v4
with:
path: /tmp/digests
pattern: web-digests-*-${{ github.run_id }}
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
@@ -122,11 +101,6 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=false
tags: |
type=raw,value=${{ github.ref_name }}
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -154,8 +128,6 @@ jobs:
env:
TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
with:
image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
severity: "CRITICAL,HIGH"


@@ -37,11 +37,6 @@ jobs:
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'


@@ -16,55 +16,15 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
PLATFORM_PAIR: linux-amd64
jobs:
integration-tests:
# See https://runs-on.com/runners/linux/
runs-on:
[
runs-on,
runner=32cpu-linux-x64,
disk=large,
"run-id=${{ github.run_id }}",
]
runs-on: [runs-on, runner=32cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/ee.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/ee.txt
- name: Generate OpenAPI schema
working-directory: ./backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Generate OpenAPI Python client
working-directory: ./backend
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -101,8 +61,8 @@ jobs:
tags: onyxdotapp/onyx-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
@@ -113,8 +73,8 @@ jobs:
tags: onyxdotapp/onyx-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
@@ -125,8 +85,8 @@ jobs:
tags: onyxdotapp/onyx-integration:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# Start containers for multi-tenant tests
- name: Start Docker containers for multi-tenant tests
@@ -153,8 +113,6 @@ jobs:
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
@@ -200,7 +158,6 @@ jobs:
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
id: start_docker
@@ -253,8 +210,6 @@ jobs:
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \


@@ -16,52 +16,15 @@ env:
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
PLATFORM_PAIR: linux-amd64
jobs:
integration-tests-mit:
# See https://runs-on.com/runners/linux/
runs-on:
[
runs-on,
runner=32cpu-linux-x64,
disk=large,
"run-id=${{ github.run_id }}",
]
runs-on: [runs-on, runner=32cpu-linux-x64, "run-id=${{ github.run_id }}"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Generate OpenAPI schema
working-directory: ./backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Generate OpenAPI Python client
working-directory: ./backend
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -98,8 +61,8 @@ jobs:
tags: onyxdotapp/onyx-backend:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build Model Server Docker image
uses: ./.github/actions/custom-build-and-push
@@ -110,8 +73,8 @@ jobs:
tags: onyxdotapp/onyx-model-server:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
- name: Build integration test Docker image
uses: ./.github/actions/custom-build-and-push
@@ -122,8 +85,8 @@ jobs:
tags: onyxdotapp/onyx-integration:test
push: false
load: true
cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
- name: Start Docker containers
@@ -189,8 +152,6 @@ jobs:
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e DB_READONLY_USER=db_readonly_user \
-e DB_READONLY_PASSWORD=password \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \


@@ -10,7 +10,6 @@ env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
MOCK_LLM_RESPONSE: true
PYTEST_PLAYWRIGHT_SKIP_INITIAL_RESET: true
jobs:
playwright-tests:


@@ -31,33 +31,20 @@ jobs:
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
- name: Generate OpenAPI schema
working-directory: ./backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
- name: Generate OpenAPI Python client
working-directory: ./backend
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client
- name: Run MyPy
run: |
cd backend
mypy .
- name: Run ruff
run: |
cd backend
ruff .
- name: Check import order with reorder-python-imports
run: |
cd backend
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
find ./danswer -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Check code formatting with Black
run: |


@@ -12,7 +12,7 @@ env:
# AWS
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
# Confluence
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_TEST_SPACE: ${{ secrets.CONFLUENCE_TEST_SPACE }}
@@ -20,72 +20,46 @@ env:
CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
# Jira
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
# Gong
GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}
GONG_ACCESS_KEY_SECRET: ${{ secrets.GONG_ACCESS_KEY_SECRET }}
# Google
GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
# Slab
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
# Zendesk
ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
# Salesforce
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
# Airtable
AIRTABLE_TEST_BASE_ID: ${{ secrets.AIRTABLE_TEST_BASE_ID }}
AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
# Sharepoint
SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
# Github
ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}
# Gitlab
GITLAB_ACCESS_TOKEN: ${{ secrets.GITLAB_ACCESS_TOKEN }}
# Gitbook
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
# Notion
NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
# Highspot
HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }}
HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }}
# Slack
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
# Teams
TEAMS_APPLICATION_ID: ${{ secrets.TEAMS_APPLICATION_ID }}
TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
TEAMS_SECRET: ${{ secrets.TEAMS_SECRET }}
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
@@ -117,15 +91,7 @@ jobs:
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
-n 8 \
--dist loadfile \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/daily/connectors
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'
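Reviewer note on the flags dropped from the test invocation: -n and --dist come from the pytest-xdist plugin, so the old command fanned the suite out across 8 workers while keeping each file's tests on one worker; the new command runs everything in a single process:

  # before: 8 xdist workers, tests grouped by file (--dist loadfile), report the 8 slowest tests
  py.test -n 8 --dist loadfile --durations=8 -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
  # after: serial run; -x stops on first failure, --ff runs previously failed tests first
  py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors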


@@ -15,9 +15,6 @@ jobs:
env:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
steps:
- name: Checkout code

.gitignore

@@ -1,28 +1,12 @@
# editors
.vscode
.zed
# macos
.env
.DS_store
# python
.venv
.mypy_cache
.idea
# testing
/deployment/data/nginx/app.conf
.vscode/
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
/web/test-results/
backend/onyx/agent_search/main/test_data.json
backend/tests/regression/answer_quality/test_data.json
backend/tests/regression/search_quality/eval-*
backend/tests/regression/search_quality/search_eval_config.yaml
backend/tests/regression/search_quality/*.json
# secret files
.env
jira_test_env
# others
/deployment/data/nginx/app.conf
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml


@@ -1,13 +1,12 @@
repos:
- repo: https://github.com/psf/black
rev: 25.1.0
rev: 23.3.0
hooks:
- id: black
language_version: python3.11
# this is a fork which keeps compatibility with black
- repo: https://github.com/wimglenn/reorder-python-imports-black
rev: v3.14.0
- repo: https://github.com/asottile/reorder_python_imports
rev: v3.9.0
hooks:
- id: reorder-python-imports
args: ['--py311-plus', '--application-directories=backend/']
@@ -19,14 +18,14 @@ repos:
# These settings will remove unused imports with side effects
# Note: The repo currently does not and should not have imports with side effects
- repo: https://github.com/PyCQA/autoflake
rev: v2.3.1
rev: v2.2.0
hooks:
- id: autoflake
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.11.4
rev: v0.0.286
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier
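Reviewer note: after changing hook revisions like this, the standard way to exercise the new pins across the whole tree (pre-commit rebuilds a hook's environment automatically when its rev changes):

  pre-commit run --all-files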


@@ -412,46 +412,6 @@
"group": "3"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",
"type": "debugpy",
"request": "launch",
"program": "scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--filename",
"generated/openapi.json",
]
},
{
// script to debug multi tenant db issues
"name": "Onyx DB Manager (Top Chunks)",
"type": "debugpy",
"request": "launch",
"program": "scripts/debugging/onyx_db.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--password",
"your_password_here",
"--port",
"5433",
"--report",
"top-chunks",
"--filename",
"generated/tenants_by_num_docs.csv"
]
},
{
"name": "Debug React Web App in Chrome",
"type": "chrome",


@@ -1,101 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "austin",
"label": "Profile celery beat",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/backend"
},
"command": [
"sudo",
"-E"
],
"args": [
"celery",
"-A",
"onyx.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO"
]
},
{
"type": "shell",
"label": "Generate Onyx OpenAPI Python client",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/backend"
},
"command": [
"openapi-generator"
],
"args": [
"generate",
"-i",
"generated/openapi.json",
"-g",
"python",
"-o",
"generated/onyx_openapi_client",
"--package-name",
"onyx_openapi_client",
]
},
{
"type": "shell",
"label": "Generate Typescript Fetch client (openapi-generator)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}"
},
"command": [
"openapi-generator"
],
"args": [
"generate",
"-i",
"backend/generated/openapi.json",
"-g",
"typescript-fetch",
"-o",
"${workspaceFolder}/web/src/lib/generated/onyx_api",
"--additional-properties=disallowAdditionalPropertiesIfNotPresent=false,legacyDiscriminatorBehavior=false,supportsES6=true",
]
},
{
"type": "shell",
"label": "Generate TypeScript Client (openapi-ts)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/web"
},
"command": [
"npx"
],
"args": [
"openapi-typescript",
"../backend/generated/openapi.json",
"--output",
"./src/lib/generated/onyx-schema.ts",
]
},
{
"type": "shell",
"label": "Generate TypeScript Client (orval)",
"envFile": "${workspaceFolder}/.env",
"options": {
"cwd": "${workspaceFolder}/web"
},
"command": [
"npx"
],
"args": [
"orval",
"--config",
"orval.config.js",
]
}
]
}
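Reviewer note: the removed tasks wrap stock generator CLIs; with the same arguments as the task definitions above, the equivalent shell commands (run from backend/ and web/ respectively) are:

  # Python client (the openapi-generator task)
  openapi-generator generate -i generated/openapi.json -g python \
    -o generated/onyx_openapi_client --package-name onyx_openapi_client
  # TypeScript types (the openapi-typescript task)
  npx openapi-typescript ../backend/generated/openapi.json --output ./src/lib/generated/onyx-schema.ts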


@@ -1,4 +1,4 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
# Contributing to Onyx
@@ -12,8 +12,8 @@ As an open source project in a rapidly changing space, we welcome all contributi
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
via [Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA) /
To ensure that your contribution is aligned with the project's direction, please reach out to Hagen (or any other maintainer) on the Onyx team
via [Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
[Discord](https://discord.gg/TDJ59cGV2X) or [email](mailto:founders@onyx.app).
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
@@ -28,7 +28,7 @@ Your input is vital to making sure that Onyx moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA) /
[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA) /
[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.
### Contributing Code


@@ -1,4 +1,4 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
<a name="readme-top"></a>
@@ -13,7 +13,7 @@
<a href="https://docs.onyx.app/" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
</a>
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA" target="_blank">
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA" target="_blank">
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
</a>
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">

backend/.gitignore

@@ -9,6 +9,4 @@ api_keys.py
vespa-app.zip
dynamic_config_storage/
celerybeat-schedule*
onyx/connectors/salesforce/data/
.test.env
/generated
onyx/connectors/salesforce/data/


@@ -37,8 +37,7 @@ RUN apt-get update && \
pkg-config \
gcc \
nano \
vim \
postgresql-client && \
vim && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -86,7 +85,7 @@ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
nltk.download('punkt', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Set up application files


@@ -1,4 +1,4 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
<!-- DANSWER_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
# Alembic DB Migrations


@@ -24,7 +24,6 @@ from onyx.configs.constants import SSL_CERT_FILE
from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
from onyx.db.models import Base
from celery.backends.database.session import ResultModelBase # type: ignore
from onyx.db.engine import SqlEngine
# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be
# hidden! (defaults to level=WARN)
@@ -148,9 +147,6 @@ async def run_async_migrations() -> None:
continue_on_error,
) = get_schema_options()
# without init_engine, subsequent engine calls fail hard intentionally
SqlEngine.init_engine(pool_size=20, max_overflow=5)
engine = create_async_engine(
build_connection_string(),
poolclass=pool.NullPool,
@@ -184,10 +180,10 @@ async def run_async_migrations() -> None:
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
logger.error("--continue=true is not set, raising exception!")
logger.error("--continue is not set, raising exception!")
raise
logger.warning("--continue=true is set, continuing to next schema.")
logger.warning("--continue is set, continuing to next schema.")
else:
try:
@@ -206,21 +202,10 @@ async def run_async_migrations() -> None:
def run_migrations_offline() -> None:
"""
NOTE(rkuo): This generates a sql script that can be used to migrate the database ...
instead of migrating the db live via an open connection
Not clear on when this would be used by us or if it even works.
If it is offline, then why are there calls to the db engine?
This doesn't really get used when we migrate in the cloud."""
"""This doesn't really get used when we migrate in the cloud."""
logger.info("run_migrations_offline starting.")
# without init_engine, subsequent engine calls fail hard intentionally
SqlEngine.init_engine(pool_size=20, max_overflow=5)
schema_name, _, upgrade_all_tenants, continue_on_error = get_schema_options()
url = build_connection_string()
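Reviewer note on the --continue messages above: get_schema_options() reads per-invocation options, which alembic passes through with its -x flag, so — assuming the key matches the log text, which is an inference, not confirmed by this diff — a multi-tenant upgrade that skips failing schemas would be invoked roughly as:

  # hypothetical invocation; the exact -x keys are defined by get_schema_options()
  alembic -x continue=true upgrade head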


@@ -5,7 +5,6 @@ Revises: 6fc7886d665d
Create Date: 2025-01-14 12:14:00.814390
"""
from alembic import op
import sqlalchemy as sa


@@ -1,121 +0,0 @@
"""rework-kg-config
Revision ID: 03bf8be6b53a
Revises: 65bc6e0f8500
Create Date: 2025-06-16 10:52:34.815335
"""
import json
from datetime import datetime
from datetime import timedelta
from sqlalchemy.dialects import postgresql
from sqlalchemy import text
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "03bf8be6b53a"
down_revision = "65bc6e0f8500"
branch_labels = None
depends_on = None
def upgrade() -> None:
# get current config
current_configs = (
op.get_bind()
.execute(text("SELECT kg_variable_name, kg_variable_values FROM kg_config"))
.all()
)
current_config_dict = {
config.kg_variable_name: (
config.kg_variable_values[0]
if config.kg_variable_name
not in ("KG_VENDOR_DOMAINS", "KG_IGNORE_EMAIL_DOMAINS")
else config.kg_variable_values
)
for config in current_configs
if config.kg_variable_values
}
# not using the KGConfigSettings model here in case it changes in the future
kg_config_settings = json.dumps(
{
"KG_EXPOSED": current_config_dict.get("KG_EXPOSED", False),
"KG_ENABLED": current_config_dict.get("KG_ENABLED", False),
"KG_VENDOR": current_config_dict.get("KG_VENDOR", None),
"KG_VENDOR_DOMAINS": current_config_dict.get("KG_VENDOR_DOMAINS", []),
"KG_IGNORE_EMAIL_DOMAINS": current_config_dict.get(
"KG_IGNORE_EMAIL_DOMAINS", []
),
"KG_COVERAGE_START": current_config_dict.get(
"KG_COVERAGE_START",
(datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
),
"KG_MAX_COVERAGE_DAYS": current_config_dict.get("KG_MAX_COVERAGE_DAYS", 90),
"KG_MAX_PARENT_RECURSION_DEPTH": current_config_dict.get(
"KG_MAX_PARENT_RECURSION_DEPTH", 2
),
"KG_BETA_PERSONA_ID": current_config_dict.get("KG_BETA_PERSONA_ID", None),
}
)
op.execute(
f"INSERT INTO key_value_store (key, value) VALUES ('kg_config', '{kg_config_settings}')"
)
# drop kg config table
op.drop_table("kg_config")
def downgrade() -> None:
# get current config
current_config_dict = {
"KG_EXPOSED": False,
"KG_ENABLED": False,
"KG_VENDOR": [],
"KG_VENDOR_DOMAINS": [],
"KG_IGNORE_EMAIL_DOMAINS": [],
"KG_COVERAGE_START": (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
"KG_MAX_COVERAGE_DAYS": 90,
"KG_MAX_PARENT_RECURSION_DEPTH": 2,
}
current_configs = (
op.get_bind()
.execute(text("SELECT value FROM key_value_store WHERE key = 'kg_config'"))
.one_or_none()
)
if current_configs is not None:
current_config_dict.update(current_configs[0])
insert_values = [
{
"kg_variable_name": name,
"kg_variable_values": (
[str(val).lower() if isinstance(val, bool) else str(val)]
if not isinstance(val, list)
else val
),
}
for name, val in current_config_dict.items()
]
op.create_table(
"kg_config",
sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
)
op.bulk_insert(
sa.table(
"kg_config",
sa.column("kg_variable_name", sa.String),
sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
),
insert_values,
)
op.execute("DELETE FROM key_value_store WHERE key = 'kg_config'")


@@ -5,7 +5,6 @@ Revises: 8a87bd6ec550
Create Date: 2024-07-23 11:12:39.462397
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 5f4b8568a221
Create Date: 2024-03-02 23:23:49.960309
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 570282d33c49
Create Date: 2024-05-05 19:30:34.317972
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table


@@ -5,7 +5,6 @@ Revises: 52a219fb5233
Create Date: 2024-09-10 15:03:48.233926
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 369644546676
Create Date: 2025-01-10 14:01:14.067144
"""
from alembic import op
# revision identifiers, used by Alembic.


@@ -5,7 +5,6 @@ Revises: 77d07dffae64
Create Date: 2023-11-11 20:51:24.228999
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: e50154680a5c
Create Date: 2024-03-19 15:30:44.425436
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 4ee1287bd26a
Create Date: 2024-11-21 11:49:04.488677
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 9c00a2bccb83
Create Date: 2025-02-18 10:45:13.957807
"""
from alembic import op
# revision identifiers, used by Alembic.


@@ -5,7 +5,6 @@ Revises: 6756efa39ada
Create Date: 2024-10-15 19:26:44.071259
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 35e6853a51d5
Create Date: 2024-09-18 11:48:59.418726
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: 5fc1f54cc252
Create Date: 2024-08-10 11:13:36.070790
"""
from alembic import op
import sqlalchemy as sa


@@ -1,45 +0,0 @@
"""Add foreign key to user__external_user_group_id
Revision ID: 238b84885828
Revises: a7688ab35c45
Create Date: 2025-05-19 17:15:33.424584
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "238b84885828"
down_revision = "a7688ab35c45"
branch_labels = None
depends_on = None
def upgrade() -> None:
# First, clean up any entries that don't have a valid cc_pair_id
op.execute(
"""
DELETE FROM user__external_user_group_id
WHERE cc_pair_id NOT IN (SELECT id FROM connector_credential_pair)
"""
)
# Add foreign key constraint with cascade delete
op.create_foreign_key(
"fk_user__external_user_group_id_cc_pair_id",
"user__external_user_group_id",
"connector_credential_pair",
["cc_pair_id"],
["id"],
ondelete="CASCADE",
)
def downgrade() -> None:
# Drop the foreign key constraint
op.drop_constraint(
"fk_user__external_user_group_id_cc_pair_id",
"user__external_user_group_id",
type_="foreignkey",
)


@@ -5,7 +5,6 @@ Revises: bc9771dccadf
Create Date: 2024-06-27 16:04:51.480437
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 6d387b3196c2
Create Date: 2023-05-05 15:49:35.716016
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op


@@ -5,7 +5,6 @@ Revises: 2daa494a0851
Create Date: 2024-11-12 13:23:29.858995
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 2666d766cb9b
Create Date: 2023-05-24 18:45:17.244495
"""
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
from alembic import op


@@ -5,7 +5,6 @@ Revises: c0aab6edb6dd
Create Date: 2025-01-04 11:39:43.268612
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: f5437cc136c5
Create Date: 2025-02-11 14:57:51.308775
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: 4b08d97e175a
Create Date: 2024-08-21 19:15:15.762948
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: c0fd6e4da83a
Create Date: 2024-11-11 10:57:22.991157
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 33ea50e88f24
Create Date: 2025-01-31 10:30:27.289646
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 7f99be1cb9f5
Create Date: 2023-10-16 23:21:01.283424
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 91ffac7e65b3
Create Date: 2024-07-24 21:29:31.784562
"""
import random
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 5b29123cd710
Create Date: 2024-11-01 12:51:01.535003
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: a6df6b88ef81
Create Date: 2025-01-29 10:54:22.141765
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: ee3f4b47fad5
Create Date: 2024-08-15 22:37:08.397052
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 91a0a4d62b14
Create Date: 2024-09-20 21:24:04.891018
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: c99d76fcd298
Create Date: 2024-09-13 13:20:32.885317
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: 2955778aa44c
Create Date: 2025-01-08 15:38:17.224380
"""
from alembic import op
from sqlalchemy import text


@@ -5,7 +5,6 @@ Revises: df46c75b714e
Create Date: 2025-03-10 10:02:30.586666
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: f1c6478c3fd8
Create Date: 2024-05-11 16:11:23.718084
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 776b3bbe9092
Create Date: 2024-03-27 19:41:29.073594
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: b7c2b63c4a03
Create Date: 2025-03-05 10:50:30.516962
"""
from alembic import op
import sqlalchemy as sa
import json


@@ -5,7 +5,6 @@ Revises: e0a68a81d434
Create Date: 2023-10-05 18:47:09.582849
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 8f43500ee275
Create Date: 2025-02-26 13:07:56.217791
"""
from alembic import op


@@ -5,7 +5,6 @@ Revises: 27c6ecc08586
Create Date: 2023-06-14 23:45:51.760440
"""
import sqlalchemy as sa
from alembic import op


@@ -5,7 +5,6 @@ Revises: aeda5f2df4f6
Create Date: 2025-01-13 12:49:51.705235
"""
from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy


@@ -5,7 +5,6 @@ Revises: 703313b75876
Create Date: 2024-04-13 18:07:29.153817
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: e1392f05e840
Create Date: 2024-08-01 12:38:54.466081
"""
from alembic import op
# revision identifiers, used by Alembic.


@@ -5,7 +5,6 @@ Revises: d716b0791ddd
Create Date: 2024-06-28 20:01:05.927647
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: c18cdf4b497e
Create Date: 2024-06-18 20:46:09.095034
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 3c5e35aa9af0
Create Date: 2023-07-18 17:33:40.365034
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: 9d97fecfab7f
Create Date: 2023-10-27 11:38:33.803145
"""
from alembic import op
from sqlalchemy import String


@@ -5,7 +5,6 @@ Revises: f32615f71aeb
Create Date: 2024-09-23 12:58:03.894038
"""
from alembic import op
# revision identifiers, used by Alembic.


@@ -5,7 +5,6 @@ Revises: e91df4e935ef
Create Date: 2024-03-20 18:53:32.461518
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises:
Create Date: 2023-05-04 00:55:32.971991
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql


@@ -5,7 +5,6 @@ Revises: ecab2b3f1a3b
Create Date: 2024-04-11 11:05:18.414438
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: f7505c5b0284
Create Date: 2025-04-02 11:26:36.180328
"""
from alembic import op
import sqlalchemy as sa


@@ -1,150 +0,0 @@
"""Fix invalid model-configurations state
Revision ID: 47a07e1a38f1
Revises: 7a70b7664e37
Create Date: 2025-04-23 15:39:43.159504
"""
from alembic import op
from pydantic import BaseModel, ConfigDict
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)
# revision identifiers, used by Alembic.
revision = "47a07e1a38f1"
down_revision = "7a70b7664e37"
branch_labels = None
depends_on = None
class _SimpleModelConfiguration(BaseModel):
# Configure model to read from attributes
model_config = ConfigDict(from_attributes=True)
id: int
llm_provider_id: int
name: str
is_visible: bool
max_input_tokens: int | None
def upgrade() -> None:
llm_provider_table = sa.sql.table(
"llm_provider",
sa.column("id", sa.Integer),
sa.column("provider", sa.String),
sa.column("model_names", postgresql.ARRAY(sa.String)),
sa.column("display_model_names", postgresql.ARRAY(sa.String)),
sa.column("default_model_name", sa.String),
sa.column("fast_default_model_name", sa.String),
)
model_configuration_table = sa.sql.table(
"model_configuration",
sa.column("id", sa.Integer),
sa.column("llm_provider_id", sa.Integer),
sa.column("name", sa.String),
sa.column("is_visible", sa.Boolean),
sa.column("max_input_tokens", sa.Integer),
)
connection = op.get_bind()
llm_providers = connection.execute(
sa.select(
llm_provider_table.c.id,
llm_provider_table.c.provider,
)
).fetchall()
for llm_provider in llm_providers:
llm_provider_id, provider_name = llm_provider
default_models = fetch_model_names_for_provider_as_set(provider_name)
display_models = fetch_visible_model_names_for_provider_as_set(
provider_name=provider_name
)
# if `fetch_model_names_for_provider_as_set` returns `None`, then
# that means that `provider_name` is not a well-known llm provider.
if not default_models:
continue
if not display_models:
raise RuntimeError(
"If `default_models` is non-None, `display_models` must be non-None too."
)
model_configurations = [
_SimpleModelConfiguration.model_validate(model_configuration)
for model_configuration in connection.execute(
sa.select(
model_configuration_table.c.id,
model_configuration_table.c.llm_provider_id,
model_configuration_table.c.name,
model_configuration_table.c.is_visible,
model_configuration_table.c.max_input_tokens,
).where(model_configuration_table.c.llm_provider_id == llm_provider_id)
).fetchall()
]
if model_configurations:
at_least_one_is_visible = any(
[
model_configuration.is_visible
for model_configuration in model_configurations
]
)
# If there is at least one model which is public, this is a valid state.
# Therefore, don't touch it and move on to the next one.
if at_least_one_is_visible:
continue
existing_visible_model_names: set[str] = set(
[
model_configuration.name
for model_configuration in model_configurations
if model_configuration.is_visible
]
)
difference = display_models.difference(existing_visible_model_names)
for model_name in difference:
if not model_name:
continue
insert_statement = postgresql.insert(model_configuration_table).values(
llm_provider_id=llm_provider_id,
name=model_name,
is_visible=True,
max_input_tokens=None,
)
connection.execute(
insert_statement.on_conflict_do_update(
index_elements=["llm_provider_id", "name"],
set_={"is_visible": insert_statement.excluded.is_visible},
)
)
else:
for model_name in default_models:
connection.execute(
model_configuration_table.insert().values(
llm_provider_id=llm_provider_id,
name=model_name,
is_visible=model_name in display_models,
max_input_tokens=None,
)
)
def downgrade() -> None:
pass


@@ -5,7 +5,6 @@ Revises: dfbe9e93d3c7
Create Date: 2024-11-05 18:55:02.221064
"""
from alembic import op
import sqlalchemy as sa


@@ -5,7 +5,6 @@ Revises: b85f02ec1308
Create Date: 2024-06-09 14:58:19.946509
"""
from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa


@@ -1,682 +0,0 @@
"""create knowledge graph tables
Revision ID: 495cb26ce93e
Revises: ca04500b9ee8
Create Date: 2025-03-19 08:51:14.341989
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import text
from datetime import datetime, timedelta
from onyx.configs.app_configs import DB_READONLY_USER
from onyx.configs.app_configs import DB_READONLY_PASSWORD
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
# revision identifiers, used by Alembic.
revision = "495cb26ce93e"
down_revision = "ca04500b9ee8"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create a new permission-less user to be later used for knowledge graph queries.
# The user will later get temporary read privileges for a specific view that will be
# ad hoc generated specific to a knowledge graph query.
#
# Note: in order for the migration to run, the DB_READONLY_USER and DB_READONLY_PASSWORD
# environment variables MUST be set. Otherwise, an exception will be raised.
if not MULTI_TENANT:
# Enable pg_trgm extension if not already enabled
op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
# Create read-only db user here only in single tenant mode. For multi-tenant mode,
# the user is created in the alembic_tenants migration.
if not (DB_READONLY_USER and DB_READONLY_PASSWORD):
raise Exception("DB_READONLY_USER or DB_READONLY_PASSWORD is not set")
op.execute(
text(
f"""
DO $$
BEGIN
-- Check if the read-only user already exists
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
-- Create the read-only user with the specified password
EXECUTE format('CREATE USER %I WITH PASSWORD %L', '{DB_READONLY_USER}', '{DB_READONLY_PASSWORD}');
-- First revoke all privileges to ensure a clean slate
EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');
-- Grant only the CONNECT privilege to allow the user to connect to the database
-- but not perform any operations without additional specific grants
EXECUTE format('GRANT CONNECT ON DATABASE %I TO %I', current_database(), '{DB_READONLY_USER}');
END IF;
END
$$;
"""
)
)
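    # Note on the DO block above: format('%I', ...) quote-escapes identifiers and
    # format('%L', ...) quote-escapes literals, so (with hypothetical values)
    #   format('CREATE USER %I WITH PASSWORD %L', 'ro_user', 's3cret')
    # renders as: CREATE USER "ro_user" WITH PASSWORD 's3cret'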
# Grant usage on current schema to readonly user
op.execute(
text(
f"""
DO $$
BEGIN
IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
EXECUTE format('GRANT USAGE ON SCHEMA %I TO %I', current_schema(), '{DB_READONLY_USER}');
END IF;
END
$$;
"""
)
)
op.create_table(
"kg_config",
sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
)
# Insert initial data into kg_config table
op.bulk_insert(
sa.table(
"kg_config",
sa.column("kg_variable_name", sa.String),
sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
),
[
{"kg_variable_name": "KG_EXPOSED", "kg_variable_values": ["false"]},
{"kg_variable_name": "KG_ENABLED", "kg_variable_values": ["false"]},
{"kg_variable_name": "KG_VENDOR", "kg_variable_values": []},
{"kg_variable_name": "KG_VENDOR_DOMAINS", "kg_variable_values": []},
{"kg_variable_name": "KG_IGNORE_EMAIL_DOMAINS", "kg_variable_values": []},
{
"kg_variable_name": "KG_EXTRACTION_IN_PROGRESS",
"kg_variable_values": ["false"],
},
{
"kg_variable_name": "KG_CLUSTERING_IN_PROGRESS",
"kg_variable_values": ["false"],
},
{
"kg_variable_name": "KG_COVERAGE_START",
"kg_variable_values": [
(datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d")
],
},
{"kg_variable_name": "KG_MAX_COVERAGE_DAYS", "kg_variable_values": ["90"]},
{
"kg_variable_name": "KG_MAX_PARENT_RECURSION_DEPTH",
"kg_variable_values": ["2"],
},
],
)
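    # Note: kg_config stores every value as an array of strings; scalar settings
    # such as KG_ENABLED use a single-element array (e.g. ["false"]).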
op.create_table(
"kg_entity_type",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("description", sa.String(), nullable=True),
sa.Column("grounding", sa.String(), nullable=False),
sa.Column(
"attributes",
postgresql.JSONB,
nullable=False,
server_default="{}",
),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column("active", sa.Boolean(), nullable=False, default=False),
sa.Column("deep_extraction", sa.Boolean(), nullable=False, default=False),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.Column("grounded_source_name", sa.String(), nullable=True),
sa.Column("entity_values", postgresql.ARRAY(sa.String()), nullable=True),
sa.Column(
"clustering",
postgresql.JSONB,
nullable=False,
server_default="{}",
),
)
# Create KGRelationshipType table
op.create_table(
"kg_relationship_type",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("name", sa.String(), nullable=False, index=True),
sa.Column(
"source_entity_type_id_name", sa.String(), nullable=False, index=True
),
sa.Column(
"target_entity_type_id_name", sa.String(), nullable=False, index=True
),
sa.Column("definition", sa.Boolean(), nullable=False, default=False),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column("type", sa.String(), nullable=False, index=True),
sa.Column("active", sa.Boolean(), nullable=False, default=True),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.Column(
"clustering",
postgresql.JSONB,
nullable=False,
server_default="{}",
),
sa.ForeignKeyConstraint(
["source_entity_type_id_name"], ["kg_entity_type.id_name"]
),
sa.ForeignKeyConstraint(
["target_entity_type_id_name"], ["kg_entity_type.id_name"]
),
)
# Create KGRelationshipTypeExtractionStaging table
op.create_table(
"kg_relationship_type_extraction_staging",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("name", sa.String(), nullable=False, index=True),
sa.Column(
"source_entity_type_id_name", sa.String(), nullable=False, index=True
),
sa.Column(
"target_entity_type_id_name", sa.String(), nullable=False, index=True
),
sa.Column("definition", sa.Boolean(), nullable=False, default=False),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column("type", sa.String(), nullable=False, index=True),
sa.Column("active", sa.Boolean(), nullable=False, default=True),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.Column(
"clustering",
postgresql.JSONB,
nullable=False,
server_default="{}",
),
sa.Column("transferred", sa.Boolean(), nullable=False, server_default="false"),
sa.ForeignKeyConstraint(
["source_entity_type_id_name"], ["kg_entity_type.id_name"]
),
sa.ForeignKeyConstraint(
["target_entity_type_id_name"], ["kg_entity_type.id_name"]
),
)
# Create KGEntity table
op.create_table(
"kg_entity",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("name", sa.String(), nullable=False, index=True),
sa.Column("entity_class", sa.String(), nullable=True, index=True),
sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
sa.Column("entity_key", sa.String(), nullable=True, index=True),
sa.Column("name_trigrams", postgresql.ARRAY(sa.String(3)), nullable=True),
sa.Column("document_id", sa.String(), nullable=True, index=True),
sa.Column(
"alternative_names",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column("entity_type_id_name", sa.String(), nullable=False, index=True),
sa.Column("description", sa.String(), nullable=True),
sa.Column(
"keywords",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column(
"acl", postgresql.ARRAY(sa.String()), nullable=False, server_default="{}"
),
sa.Column("boosts", postgresql.JSONB, nullable=False, server_default="{}"),
sa.Column("attributes", postgresql.JSONB, nullable=False, server_default="{}"),
sa.Column("event_time", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.ForeignKeyConstraint(["entity_type_id_name"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["document_id"], ["document.id"]),
sa.UniqueConstraint(
"name",
"entity_type_id_name",
"document_id",
name="uq_kg_entity_name_type_doc",
),
)
op.create_index("ix_entity_type_acl", "kg_entity", ["entity_type_id_name", "acl"])
op.create_index(
"ix_entity_name_search", "kg_entity", ["name", "entity_type_id_name"]
)
# Create KGEntityExtractionStaging table
op.create_table(
"kg_entity_extraction_staging",
sa.Column("id_name", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column("name", sa.String(), nullable=False, index=True),
sa.Column("document_id", sa.String(), nullable=True, index=True),
sa.Column(
"alternative_names",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column("entity_type_id_name", sa.String(), nullable=False, index=True),
sa.Column("description", sa.String(), nullable=True),
sa.Column(
"keywords",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column(
"acl", postgresql.ARRAY(sa.String()), nullable=False, server_default="{}"
),
sa.Column("boosts", postgresql.JSONB, nullable=False, server_default="{}"),
sa.Column("attributes", postgresql.JSONB, nullable=False, server_default="{}"),
sa.Column("transferred_id_name", sa.String(), nullable=True, default=None),
sa.Column("entity_class", sa.String(), nullable=True, index=True),
sa.Column("entity_key", sa.String(), nullable=True, index=True),
sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
sa.Column("parent_key", sa.String(), nullable=True, index=True),
sa.Column("event_time", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.ForeignKeyConstraint(["entity_type_id_name"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["document_id"], ["document.id"]),
)
op.create_index(
"ix_entity_extraction_staging_acl",
"kg_entity_extraction_staging",
["entity_type_id_name", "acl"],
)
op.create_index(
"ix_entity_extraction_staging_name_search",
"kg_entity_extraction_staging",
["name", "entity_type_id_name"],
)
# Create KGRelationship table
op.create_table(
"kg_relationship",
sa.Column("id_name", sa.String(), nullable=False, index=True),
sa.Column("source_node", sa.String(), nullable=False, index=True),
sa.Column("target_node", sa.String(), nullable=False, index=True),
sa.Column("source_node_type", sa.String(), nullable=False, index=True),
sa.Column("target_node_type", sa.String(), nullable=False, index=True),
sa.Column("source_document", sa.String(), nullable=True, index=True),
sa.Column("type", sa.String(), nullable=False, index=True),
sa.Column("relationship_type_id_name", sa.String(), nullable=False, index=True),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.ForeignKeyConstraint(["source_node"], ["kg_entity.id_name"]),
sa.ForeignKeyConstraint(["target_node"], ["kg_entity.id_name"]),
sa.ForeignKeyConstraint(["source_node_type"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["target_node_type"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["source_document"], ["document.id"]),
sa.ForeignKeyConstraint(
["relationship_type_id_name"], ["kg_relationship_type.id_name"]
),
sa.UniqueConstraint(
"source_node",
"target_node",
"type",
name="uq_kg_relationship_source_target_type",
),
sa.PrimaryKeyConstraint("id_name", "source_document"),
)
op.create_index(
"ix_kg_relationship_nodes", "kg_relationship", ["source_node", "target_node"]
)
# Create KGRelationshipExtractionStaging table
op.create_table(
"kg_relationship_extraction_staging",
sa.Column("id_name", sa.String(), nullable=False, index=True),
sa.Column("source_node", sa.String(), nullable=False, index=True),
sa.Column("target_node", sa.String(), nullable=False, index=True),
sa.Column("source_node_type", sa.String(), nullable=False, index=True),
sa.Column("target_node_type", sa.String(), nullable=False, index=True),
sa.Column("source_document", sa.String(), nullable=True, index=True),
sa.Column("type", sa.String(), nullable=False, index=True),
sa.Column("relationship_type_id_name", sa.String(), nullable=False, index=True),
sa.Column("occurrences", sa.Integer(), server_default="1", nullable=False),
sa.Column("transferred", sa.Boolean(), nullable=False, server_default="false"),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
sa.ForeignKeyConstraint(
["source_node"], ["kg_entity_extraction_staging.id_name"]
),
sa.ForeignKeyConstraint(
["target_node"], ["kg_entity_extraction_staging.id_name"]
),
sa.ForeignKeyConstraint(["source_node_type"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["target_node_type"], ["kg_entity_type.id_name"]),
sa.ForeignKeyConstraint(["source_document"], ["document.id"]),
sa.ForeignKeyConstraint(
["relationship_type_id_name"],
["kg_relationship_type_extraction_staging.id_name"],
),
sa.UniqueConstraint(
"source_node",
"target_node",
"type",
name="uq_kg_relationship_extraction_staging_source_target_type",
),
sa.PrimaryKeyConstraint("id_name", "source_document"),
)
op.create_index(
"ix_kg_relationship_extraction_staging_nodes",
"kg_relationship_extraction_staging",
["source_node", "target_node"],
)
# Create KGTerm table
op.create_table(
"kg_term",
sa.Column("id_term", sa.String(), primary_key=True, nullable=False, index=True),
sa.Column(
"entity_types",
postgresql.ARRAY(sa.String()),
nullable=False,
server_default="{}",
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
onupdate=sa.text("now()"),
),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.text("now()")
),
)
op.create_index("ix_search_term_entities", "kg_term", ["entity_types"])
op.create_index("ix_search_term_term", "kg_term", ["id_term"])
op.add_column(
"document",
sa.Column("kg_stage", sa.String(), nullable=True, index=True),
)
op.add_column(
"document",
sa.Column("kg_processing_time", sa.DateTime(timezone=True), nullable=True),
)
op.add_column(
"connector",
sa.Column(
"kg_processing_enabled",
sa.Boolean(),
nullable=True,
server_default="false",
),
)
op.add_column(
"connector",
sa.Column(
"kg_coverage_days",
sa.Integer(),
nullable=True,
server_default=None,
),
)
# Create GIN index for clustering and normalization
op.execute("COMMIT")  # CREATE INDEX CONCURRENTLY cannot run inside a transaction block (mirrors downgrade)
op.execute(
    "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_kg_entity_clustering_trigrams "
    f"ON kg_entity USING GIN (name {POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE}.gin_trgm_ops)"
)
op.execute(
"CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_kg_entity_normalization_trigrams "
"ON kg_entity USING GIN (name_trigrams)"
)
# Create kg_entity trigger to update kg_entity.name and its trigrams
alphanum_pattern = r"[^a-z0-9]+"
truncate_length = 1000
function = "update_kg_entity_name"
op.execute(
text(
f"""
CREATE OR REPLACE FUNCTION {function}()
RETURNS TRIGGER AS $$
DECLARE
name text;
cleaned_name text;
BEGIN
-- Set name to semantic_id if document_id is not NULL
IF NEW.document_id IS NOT NULL THEN
SELECT lower(semantic_id) INTO name
FROM document
WHERE id = NEW.document_id;
ELSE
name = lower(NEW.name);
END IF;
-- Clean name and truncate if too long
cleaned_name = regexp_replace(
name,
'{alphanum_pattern}', '', 'g'
);
IF length(cleaned_name) > {truncate_length} THEN
cleaned_name = left(cleaned_name, {truncate_length});
END IF;
-- Set name and name trigrams
NEW.name = name;
NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE}.show_trgm(cleaned_name);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"""
)
)
trigger = f"{function}_trigger"
op.execute(f"DROP TRIGGER IF EXISTS {trigger} ON kg_entity")
op.execute(
f"""
CREATE TRIGGER {trigger}
BEFORE INSERT OR UPDATE OF name
ON kg_entity
FOR EACH ROW
EXECUTE FUNCTION {function}();
"""
)
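    # Note: the trigger above fires only on inserts and on updates of
    # kg_entity.name itself; renames arriving via document.semantic_id are
    # propagated by the document-side trigger created below.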
# Create document trigger to propagate semantic_id changes to kg_entity.name and its trigrams
function = "update_kg_entity_name_from_doc"
op.execute(
text(
f"""
CREATE OR REPLACE FUNCTION {function}()
RETURNS TRIGGER AS $$
DECLARE
doc_name text;
cleaned_name text;
BEGIN
doc_name = lower(NEW.semantic_id);
-- Clean name and truncate if too long
cleaned_name = regexp_replace(
doc_name,
'{alphanum_pattern}', '', 'g'
);
IF length(cleaned_name) > {truncate_length} THEN
cleaned_name = left(cleaned_name, {truncate_length});
END IF;
-- Set name and name trigrams for all entities referencing this document
UPDATE kg_entity
SET
name = doc_name,
name_trigrams = {POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE}.show_trgm(cleaned_name)
WHERE document_id = NEW.id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"""
)
)
trigger = f"{function}_trigger"
op.execute(f"DROP TRIGGER IF EXISTS {trigger} ON document")
op.execute(
f"""
CREATE TRIGGER {trigger}
AFTER UPDATE OF semantic_id
ON document
FOR EACH ROW
EXECUTE FUNCTION {function}();
"""
)
def downgrade() -> None:
# Drop all dynamically created views named kg_relationships_with_access*
op.execute(
"""
DO $$
DECLARE
view_name text;
BEGIN
FOR view_name IN
SELECT c.relname
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'v'
AND n.nspname = current_schema()
AND c.relname LIKE 'kg_relationships_with_access%'
LOOP
EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident(view_name);
END LOOP;
END $$;
"""
)
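# Drop all dynamically created views named allowed_docs*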
op.execute(
"""
DO $$
DECLARE
view_name text;
BEGIN
FOR view_name IN
SELECT c.relname
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'v'
AND n.nspname = current_schema()
AND c.relname LIKE 'allowed_docs%'
LOOP
EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident(view_name);
END LOOP;
END $$;
"""
)
for table, function in (
("kg_entity", "update_kg_entity_name"),
("document", "update_kg_entity_name_from_doc"),
):
op.execute(f"DROP TRIGGER IF EXISTS {function}_trigger ON {table}")
op.execute(f"DROP FUNCTION IF EXISTS {function}()")
# Drop index
op.execute("COMMIT") # Commit to allow CONCURRENTLY
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_kg_entity_clustering_trigrams")
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_kg_entity_normalization_trigrams")
# Drop tables in dependency order (referencing tables before the tables they reference)
op.drop_table("kg_term")
op.drop_table("kg_relationship")
op.drop_table("kg_entity")
op.drop_table("kg_relationship_type")
op.drop_table("kg_relationship_extraction_staging")
op.drop_table("kg_relationship_type_extraction_staging")
op.drop_table("kg_entity_extraction_staging")
op.drop_table("kg_entity_type")
op.drop_column("connector", "kg_processing_enabled")
op.drop_column("connector", "kg_coverage_days")
op.drop_column("document", "kg_stage")
op.drop_column("document", "kg_processing_time")
op.drop_table("kg_config")
# Revoke all privileges on the current schema from the readonly user
op.execute(
text(
f"""
DO $$
BEGIN
IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
EXECUTE format('REVOKE ALL ON SCHEMA %I FROM %I', current_schema(), '{DB_READONLY_USER}');
END IF;
END
$$;
"""
)
)
if not MULTI_TENANT:
# Drop read-only db user here only in single tenant mode. For multi-tenant mode,
# the user is dropped in the alembic_tenants migration.
op.execute(
text(
f"""
DO $$
BEGIN
IF EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{DB_READONLY_USER}') THEN
-- First revoke all privileges from the database
EXECUTE format('REVOKE ALL ON DATABASE %I FROM %I', current_database(), '{DB_READONLY_USER}');
-- Then drop the user
EXECUTE format('DROP USER %I', '{DB_READONLY_USER}');
END IF;
END
$$;
"""
)
)
op.execute(text("DROP EXTENSION IF EXISTS pg_trgm"))
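As a usage sketch separate from the migration itself: the trigram GIN index on kg_entity.name enables fuzzy entity lookup through pg_trgm's % similarity operator. The query below is hypothetical (connection URL and search string are placeholders), and whether the planner actually uses the index depends on the pg_trgm similarity threshold and table statistics.

from sqlalchemy import column, create_engine, func, select, table

# Lightweight handle on the kg_entity table created above.
kg_entity = table("kg_entity", column("id_name"), column("name"))

search = "acme corp"  # hypothetical search string
stmt = (
    select(
        kg_entity.c.id_name,
        kg_entity.c.name,
        func.similarity(kg_entity.c.name, search).label("score"),
    )
    # pg_trgm's % operator filters by trigram similarity and can be served
    # by the GIN (gin_trgm_ops) index this migration creates.
    .where(kg_entity.c.name.op("%")(search))
    .order_by(func.similarity(kg_entity.c.name, search).desc())
    .limit(10)
)

engine = create_engine("postgresql://user:pass@localhost/onyx")  # hypothetical DSN
with engine.connect() as conn:
    for row in conn.execute(stmt):
        print(row.id_name, row.score)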

View File

@@ -5,7 +5,6 @@ Revises: 7477a5f5d728
Create Date: 2024-08-10 19:20:34.527559
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: d9ec13955951
Create Date: 2024-08-20 15:28:52.993827
"""
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,7 +5,6 @@ Revises: f1ca58b2f2ec
Create Date: 2025-01-29 07:48:46.784041
"""
import logging
from typing import cast
from alembic import op

View File

@@ -5,7 +5,6 @@ Revises: 47e5bef3a1d7
Create Date: 2024-11-06 13:15:53.302644
"""
from typing import cast
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 7da0ae5ad583
Create Date: 2023-11-27 17:23:29.668422
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: f7e58d357687
Create Date: 2024-08-28 17:40:46.077470
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import func

View File

@@ -5,7 +5,6 @@ Revises: 94dc3d0236f8
Create Date: 2024-12-11 18:05:05.490737
"""
from alembic import op

View File

@@ -5,7 +5,6 @@ Revises: 61ff3651add4
Create Date: 2024-09-18 17:00:23.755399
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

View File

@@ -5,7 +5,6 @@ Revises: 7547d982db8f
Create Date: 2024-05-04 17:49:28.568109
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 800f48024ae9
Create Date: 2023-09-20 16:59:39.097177
"""
from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: d929f0c1c6af
Create Date: 2023-09-04 15:29:44.002164
"""
import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 949b4a92a401
Create Date: 2024-10-30 19:37:59.630704
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -1,24 +0,0 @@
"""Add content type to UserFile
Revision ID: 5c448911b12f
Revises: 47a07e1a38f1
Create Date: 2025-04-25 16:59:48.182672
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5c448911b12f"
down_revision = "47a07e1a38f1"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column("user_file", sa.Column("content_type", sa.String(), nullable=True))
def downgrade() -> None:
op.drop_column("user_file", "content_type")

View File

@@ -5,7 +5,6 @@ Revises: efb35676026c
Create Date: 2024-09-13 18:52:59.256478
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: e4334d5b33ba
Create Date: 2024-10-08 15:56:07.975636
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: e6a4bbc13fe4
Create Date: 2023-08-10 21:43:09.069523
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: dbaa756c2ccf
Create Date: 2024-02-16 15:02:03.319907
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 1d6ad76d1f37
Create Date: 2024-08-06 15:35:40.278485
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,7 +5,6 @@ Revises: 1b8206b29c5d
Create Date: 2024-09-05 13:57:11.770413
"""
import fastapi_users_db_sqlalchemy
from alembic import op

View File

@@ -5,7 +5,6 @@ Revises: 0a98909f2757
Create Date: 2024-05-07 14:54:55.493100
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -1,41 +0,0 @@
"""remove kg subtype from db
Revision ID: 65bc6e0f8500
Revises: cec7ec36c505
Create Date: 2025-06-13 10:04:27.705976
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "65bc6e0f8500"
down_revision = "cec7ec36c505"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_column("kg_entity", "entity_class")
op.drop_column("kg_entity", "entity_subtype")
op.drop_column("kg_entity_extraction_staging", "entity_class")
op.drop_column("kg_entity_extraction_staging", "entity_subtype")
def downgrade() -> None:
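    # Schema-only restore: the data from the dropped columns is not recoverable.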
op.add_column(
"kg_entity_extraction_staging",
sa.Column("entity_subtype", sa.String(), nullable=True, index=True),
)
op.add_column(
"kg_entity_extraction_staging",
sa.Column("entity_class", sa.String(), nullable=True, index=True),
)
op.add_column(
"kg_entity", sa.Column("entity_subtype", sa.String(), nullable=True, index=True)
)
op.add_column(
"kg_entity", sa.Column("entity_class", sa.String(), nullable=True, index=True)
)

View File

@@ -5,7 +5,6 @@ Revises: 5d12a446f5c0
Create Date: 2024-10-15 17:47:44.108537
"""
from alembic import op
import sqlalchemy as sa

View File

@@ -5,6 +5,11 @@ Revises: 8e1ac4f39a9f
Create Date: 2025-04-01 07:26:10.539362
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import inspect
import datetime
# revision identifiers, used by Alembic.
revision = "6a804aeb4830"
@@ -13,10 +18,99 @@ branch_labels = None
depends_on = None
# Leaving this around only because some people might be on this migration;
# it was originally a duplicate of the user files migration.
def upgrade() -> None:
pass
# Check if user_file table already exists
conn = op.get_bind()
inspector = inspect(conn)
if not inspector.has_table("user_file"):
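        # If the table already exists, the duplicate migration already did this work.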
# Create user_folder table without parent_id
op.create_table(
"user_folder",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column("name", sa.String(length=255), nullable=True),
sa.Column("description", sa.String(length=255), nullable=True),
sa.Column("display_priority", sa.Integer(), nullable=True, default=0),
sa.Column(
"created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
),
)
# Create user_file table with folder_id instead of parent_folder_id
op.create_table(
"user_file",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True),
sa.Column(
"folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
nullable=True,
),
sa.Column("link_url", sa.String(), nullable=True),
sa.Column("token_count", sa.Integer(), nullable=True),
sa.Column("file_type", sa.String(), nullable=True),
sa.Column("file_id", sa.String(length=255), nullable=False),
sa.Column("document_id", sa.String(length=255), nullable=False),
sa.Column("name", sa.String(length=255), nullable=False),
sa.Column(
"created_at",
sa.DateTime(),
default=datetime.datetime.utcnow,
),
sa.Column(
"cc_pair_id",
sa.Integer(),
sa.ForeignKey("connector_credential_pair.id"),
nullable=True,
unique=True,
),
)
# Create persona__user_file table
op.create_table(
"persona__user_file",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_file_id",
sa.Integer(),
sa.ForeignKey("user_file.id"),
primary_key=True,
),
)
# Create persona__user_folder table
op.create_table(
"persona__user_folder",
sa.Column(
"persona_id",
sa.Integer(),
sa.ForeignKey("persona.id"),
primary_key=True,
),
sa.Column(
"user_folder_id",
sa.Integer(),
sa.ForeignKey("user_folder.id"),
primary_key=True,
),
)
op.add_column(
"connector_credential_pair",
sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False),
)
# Update existing records to have is_user_file=False instead of NULL
op.execute(
"UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL"
)
def downgrade() -> None:

Some files were not shown because too many files have changed in this diff.