fix

updates
erase history
2026-02-17 15:55:45 +00:00 · 2025-06-30 15:19:42 -07:00 · 2025-06-30 15:06:12 -07:00 · 2025-06-30 09:01:23 -07:00 · 2025-06-30 08:16:43 -07:00 · 2025-06-30 07:49:55 -07:00
2247 changed files with 377985 additions and 661 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @onyx-dot-app/onyx-core-team
--- a/.github/actions/custom-build-and-push/action.yml
+++ b/.github/actions/custom-build-and-push/action.yml
@@ -0,0 +1,124 @@
+name: 'Build and Push Docker Image with Retry'
+description: 'Attempts to build and push a Docker image, with a retry on failure'
+inputs:
+  context:
+    description: 'Build context'
+    required: true
+  file:
+    description: 'Dockerfile location'
+    required: true
+  platforms:
+    description: 'Target platforms'
+    required: true
+  pull:
+    description: 'Always attempt to pull a newer version of the image'
+    required: false
+    default: 'true'
+  push:
+    description: 'Push the image to registry'
+    required: false
+    default: 'true'
+  load:
+    description: 'Load the image into Docker daemon'
+    required: false
+    default: 'true'
+  tags:
+    description: 'Image tags'
+    required: true
+  no-cache:
+    description: 'Do not use cache when building the image'
+    required: false
+    default: 'false'
+  cache-from:
+    description: 'Cache sources'
+    required: false
+  cache-to:
+    description: 'Cache destinations'
+    required: false
+  retry-wait-time:
+    description: 'Time to wait before attempt 2 in seconds'
+    required: false
+    default: '60'
+  retry-wait-time-2:
+    description: 'Time to wait before attempt 3 in seconds'
+    required: false
+    default: '120'
+
+runs:
+  using: "composite"
+  steps:
+    # Every attempt sets continue-on-error so a failed build never aborts the
+    # composite action early; later steps read `outcome` (the result before
+    # continue-on-error is applied) to decide whether another attempt is needed.
+    - name: Build and push Docker image (Attempt 1 of 3)
+      id: buildx1
+      uses: docker/build-push-action@v6
+      continue-on-error: true
+      with:
+        context: ${{ inputs.context }}
+        file: ${{ inputs.file }}
+        platforms: ${{ inputs.platforms }}
+        pull: ${{ inputs.pull }}
+        push: ${{ inputs.push }}
+        load: ${{ inputs.load }}
+        tags: ${{ inputs.tags }}
+        no-cache: ${{ inputs.no-cache }}
+        cache-from: ${{ inputs.cache-from }}
+        cache-to: ${{ inputs.cache-to }}
+
+    - name: Wait before attempt 2
+      if: steps.buildx1.outcome != 'success'
+      run: |
+        echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..."
+        sleep ${{ inputs.retry-wait-time }}
+      shell: bash
+
+    - name: Build and push Docker image (Attempt 2 of 3)
+      id: buildx2
+      if: steps.buildx1.outcome != 'success'
+      uses: docker/build-push-action@v6
+      # Without this, a failed second attempt fails the action and attempt 3 is skipped.
+      continue-on-error: true
+      with:
+        context: ${{ inputs.context }}
+        file: ${{ inputs.file }}
+        platforms: ${{ inputs.platforms }}
+        pull: ${{ inputs.pull }}
+        push: ${{ inputs.push }}
+        load: ${{ inputs.load }}
+        tags: ${{ inputs.tags }}
+        no-cache: ${{ inputs.no-cache }}
+        cache-from: ${{ inputs.cache-from }}
+        cache-to: ${{ inputs.cache-to }}
+
+    - name: Wait before attempt 3
+      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
+      run: |
+        echo "Second attempt failed. Waiting ${{ inputs.retry-wait-time-2 }} seconds before retry..."
+        sleep ${{ inputs.retry-wait-time-2 }}
+      shell: bash
+
+    - name: Build and push Docker image (Attempt 3 of 3)
+      id: buildx3
+      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
+      uses: docker/build-push-action@v6
+      # A failure here is turned into an action failure by the "Report failure" step.
+      continue-on-error: true
+      with:
+        context: ${{ inputs.context }}
+        file: ${{ inputs.file }}
+        platforms: ${{ inputs.platforms }}
+        pull: ${{ inputs.pull }}
+        push: ${{ inputs.push }}
+        load: ${{ inputs.load }}
+        tags: ${{ inputs.tags }}
+        no-cache: ${{ inputs.no-cache }}
+        cache-from: ${{ inputs.cache-from }}
+        cache-to: ${{ inputs.cache-to }}
+
+    - name: Report failure
+      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
+      run: |
+        echo "All attempts failed. Possible transient infrastructure issues? Try again later or inspect logs for details."
+        exit 1
+      shell: bash
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -0,0 +1,14 @@
+## Description
+
+[Provide a brief description of the changes in this PR]
+
+## How Has This Been Tested?
+
+[Describe the tests you ran to verify your changes]
+
+## Backporting (check the box to trigger backport action)
+
+Note: You have to check that the action passes, otherwise resolve the conflicts manually and tag the patches.
+
+- [ ] This PR should be backported (make sure to check that the backport attempt succeeds)
+- [ ] [Optional] Override Linear Check
--- a/.github/workflows/docker-build-push-backend-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-backend-container-on-tag.yml
@@ -0,0 +1,156 @@
+name: Build and Push Backend Image on Tag
+
+on:
+  push:
+    tags:
+      - "*"
+
+env:
+  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
+  DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
+  
+  # don't tag cloud images with "latest"
+  LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
+
+jobs:
+  build-and-push:
+    # Matrix build, one runner per platform (same layout as the web container workflow)
+    # See https://runs-on.com/runners/linux/
+    runs-on:
+      - runs-on
+      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
+      - run-id=${{ github.run_id }}
+      - tag=platform-${{ matrix.platform }}
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+          
+    steps:
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+          
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.ref_name }}
+            type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
+            
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Install build-essential
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential
+
+      - name: Backend Image Docker Build and Push
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile
+          platforms: ${{ matrix.platform }}
+          push: true
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      - name: Export digest      
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+          
+  merge:
+    runs-on: ubuntu-latest
+    needs:
+      - build-and-push
+    steps:
+      # Needed for trivyignore
+      - name: Checkout
+        uses: actions/checkout@v4
+        
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: /tmp/digests
+          pattern: backend-digests-*-${{ github.run_id }}
+          merge-multiple: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.ref_name }}
+            type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
+          
+      # trivy has their own rate limiting issues causing this action to flake
+      # we worked around it by hardcoding to different db repos in env
+      # can re-enable when they figure it out
+      # https://github.com/aquasecurity/trivy/discussions/7538
+      # https://github.com/aquasecurity/trivy-action/issues/389
+      - name: Run Trivy vulnerability scanner
+        uses: aquasecurity/trivy-action@master
+        env:
+          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
+          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
+          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
+        with:
+          # To run locally: trivy image --severity HIGH,CRITICAL onyxdotapp/onyx-backend
+          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
+          severity: "CRITICAL,HIGH"
+          trivyignores: ./backend/.trivyignore
--- a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
@@ -0,0 +1,150 @@
+name: Build and Push Cloud Web Image on Tag
+# Identical to the web container build, but with correct image tag and build args
+
+on:
+  push:
+    tags:
+      - "*cloud*"
+
+env:
+  REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
+  DEPLOYMENT: cloud
+  
+jobs:
+  build:
+    runs-on:
+      - runs-on
+      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
+      - run-id=${{ github.run_id }}
+      - tag=platform-${{ matrix.platform }}
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+
+    steps:
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.ref_name }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: ${{ matrix.platform }}
+          push: true
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+            NEXT_PUBLIC_CLOUD_ENABLED=true
+            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
+            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
+            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
+            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
+            NEXT_PUBLIC_GTM_ENABLED=true
+            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
+            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
+            NODE_OPTIONS=--max-old-space-size=8192
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+          # no-cache needed due to weird interactions with the builds for different platforms
+          # NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
+
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
+  merge:
+    runs-on: ubuntu-latest
+    needs:
+      - build
+    steps:
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: /tmp/digests
+          pattern: cloudweb-digests-*-${{ github.run_id }}
+          merge-multiple: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.ref_name }}
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
+
+      # trivy has their own rate limiting issues causing this action to flake
+      # we worked around it by hardcoding to different db repos in env
+      # can re-enable when they figure it out
+      # https://github.com/aquasecurity/trivy/discussions/7538
+      # https://github.com/aquasecurity/trivy-action/issues/389
+      - name: Run Trivy vulnerability scanner
+        uses: aquasecurity/trivy-action@master
+        env:
+          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
+          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
+          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
+        with:
+          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
+          severity: "CRITICAL,HIGH"
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -0,0 +1,176 @@
+name: Build and Push Model Server Image on Tag
+
+on:
+  push:
+    tags:
+      - "*"
+
+env:
+  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
+  DOCKER_BUILDKIT: 1
+  BUILDKIT_PROGRESS: plain
+  DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
+
+  # don't tag cloud images with "latest"
+  LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
+  
+jobs:
+
+#   Bypassing this for now as the idea of not building is glitching
+#   releases and builds that depends on everything being tagged in docker
+#   1) Preliminary job to check if the changed files are relevant
+#   check_model_server_changes:
+#     runs-on: ubuntu-latest
+#     outputs:
+#       changed: ${{ steps.check.outputs.changed }}
+#     steps:
+#       - name: Checkout code
+#         uses: actions/checkout@v4
+# 
+#       - name: Check if relevant files changed
+#         id: check
+#         run: |
+#           # Default to "false"
+#           echo "changed=false" >> $GITHUB_OUTPUT
+# 
+#           # Compare the previous commit (github.event.before) to the current one (github.sha)
+#           # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
+#           # set changed=true
+#           if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
+#              | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
+#             echo "changed=true" >> $GITHUB_OUTPUT
+#           fi
+
+  check_model_server_changes:
+    runs-on: ubuntu-latest
+    outputs:
+      changed: "true"  # literal value, so the `changed == 'true'` checks in the jobs below always pass
+    steps:
+      - name: Bypass check and set output
+        run: echo "changed=true" >> $GITHUB_OUTPUT
+        
+  build-amd64:
+    needs: [check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
+    runs-on:
+      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
+    env:
+      PLATFORM_PAIR: linux-amd64
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: System Info
+        run: |
+          df -h
+          free -h
+          docker system prune -af --volumes
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: |
+            image=moby/buildkit:latest
+            network=host
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and Push AMD64
+        uses: docker/build-push-action@v6
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/amd64
+          push: true
+          tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64
+          build-args: |
+            DANSWER_VERSION=${{ github.ref_name }}
+          outputs: type=registry
+          provenance: false
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+#           no-cache: true
+
+  build-arm64:
+    needs: [check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
+    # Build on a native arm64 runner; no QEMU emulation is set up in this job.
+    runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-arm64"]
+    env:
+      PLATFORM_PAIR: linux-arm64
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: System Info
+        run: |
+          df -h
+          free -h
+          docker system prune -af --volumes
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          driver-opts: |
+            image=moby/buildkit:latest
+            network=host
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and Push ARM64
+        uses: docker/build-push-action@v6
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/arm64
+          push: true
+          tags: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
+          build-args: |
+            DANSWER_VERSION=${{ github.ref_name }}
+          outputs: type=registry
+          provenance: false
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+  merge-and-scan:
+    needs: [build-amd64, build-arm64, check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Create and Push Multi-arch Manifest
+        run: |
+          docker buildx create --use
+          docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
+            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
+            ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
+          if [[ "${{ env.LATEST_TAG }}" == "true" ]]; then
+            docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
+              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
+              ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
+          fi
+
+      - name: Run Trivy vulnerability scanner
+        uses: aquasecurity/trivy-action@master
+        env:
+          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
+          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
+          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
+        with:
+          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
+          severity: "CRITICAL,HIGH"
+          timeout: "10m"
--- a/.github/workflows/docker-build-push-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-web-container-on-tag.yml
@@ -0,0 +1,161 @@
+name: Build and Push Web Image on Tag
+
+on:
+  push:
+    tags:
+      - "*"
+
+env:
+  REGISTRY_IMAGE: onyxdotapp/onyx-web-server
+  LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
+  DEPLOYMENT: standalone
+
+jobs:
+  precheck:
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    outputs:
+      should-run: ${{ steps.set-output.outputs.should-run }}
+    steps:
+      - name: Check if tag contains "cloud"
+        id: set-output
+        run: |
+          if [[ "${{ github.ref_name }}" == *cloud* ]]; then
+            echo "should-run=false" >> "$GITHUB_OUTPUT"
+          else
+            echo "should-run=true" >> "$GITHUB_OUTPUT"
+          fi
+  build:
+    needs: precheck
+    if: needs.precheck.outputs.should-run == 'true'
+    runs-on:
+      - runs-on
+      - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }}
+      - run-id=${{ github.run_id }}
+      - tag=platform-${{ matrix.platform }}
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+          - linux/arm64
+
+    steps:
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.ref_name }}
+            type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: ${{ matrix.platform }}
+          push: true
+          build-args: |
+            ONYX_VERSION=${{ github.ref_name }}
+            NODE_OPTIONS=--max-old-space-size=8192
+
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+          # no-cache needed due to weird interactions with the builds for different platforms
+          # NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off
+          
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
+  merge:
+    # precheck must be a direct dependency: the `needs` context only exposes jobs listed here
+    needs: [precheck, build]
+    if: needs.precheck.outputs.should-run == 'true'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: /tmp/digests
+          pattern: web-digests-*-${{ github.run_id }}
+          merge-multiple: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY_IMAGE }}
+          flavor: |
+            latest=false
+          tags: |
+            type=raw,value=${{ github.ref_name }}
+            type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }}
+
+      # trivy has their own rate limiting issues causing this action to flake
+      # we worked around it by hardcoding to different db repos in env
+      # can re-enable when they figure it out
+      # https://github.com/aquasecurity/trivy/discussions/7538
+      # https://github.com/aquasecurity/trivy-action/issues/389
+      - name: Run Trivy vulnerability scanner
+        uses: aquasecurity/trivy-action@master
+        env:
+          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
+          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
+          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
+        with:
+          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
+          severity: "CRITICAL,HIGH"
--- a/.github/workflows/docker-tag-latest.yml
+++ b/.github/workflows/docker-tag-latest.yml
@@ -0,0 +1,41 @@
+# This workflow is set up to be manually triggered via the GitHub Action tab.
+# Given a version, it will tag those backend and webserver images as "latest".
+
+name: Tag Latest Version
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: "The version (ie v0.0.1) to tag as latest"
+        required: true
+
+jobs:
+  tag:
+    # See https://runs-on.com/runners/linux/
+    # use a lower powered instance since this just does i/o to docker hub
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    env:
+      # Hand the user-supplied version to the shell as data instead of
+      # templating it into the script text, so it cannot inject commands.
+      VERSION: ${{ github.event.inputs.version }}
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Enable Docker CLI experimental features
+        run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
+
+      - name: Pull, Tag and Push Web Server Image
+        run: |
+          docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest "onyxdotapp/onyx-web-server:${VERSION}"
+
+      - name: Pull, Tag and Push API Server Image
+        run: |
+          docker buildx imagetools create -t onyxdotapp/onyx-backend:latest "onyxdotapp/onyx-backend:${VERSION}"
--- a/.github/workflows/hotfix-release-branches.yml
+++ b/.github/workflows/hotfix-release-branches.yml
@@ -0,0 +1,171 @@
+# This workflow is intended to be manually triggered via the GitHub Action tab.
+# Given a hotfix branch, it will attempt to open a PR to all release branches and
+# by default auto merge them
+
+name: Hotfix release branches
+
+# Manual trigger only; every input arrives in the job as a string.
+on:
+  workflow_dispatch:
+    inputs:
+      hotfix_commit:
+        description: "Hotfix commit hash"
+        required: true
+      hotfix_suffix:
+        description: "Hotfix branch suffix (e.g. hotfix/v0.8-{suffix})"
+        required: true
+      release_branch_pattern:
+        description: "Release branch pattern (regex)"
+        required: true
+        default: "release/.*"
+      auto_merge:
+        description: "Automatically merge the hotfix PRs"
+        required: true
+        type: choice
+        default: "true"
+        # Quoted so the options are strings like the default, not YAML booleans.
+        options:
+          - "true"
+          - "false"
+
+jobs:
+  hotfix_release_branches:
+    permissions: write-all
+    # See https://runs-on.com/runners/linux/
+    # use a lower powered instance since this just runs git and GitHub CLI commands
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    steps:
+      # needs RKUO_DEPLOY_KEY for write access to merge PR's
+      # fetch-depth: 0 requests the full history rather than a shallow clone
+      # (presumably so the hotfix commit and the release branches are available).
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
+          fetch-depth: 0
+
+      # Identity used for the cherry-pick commits created later in this job.
+      - name: Set up Git user
+        run: |
+          git config user.name "Richard Kuo [bot]"
+          git config user.email "rkuo[bot]@onyx.app"
+
+      # Refresh every remote's tracking refs and drop refs deleted upstream.
+      - name: Fetch All Branches
+        run: |
+          git fetch --all --prune
+
+      # Fail early when the requested commit cannot be resolved in this clone.
+      # The input is passed through env so it is never spliced into the script.
+      - name: Verify Hotfix Commit Exists
+        env:
+          HOTFIX_COMMIT: ${{ github.event.inputs.hotfix_commit }}
+        run: |
+          git rev-parse --verify "$HOTFIX_COMMIT" || { echo "Commit not found: $HOTFIX_COMMIT"; exit 1; }
+
+      # Collects the remote branches matching the user-supplied regex and exposes
+      # them as the comma separated step output "branches". Fails the job when
+      # nothing matches.
+      - name: Get Release Branches
+        id: get_release_branches
+        env:
+          # Passed through env so the regex is never spliced into the script.
+          RELEASE_BRANCH_PATTERN: ${{ github.event.inputs.release_branch_pattern }}
+        run: |
+          # "--" keeps a pattern that starts with "-" from being read as an option
+          BRANCHES=$(git branch -r | grep -E -- "$RELEASE_BRANCH_PATTERN" | sed 's|origin/||' | tr -d ' ')
+          if [ -z "$BRANCHES" ]; then
+            echo "No release branches found matching pattern '$RELEASE_BRANCH_PATTERN'."
+            exit 1
+          fi
+
+          echo "Found release branches:"
+          echo "$BRANCHES"
+
+          # Join the branches into a single line separated by commas
+          BRANCHES_JOINED=$(echo "$BRANCHES" | tr '\n' ',' | sed 's/,$//')
+
+          # Set the branches as an output
+          echo "branches=$BRANCHES_JOINED" >> "$GITHUB_OUTPUT"
+
+      # notes on all the vagaries of wiring up automated PR's
+      # https://github.com/peter-evans/create-pull-request/blob/main/docs/concepts-guidelines.md#triggering-further-workflow-runs
+      # we must use a custom token for GH_TOKEN to trigger the subsequent PR checks
+      #
+      # For every matching release branch: branch off hotfix/<version>-<suffix>,
+      # cherry-pick the hotfix commit onto it, push it, open a PR back into the
+      # release branch and, when AUTO_MERGE is "true", enable auto-merge on it.
+      # A failed cherry-pick or an already open PR skips that branch only.
+      - name: Create and Merge Pull Requests to Matching Release Branches
+        env:
+          HOTFIX_COMMIT: ${{ github.event.inputs.hotfix_commit }}
+          HOTFIX_SUFFIX: ${{ github.event.inputs.hotfix_suffix }}
+          AUTO_MERGE: ${{ github.event.inputs.auto_merge }}
+          GH_TOKEN: ${{ secrets.RKUO_PERSONAL_ACCESS_TOKEN }}
+          # Comma separated branch list produced by the previous step
+          BRANCHES: ${{ steps.get_release_branches.outputs.branches }}
+        run: |
+          # Convert BRANCHES to an array
+          IFS=$',' read -ra BRANCH_ARRAY <<< "$BRANCHES"
+
+          # Loop through each release branch and create and merge a PR
+          for RELEASE_BRANCH in "${BRANCH_ARRAY[@]}"; do
+            echo "Processing $RELEASE_BRANCH..."
+
+            # Parse out the release version by removing "release/" from the branch name
+            RELEASE_VERSION=${RELEASE_BRANCH#release/}
+            echo "Release version parsed: $RELEASE_VERSION"
+
+            HOTFIX_BRANCH="hotfix/${RELEASE_VERSION}-${HOTFIX_SUFFIX}"
+            echo "Creating PR from $HOTFIX_BRANCH to $RELEASE_BRANCH"
+
+            # Checkout the release branch
+            echo "Checking out $RELEASE_BRANCH"
+            git checkout "$RELEASE_BRANCH"
+
+            # Create the new hotfix branch, or switch to it when it already
+            # exists, so the cherry-pick never lands on the release branch itself
+            if git rev-parse --verify "$HOTFIX_BRANCH" >/dev/null 2>&1; then
+              echo "Hotfix branch $HOTFIX_BRANCH already exists. Skipping branch creation."
+              git checkout "$HOTFIX_BRANCH"
+            else
+              echo "Branching $RELEASE_BRANCH to $HOTFIX_BRANCH"
+              git checkout -b "$HOTFIX_BRANCH"
+            fi
+
+            # A merge commit is one that has a second parent. (Testing the exit
+            # status of "git rev-list --merges" is true for any valid commit.)
+            CHERRY_PICK_ARGS=()
+            if git rev-parse --verify --quiet "${HOTFIX_COMMIT}^2" >/dev/null; then
+              # -m 1 uses the target branch as the base (which is what we want)
+              echo "Hotfix commit $HOTFIX_COMMIT is a merge commit, using -m 1 for cherry-pick"
+              CHERRY_PICK_ARGS+=(-m 1)
+            fi
+
+            # Perform the cherry-pick. The command is tested directly by "if":
+            # run scripts execute under "bash -e", so a separate "$?" check after
+            # a failing command would never be reached.
+            echo "Executing: git cherry-pick ${CHERRY_PICK_ARGS[*]} $HOTFIX_COMMIT"
+            if ! git cherry-pick "${CHERRY_PICK_ARGS[@]}" "$HOTFIX_COMMIT"; then
+              echo "Cherry-pick failed for $HOTFIX_COMMIT on $HOTFIX_BRANCH. Aborting..."
+              # best effort: there may be nothing left to abort
+              git cherry-pick --abort || true
+              continue
+            fi
+
+            # Push the hotfix branch to the remote
+            echo "Pushing $HOTFIX_BRANCH..."
+            git push origin "$HOTFIX_BRANCH"
+            echo "Hotfix branch $HOTFIX_BRANCH created and pushed."
+
+            # Check if PR already exists
+            EXISTING_PR=$(gh pr list --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH" --state open --json number --jq '.[0].number')
+
+            if [ -n "$EXISTING_PR" ]; then
+              echo "An open PR already exists: #$EXISTING_PR. Skipping..."
+              continue
+            fi
+
+            # Create a new PR and capture the output
+            PR_OUTPUT=$(gh pr create --title "Merge $HOTFIX_BRANCH into $RELEASE_BRANCH" \
+              --body "Automated PR to merge \`$HOTFIX_BRANCH\` into \`$RELEASE_BRANCH\`." \
+              --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH")
+
+            # Extract the URL from the output
+            PR_URL=$(echo "$PR_OUTPUT" | grep -Eo 'https://github.com/[^ ]+')
+            echo "Pull request created: $PR_URL"
+
+            # Extract PR number from URL
+            PR_NUMBER=$(basename "$PR_URL")
+            echo "Pull request created: $PR_NUMBER"
+
+            if [ "$AUTO_MERGE" == "true" ]; then
+              echo "Attempting to merge pull request #$PR_NUMBER"
+
+              # Attempt to merge the PR; a failure is reported but does not stop
+              # the remaining release branches from being processed
+              if gh pr merge "$PR_NUMBER" --merge --auto --delete-branch; then
+                echo "Pull request #$PR_NUMBER merged successfully."
+              else
+                echo "Failed to merge pull request #$PR_NUMBER."
+              fi
+            fi
+          done
--- a/.github/workflows/nightly-close-stale-issues.yml
+++ b/.github/workflows/nightly-close-stale-issues.yml
@@ -0,0 +1,23 @@
+# Labels issues and PRs as stale after 75 days without activity and closes
+# them 15 days after that unless the label is removed or someone comments.
+name: 'Nightly - Close stale issues and PRs'
+on:
+  schedule:
+    - cron: '0 11 * * *' # Runs every day at 3 AM PST / 4 AM PDT / 11 AM UTC
+
+permissions:
+  # contents: write # only for delete-branch option
+  issues: write
+  pull-requests: write
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v9
+        with:
+          stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
+          stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
+          close-issue-message: 'This issue was closed because it has been stalled for 90 days with no activity.'
+          close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.'
+          days-before-stale: 75
+          # days-before-close counts from the moment the stale label is applied,
+          # so 75 + 15 = 90 days in total, matching the messages above. Left
+          # unset, the action's own default would apply instead of the promised 15.
+          days-before-close: 15
+          
--- a/.github/workflows/nightly-scan-licenses.yml
+++ b/.github/workflows/nightly-scan-licenses.yml
@@ -0,0 +1,142 @@
+# Scan for problematic software licenses
+
+# trivy has their own rate limiting issues causing this action to flake
+# we worked around it by hardcoding to different db repos in env
+# can re-enable when they figure it out
+# https://github.com/aquasecurity/trivy/discussions/7538
+# https://github.com/aquasecurity/trivy-action/issues/389
+
+name: 'Nightly - Scan licenses'
+on:
+#   schedule:
+#     - cron: '0 14 * * *'  # Runs every day at 6 AM PST / 7 AM PDT / 2 PM UTC
+  workflow_dispatch:  # Allows manual triggering
+
+permissions:
+  actions: read
+  contents: read
+  security-events: write
+  
+jobs:
+  scan-licenses:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+          cache-dependency-path: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+            backend/requirements/model_server.txt
+      
+      # Installs all three requirements files into one environment and freezes
+      # it, so requirements-all.txt also lists the transitive dependencies. The
+      # license check step reads that file.
+      - name: Get explicit and transitive dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
+          pip freeze > requirements-all.txt
+                    
+      - name: Check python
+        id: license_check_report
+        uses: pilosus/action-pip-license-checker@v2
+        with:
+          requirements: 'requirements-all.txt'
+          fail: 'Copyleft'
+          exclude: '(?i)^(pylint|aio[-_]*).*'
+          
+      # Runs even when the license check failed so the report is always visible.
+      # The report goes through env: spliced into the script, any quote, backtick
+      # or $(...) in it would be interpreted by the shell.
+      - name: Print report
+        if: always()
+        env:
+          LICENSE_REPORT: ${{ steps.license_check_report.outputs.report }}
+        run: echo "$LICENSE_REPORT"
+      
+      - name: Install npm dependencies
+        working-directory: ./web
+        run: npm ci
+
+        # be careful enabling the sarif and upload as it may spam the security tab
+        # with a huge amount of items. Work out the issues before enabling upload.       
+#       - name: Run Trivy vulnerability scanner in repo mode
+#         if: always()
+#         uses: aquasecurity/trivy-action@0.29.0
+#         with:
+#           scan-type: fs
+#           scan-ref: .
+#           scanners: license
+#           format: table
+#           severity: HIGH,CRITICAL
+# #           format: sarif
+# #           output: trivy-results.sarif
+# 
+# #       - name: Upload Trivy scan results to GitHub Security tab
+# #         uses: github/codeql-action/upload-sarif@v3
+# #         with:
+# #           sarif_file: trivy-results.sarif
+
+  scan-trivy:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
+      
+    steps:
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Login to Docker Hub
+      uses: docker/login-action@v3
+      with:
+        username: ${{ secrets.DOCKER_USERNAME }}
+        password: ${{ secrets.DOCKER_TOKEN }}
+
+    # Backend
+    - name: Pull backend docker image
+      run: docker pull onyxdotapp/onyx-backend:latest
+
+    - name: Run Trivy vulnerability scanner on backend
+      uses: aquasecurity/trivy-action@0.29.0
+      env:
+        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
+        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
+      with:
+        image-ref: onyxdotapp/onyx-backend:latest
+        scanners: license
+        severity: HIGH,CRITICAL
+        vuln-type: library
+        exit-code: 0  # Set to 1 if we want a failed scan to fail the workflow
+
+    # Web server
+    - name: Pull web server docker image
+      run: docker pull onyxdotapp/onyx-web-server:latest
+          
+    - name: Run Trivy vulnerability scanner on web server
+      uses: aquasecurity/trivy-action@0.29.0
+      env:
+        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
+        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
+      with:
+        image-ref: onyxdotapp/onyx-web-server:latest
+        scanners: license
+        severity: HIGH,CRITICAL
+        vuln-type: library
+        exit-code: 0
+
+    # Model server
+    - name: Pull model server docker image
+      run: docker pull onyxdotapp/onyx-model-server:latest
+
+    # Named like the backend and web server scans so the three steps can be
+    # told apart in the run log. exit-code 0 keeps findings from failing the job.
+    - name: Run Trivy vulnerability scanner on model server
+      uses: aquasecurity/trivy-action@0.29.0
+      env:
+        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
+        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
+      with:
+        image-ref: onyxdotapp/onyx-model-server:latest
+        scanners: license
+        severity: HIGH,CRITICAL
+        vuln-type: library
+        exit-code: 0
--- a/.github/workflows/pr-backport-autotrigger.yml
+++ b/.github/workflows/pr-backport-autotrigger.yml
@@ -0,0 +1,124 @@
+name: Backport on Merge
+
+# Note this workflow does not trigger the builds, be sure to manually tag the branches to trigger the builds
+
+on:
+  pull_request:
+    types: [closed] # Later we check for merge so only PRs that go in can get backported
+
+permissions:
+  contents: write
+  actions: write
+
+jobs:
+  backport:
+    if: github.event.pull_request.merged == true
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
+          fetch-depth: 0
+
+      - name: Set up Git user
+        run: |
+          git config user.name "Richard Kuo [bot]"
+          git config user.email "rkuo[bot]@onyx.app"
+          git fetch --prune
+
+      # Sets the step output backport=true when the PR description contains the
+      # ticked backport checkbox, otherwise backport=false.
+      - name: Check for Backport Checkbox
+        id: checkbox-check
+        env:
+          # The PR body is untrusted free text. Passing it through env keeps it
+          # out of the script source, where quotes or $(...) would be executed.
+          PR_BODY: ${{ github.event.pull_request.body }}
+        run: |
+          if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
+            echo "backport=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "backport=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      # Works out the two newest release branches (beta = newest, stable = second
+      # newest) and the next beta/stable tag names, and exposes all of them as
+      # step outputs.
+      - name: List and sort release branches
+        id: list-branches
+        run: |
+          git fetch --all --tags
+          # Version part of every origin/release/* branch, highest version first
+          BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr)
+          # NOTE(review): with a single release branch STABLE equals BETA, and
+          # with none both are empty - confirm that is acceptable
+          BETA=$(echo "$BRANCHES" | head -n 1)
+          STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1)
+          echo "beta=release/$BETA" >> $GITHUB_OUTPUT
+          echo "stable=release/$STABLE" >> $GITHUB_OUTPUT
+          # Fetch latest tags for beta and stable
+          # (only tags of the exact form vX.Y.Z-beta.N / vX.Y.Z are considered)
+          LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
+          LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)
+
+          # Handle case where no beta tags exist
+          if [[ -z "$LATEST_BETA_TAG" ]]; then
+            NEW_BETA_TAG="v1.0.0-beta.1"
+          else
+            # Split on "." and "-" and bump the last field (the beta counter)
+            NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
+          fi
+
+          # Increment latest stable tag
+          # NOTE(review): unlike the beta case there is no fallback here; with no
+          # stable tag the result is "..1" - confirm a stable tag always exists
+          NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
+          echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
+          echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT
+          echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT
+          echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT
+
+      - name: Echo branch and tag information
+        run: |
+          echo "Beta branch: ${{ steps.list-branches.outputs.beta }}"
+          echo "Stable branch: ${{ steps.list-branches.outputs.stable }}"
+          echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}"
+          echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}"
+          echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}"
+          echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}"
+
+      # Runs only when the backport checkbox was ticked. Cherry-picks the PR's
+      # merge commit onto the beta branch and then the stable branch, tagging and
+      # pushing each one. Beta is pushed before stable is attempted, so a stable
+      # failure leaves the beta backport in place.
+      - name: Trigger Backport
+        if: steps.checkbox-check.outputs.backport == 'true'
+        run: |
+          set -e
+          echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"
+
+          # Echo the merge commit SHA
+          echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"
+
+          # Refresh remote-tracking refs and drop ones deleted upstream
+          git fetch --prune
+
+          # Check out the beta branch and show its most recent commits
+          git checkout ${{ steps.list-branches.outputs.beta }}
+          echo "Last 5 commits on beta branch:"
+          git log -n 5 --pretty=format:"%H"
+          echo ""  # Newline for formatting
+
+          # Cherry-pick the merge commit from the merged PR
+          # (-m 1 assumes the SHA is a two-parent merge commit - TODO confirm
+          # for squash or rebase merges)
+          git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
+            echo "Cherry-pick to beta failed due to conflicts."
+            exit 1
+          }
+
+          # Tag the cherry-picked commit with the new beta tag
+          git tag ${{ steps.list-branches.outputs.new_beta_tag }}
+          # Push the branch and the tag to origin
+          # NOTE(review): the checkout step configures an SSH deploy key, so these
+          # pushes presumably use that key rather than the PAT - confirm
+          git push origin ${{ steps.list-branches.outputs.beta }}
+          git push origin ${{ steps.list-branches.outputs.new_beta_tag }}
+
+          # Check out the stable branch and show its most recent commits
+          git checkout ${{ steps.list-branches.outputs.stable }}
+          echo "Last 5 commits on stable branch:"
+          git log -n 5 --pretty=format:"%H"
+          echo ""  # Newline for formatting
+
+          # Cherry-pick the merge commit from the merged PR
+          git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
+            echo "Cherry-pick to stable failed due to conflicts."
+            exit 1
+          }
+
+          # Tag the cherry-picked commit with the new stable tag
+          git tag ${{ steps.list-branches.outputs.new_stable_tag }}
+          # Push the branch and the tag to origin
+          git push origin ${{ steps.list-branches.outputs.stable }}
+          git push origin ${{ steps.list-branches.outputs.new_stable_tag }}
--- a/.github/workflows/pr-external-dependency-unit-tests.yml
+++ b/.github/workflows/pr-external-dependency-unit-tests.yml
@@ -0,0 +1,86 @@
+name: External Dependency Unit Tests
+
+on:
+  merge_group:
+  pull_request:
+    branches: [main]
+
+env:
+  # AWS
+  S3_AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
+  S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
+
+  # MinIO
+  S3_ENDPOINT_URL: "http://localhost:9004"
+
+jobs:
+  discover-test-dirs:
+    runs-on: ubuntu-latest
+    outputs:
+      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      
+      # Publishes the names of the immediate subdirectories of the external
+      # dependency test folder as a JSON array in the "test-dirs" step output.
+      - id: set-matrix
+        name: Discover test directories
+        run: |
+          test_root=backend/tests/external_dependency_unit
+          # one basename per line -> sorted -> compact JSON array of strings
+          matrix_json=$(
+            find "$test_root" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; \
+              | sort \
+              | jq -R -s -c 'split("\n")[:-1]'
+          )
+          echo "test-dirs=$matrix_json" >> $GITHUB_OUTPUT
+
+  external-dependency-unit-tests:
+    needs: discover-test-dirs
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    
+    strategy:
+      fail-fast: false
+      matrix:
+        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
+
+    env:
+      PYTHONPATH: ./backend
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+          cache-dependency-path: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+          playwright install chromium
+          playwright install-deps chromium
+
+      - name: Set up Standard Dependencies
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.dev.yml -p onyx-stack up -d minio relational_db cache index
+
+      - name: Run migrations
+        run: |
+          cd backend
+          alembic upgrade head
+
+      # Runs pytest for one matrix directory with 8 workers, distributing whole
+      # files to workers and stopping at the first failure (-x).
+      - name: Run Tests for ${{ matrix.test-dir }}
+        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
+        env:
+          # The directory name comes from the checked-out tree. Pass it through
+          # env and quote it instead of splicing it into the script.
+          TEST_DIR: ${{ matrix.test-dir }}
+        run: |
+          py.test \
+            -n 8 \
+            --dist loadfile \
+            --durations=8 \
+            -o junit_family=xunit2 \
+            -xv \
+            --ff \
+            "backend/tests/external_dependency_unit/${TEST_DIR}"
--- a/.github/workflows/pr-helm-chart-testing.yml
+++ b/.github/workflows/pr-helm-chart-testing.yml
@@ -0,0 +1,61 @@
+name: Helm - Lint and Test Charts
+
+on:
+  merge_group:
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:  # Allows manual triggering
+  
+jobs:
+  helm-chart-check:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"]
+
+    # fetch-depth 0 is required for helm/chart-testing-action
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        
+    - name: Set up Helm
+      uses: azure/setup-helm@v4.2.0
+      with:
+        version: v3.17.0
+      
+    - name: Set up chart-testing
+      uses: helm/chart-testing-action@v2.7.0
+
+    # even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
+    # Sets the step output changed=true only when ct lists at least one changed
+    # chart. Otherwise the output stays unset and the steps gated on it are skipped.
+    - name: Run chart-testing (list-changed)
+      id: list-changed
+      run: |
+        echo "default_branch: ${{ github.event.repository.default_branch }}"
+        changed=$(ct list-changed --remote origin --target-branch ${{ github.event.repository.default_branch }} --chart-dirs deployment/helm/charts)
+        echo "list-changed output: $changed"
+        if [[ -n "$changed" ]]; then
+          echo "changed=true" >> "$GITHUB_OUTPUT"
+        fi
+
+    # uncomment to force run chart-testing
+#     - name: Force run chart-testing (list-changed)
+#       id: list-changed
+#       run: echo "changed=true" >> $GITHUB_OUTPUT
+        
+    # lint all charts if any changes were detected
+    - name: Run chart-testing (lint)
+      if: steps.list-changed.outputs.changed == 'true'
+      run: ct lint --config ct.yaml --all
+      # the following would lint only changed charts, but linting isn't expensive
+      # run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
+
+    - name: Create kind cluster
+      if: steps.list-changed.outputs.changed == 'true'
+      uses: helm/kind-action@v1.12.0
+
+    - name: Run chart-testing (install)
+      if: steps.list-changed.outputs.changed == 'true'
+      run: ct install --all --helm-extra-set-args="--set=nginx.enabled=false" --debug --config ct.yaml
+      # the following would install only changed charts, but we only have one chart so 
+      # don't worry about that for now
+      # run: ct install --target-branch ${{ github.event.repository.default_branch }}
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -0,0 +1,319 @@
+name: Run Integration Tests v2
+concurrency:
+  group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true
+
+on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - "release/**"
+
+env:
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+  CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
+  CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
+  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
+  JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
+  JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
+  JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
+  PLATFORM_PAIR: linux-amd64
+
+jobs:
+  integration-tests:
+    # See https://runs-on.com/runners/linux/
+    runs-on:
+      [
+        runs-on,
+        runner=32cpu-linux-x64,
+        disk=large,
+        "run-id=${{ github.run_id }}",
+      ]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # The pip cache is keyed on the three requirements files installed below.
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+          cache-dependency-path: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+            backend/requirements/ee.txt
+
+      # Named so it shows up in the run log like every other step.
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/ee.txt
+
+      - name: Generate OpenAPI schema
+        working-directory: ./backend
+        env:
+          PYTHONPATH: "."
+        run: |
+          python scripts/onyx_openapi_schema.py --filename generated/openapi.json
+
+      - name: Generate OpenAPI Python client
+        working-directory: ./backend
+        run: |
+          docker run --rm \
+            -v "${{ github.workspace }}/backend/generated:/local" \
+            openapitools/openapi-generator-cli generate \
+            -i /local/openapi.json \
+            -g python \
+            -o /local/onyx_openapi_client \
+            --package-name onyx_openapi_client
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # tag every docker image with "test" so that we can spin up the correct set
+      # of images during testing
+
+      # We don't need to build the Web Docker image since it's not yet used
+      # in the integration tests. We have a separate action to verify that it builds
+      # successfully.
+      - name: Pull Web Docker image
+        run: |
+          docker pull onyxdotapp/onyx-web-server:latest
+          docker tag onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:test
+
+      # we use the runs-on cache for docker builds
+      # in conjunction with runs-on runners, it has better speed and unlimited caching
+      # https://runs-on.com/caching/s3-cache-for-github-actions/
+      # https://runs-on.com/caching/docker/
+      # https://github.com/moby/buildkit#s3-cache-experimental
+
+      # images are built and run locally for testing purposes. Not pushed.
+      - name: Build Backend Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-backend:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      - name: Build Model Server Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-model-server:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      - name: Build integration test Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./backend
+          file: ./backend/tests/integration/Dockerfile
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-integration:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      # Start containers for multi-tenant tests
+      - name: Start Docker containers for multi-tenant tests
+        run: |
+          cd deployment/docker_compose
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
+          MULTI_TENANT=true \
+          AUTH_TYPE=cloud \
+          REQUIRE_EMAIL_VERIFICATION=false \
+          DISABLE_TELEMETRY=true \
+          IMAGE_TAG=test \
+          DEV_MODE=true \
+          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack up -d
+        id: start_docker_multi_tenant
+
+      # In practice, `cloud` Auth type would require OAUTH credentials to be set.
+      - name: Run Multi-Tenant Integration Tests
+        run: |
+          echo "Waiting for 3 minutes to ensure API server is ready..."
+          sleep 180
+          echo "Running integration tests..."
+          docker run --rm --network onyx-stack_default \
+            --name test-runner \
+            -e POSTGRES_HOST=relational_db \
+            -e POSTGRES_USER=postgres \
+            -e POSTGRES_PASSWORD=password \
+            -e DB_READONLY_USER=db_readonly_user \
+            -e DB_READONLY_PASSWORD=password \
+            -e POSTGRES_DB=postgres \
+            -e POSTGRES_USE_NULL_POOL=true \
+            -e VESPA_HOST=index \
+            -e REDIS_HOST=cache \
+            -e API_SERVER_HOST=api_server \
+            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
+            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
+            -e TEST_WEB_HOSTNAME=test-runner \
+            -e AUTH_TYPE=cloud \
+            -e MULTI_TENANT=true \
+            -e REQUIRE_EMAIL_VERIFICATION=false \
+            -e DISABLE_TELEMETRY=true \
+            -e IMAGE_TAG=test \
+            -e DEV_MODE=true \
+            onyxdotapp/onyx-integration:test \
+            /app/tests/integration/multitenant_tests
+        continue-on-error: true
+        id: run_multitenant_tests
+
+      # The test step runs with continue-on-error, so its recorded outcome is
+      # turned into a real job failure here.
+      - name: Check multi-tenant test results
+        run: |
+          outcome="${{ steps.run_multitenant_tests.outcome }}"
+          if [ "$outcome" != 'failure' ]; then
+            echo "All multi-tenant integration tests passed successfully."
+            exit 0
+          fi
+          echo "Multi-tenant integration tests failed. Exiting with error."
+          exit 1
+
+      # Tear down the multi-tenant stack and its volumes (-v) so the standard
+      # stack started below can reuse the same compose project name (onyx-stack).
+      # This step has no `if:` condition, so it only runs when every earlier
+      # step succeeded; on failure the containers stay up for the log dumps.
+      - name: Stop multi-tenant Docker containers
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v
+
+      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
+      # Starts the standard (single-tenant, basic auth) stack from
+      # docker-compose.dev.yml using the locally built images tagged `test`.
+      # NOTE(review): CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 presumably
+      # makes the TTL management check run almost continuously for tests - confirm.
+      - name: Start Docker containers
+        run: |
+          cd deployment/docker_compose
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
+          AUTH_TYPE=basic \
+          POSTGRES_POOL_PRE_PING=true \
+          POSTGRES_USE_NULL_POOL=true \
+          REQUIRE_EMAIL_VERIFICATION=false \
+          DISABLE_TELEMETRY=true \
+          IMAGE_TAG=test \
+          INTEGRATION_TESTS_MODE=true \
+          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
+          docker compose -f docker-compose.dev.yml -p onyx-stack up -d
+        id: start_docker
+
+      # Polls the API server health endpoint every 5 seconds until it returns
+      # HTTP 200, failing the step after 5 minutes.
+      - name: Wait for service to be ready
+        run: |
+          echo "Starting wait-for-service script..."
+
+          # Stream the API server logs in the background while polling.
+          docker logs -f onyx-stack-api_server-1 &
+
+          start_time=$(date +%s)
+          timeout=300  # 5 minutes in seconds
+
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+
+            if [ "$elapsed_time" -ge "$timeout" ]; then
+              echo "Timeout reached. Service did not become ready in 5 minutes."
+              exit 1
+            fi
+
+            # On a transport failure (e.g. exit code 56) curl still prints "000"
+            # for %{http_code}. Assign the sentinel outside the substitution;
+            # echoing it inside would produce "000curl_error" and never match.
+            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health) || response="curl_error"
+
+            if [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            elif [ "$response" = "curl_error" ]; then
+              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
+            else
+              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
+            fi
+
+            sleep 5
+          done
+          echo "Finished waiting for service."
+
+      - name: Start Mock Services
+        # Bring up the mock services as their own compose project.
+        working-directory: backend/tests/integration/mock_services
+        run: >-
+          docker compose
+          -f docker-compose.mock-it-services.yml
+          -p mock-it-services-stack
+          up -d
+
+      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
+      # Runs the standard and connector-job test suites from the integration
+      # image on the `onyx-stack_default` network. continue-on-error lets the
+      # following step report the outcome explicitly.
+      # Secret-backed values are double-quoted so that whitespace or glob
+      # characters in them cannot split or expand the `-e` arguments.
+      - name: Run Standard Integration Tests
+        run: |
+          echo "Running integration tests..."
+          docker run --rm --network onyx-stack_default \
+            --name test-runner \
+            -e POSTGRES_HOST=relational_db \
+            -e POSTGRES_USER=postgres \
+            -e POSTGRES_PASSWORD=password \
+            -e DB_READONLY_USER=db_readonly_user \
+            -e DB_READONLY_PASSWORD=password \
+            -e POSTGRES_DB=postgres \
+            -e POSTGRES_POOL_PRE_PING=true \
+            -e POSTGRES_USE_NULL_POOL=true \
+            -e VESPA_HOST=index \
+            -e REDIS_HOST=cache \
+            -e API_SERVER_HOST=api_server \
+            -e OPENAI_API_KEY="${OPENAI_API_KEY}" \
+            -e SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN}" \
+            -e CONFLUENCE_TEST_SPACE_URL="${CONFLUENCE_TEST_SPACE_URL}" \
+            -e CONFLUENCE_USER_NAME="${CONFLUENCE_USER_NAME}" \
+            -e CONFLUENCE_ACCESS_TOKEN="${CONFLUENCE_ACCESS_TOKEN}" \
+            -e JIRA_BASE_URL="${JIRA_BASE_URL}" \
+            -e JIRA_USER_EMAIL="${JIRA_USER_EMAIL}" \
+            -e JIRA_API_TOKEN="${JIRA_API_TOKEN}" \
+            -e TEST_WEB_HOSTNAME=test-runner \
+            -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
+            -e MOCK_CONNECTOR_SERVER_PORT=8001 \
+            onyxdotapp/onyx-integration:test \
+            /app/tests/integration/tests \
+            /app/tests/integration/connector_job_tests
+        continue-on-error: true
+        id: run_tests
+
+      - name: Check test results
+        # The test step uses continue-on-error, so its real result is read
+        # from `outcome` and converted into a job failure here.
+        run: |
+          outcome="${{ steps.run_tests.outcome }}"
+          if [ "$outcome" != "failure" ]; then
+            echo "All integration tests passed successfully."
+          else
+            echo "Integration tests failed. Exiting with error."
+            exit 1
+          fi
+
+      # ------------------------------------------------------------
+      # Always gather logs BEFORE "down":
+      # Both dumps are best-effort (`|| true`) so a logging problem never
+      # masks the real test result.
+      - name: Dump API server logs
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > "$GITHUB_WORKSPACE/api_server.log" || true
+
+      - name: Dump all-container logs (optional)
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > "$GITHUB_WORKSPACE/docker-compose.log" || true
+
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-all-logs
+          # Upload both dumps: api_server.log is written above and would
+          # otherwise be discarded with the runner.
+          path: |
+            ${{ github.workspace }}/docker-compose.log
+            ${{ github.workspace }}/api_server.log
+      # ------------------------------------------------------------
+
+      - name: Stop Docker containers
+        if: always()
+        # Remove the stack and its volumes (-v) even when earlier steps failed.
+        working-directory: deployment/docker_compose
+        run: docker compose -f docker-compose.dev.yml -p onyx-stack down -v
--- a/.github/workflows/pr-linear-check.yml
+++ b/.github/workflows/pr-linear-check.yml
@@ -0,0 +1,29 @@
+# Fails a pull request unless its description contains a Linear link or a
+# checked "Override Linear Check" task-list item.
+name: Ensure PR references Linear
+
+on:
+  pull_request:
+    types: [opened, edited, reopened, synchronize]
+
+jobs:
+  linear-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check PR body for Linear link or override
+        env:
+          # Passed through the environment (not inlined into the script) so the
+          # body text is never interpreted as shell code.
+          PR_BODY: ${{ github.event.pull_request.body }}
+        run: |
+          # printf is used instead of echo so a body that starts with an
+          # echo option such as "-n" or "-e" is still passed to grep verbatim.
+
+          # Looking for "https://linear.app" in the body
+          if printf '%s\n' "$PR_BODY" | grep -qE "https://linear\.app"; then
+            echo "Found a Linear link. Check passed."
+            exit 0
+          fi
+
+          # Looking for a checked override: "[x] Override Linear Check"
+          # ("[X]" is also a checked task-list item in GitHub Markdown)
+          if printf '%s\n' "$PR_BODY" | grep -q "\[[xX]\].*Override Linear Check"; then
+            echo "Override box is checked. Check passed."
+            exit 0
+          fi
+
+          # Otherwise, fail the run
+          echo "No Linear link or override found in the PR description."
+          exit 1
--- a/.github/workflows/pr-mit-integration-tests.yml
+++ b/.github/workflows/pr-mit-integration-tests.yml
@@ -0,0 +1,254 @@
+# Integration tests for the MIT (non-enterprise) configuration; runs on merge
+# queue entries and on pull requests targeting main or release branches.
+name: Run MIT Integration Tests v2
+# One run per branch: the group key uses the PR head ref, falling back to the
+# workflow_run head branch and finally the run id; a newer run in the same
+# group cancels the one in progress.
+concurrency:
+  group: Run-MIT-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true
+
+on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - "release/**"
+
+# Secrets exported to every step; the test-runner step forwards them into the
+# integration test container.
+env:
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+  CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
+  CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
+  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
+  JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
+  JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
+  JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
+  # Used in the S3 build-cache prefixes of the image build steps.
+  PLATFORM_PAIR: linux-amd64
+jobs:
+  integration-tests-mit:
+    # See https://runs-on.com/runners/linux/
+    runs-on:
+      [
+        runs-on,
+        runner=32cpu-linux-x64,
+        disk=large,
+        "run-id=${{ github.run_id }}",
+      ]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        
+      # Python 3.11 with pip caching keyed on the two requirements files.
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+          cache-dependency-path: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+      # Named so the step is identifiable in the run log, matching the
+      # "Install Dependencies" step of the connector tests workflow.
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+
+      # Writes the API schema to backend/generated/openapi.json (path is
+      # relative to the ./backend working directory).
+      - name: Generate OpenAPI schema
+        working-directory: ./backend
+        env:
+          PYTHONPATH: "."
+        run: |
+          python scripts/onyx_openapi_schema.py --filename generated/openapi.json
+
+      # Mounts backend/generated at /local in the generator container, reads
+      # /local/openapi.json and writes the Python client package to
+      # /local/onyx_openapi_client.
+      # NOTE(review): the generator image has no tag, so the default tag is
+      # pulled on each run - consider pinning a version for reproducibility.
+      - name: Generate OpenAPI Python client
+        working-directory: ./backend
+        run: |
+          docker run --rm \
+            -v "${{ github.workspace }}/backend/generated:/local" \
+            openapitools/openapi-generator-cli generate \
+            -i /local/openapi.json \
+            -g python \
+            -o /local/onyx_openapi_client \
+            --package-name onyx_openapi_client
+            
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # tag every docker image with "test" so that we can spin up the correct set
+      # of images during testing
+
+      # We don't need to build the Web Docker image since it's not yet used
+      # in the integration tests. We have a separate action to verify that it builds
+      # successfully.
+      - name: Pull Web Docker image
+        run: |
+          docker pull onyxdotapp/onyx-web-server:latest
+          docker tag onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:test
+
+      # we use the runs-on cache for docker builds
+      # in conjunction with runs-on runners, it has better speed and unlimited caching
+      # https://runs-on.com/caching/s3-cache-for-github-actions/
+      # https://runs-on.com/caching/docker/
+      # https://github.com/moby/buildkit#s3-cache-experimental
+
+      # images are built and run locally for testing purposes. Not pushed.
+      - name: Build Backend Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-backend:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      - name: Build Model Server Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-model-server:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      - name: Build integration test Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./backend
+          file: ./backend/tests/integration/Dockerfile
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-integration:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
+      - name: Start Docker containers
+        id: start_docker
+        working-directory: deployment/docker_compose
+        # Environment handed to docker compose for the standard stack.
+        env:
+          AUTH_TYPE: basic
+          POSTGRES_POOL_PRE_PING: "true"
+          POSTGRES_USE_NULL_POOL: "true"
+          REQUIRE_EMAIL_VERIFICATION: "false"
+          DISABLE_TELEMETRY: "true"
+          IMAGE_TAG: test
+          INTEGRATION_TESTS_MODE: "true"
+        run: |
+          docker compose -f docker-compose.dev.yml -p onyx-stack up -d
+
+      # Polls the API server health endpoint every 5 seconds until it returns
+      # HTTP 200, failing the step after 5 minutes.
+      - name: Wait for service to be ready
+        run: |
+          echo "Starting wait-for-service script..."
+
+          # Stream the API server logs in the background while polling.
+          docker logs -f onyx-stack-api_server-1 &
+
+          start_time=$(date +%s)
+          timeout=300  # 5 minutes in seconds
+
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+
+            if [ "$elapsed_time" -ge "$timeout" ]; then
+              echo "Timeout reached. Service did not become ready in 5 minutes."
+              exit 1
+            fi
+
+            # On a transport failure (e.g. exit code 56) curl still prints "000"
+            # for %{http_code}. Assign the sentinel outside the substitution;
+            # echoing it inside would produce "000curl_error" and never match.
+            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health) || response="curl_error"
+
+            if [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            elif [ "$response" = "curl_error" ]; then
+              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
+            else
+              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
+            fi
+
+            sleep 5
+          done
+          echo "Finished waiting for service."
+
+      - name: Start Mock Services
+        # Bring up the mock services as their own compose project.
+        working-directory: backend/tests/integration/mock_services
+        run: >-
+          docker compose
+          -f docker-compose.mock-it-services.yml
+          -p mock-it-services-stack
+          up -d
+
+      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
+      # Runs the standard and connector-job test suites from the integration
+      # image on the `onyx-stack_default` network. continue-on-error lets the
+      # following step report the outcome explicitly.
+      # Secret-backed values are double-quoted so that whitespace or glob
+      # characters in them cannot split or expand the `-e` arguments.
+      - name: Run Standard Integration Tests
+        run: |
+          echo "Running integration tests..."
+          docker run --rm --network onyx-stack_default \
+            --name test-runner \
+            -e POSTGRES_HOST=relational_db \
+            -e POSTGRES_USER=postgres \
+            -e POSTGRES_PASSWORD=password \
+            -e POSTGRES_DB=postgres \
+            -e DB_READONLY_USER=db_readonly_user \
+            -e DB_READONLY_PASSWORD=password \
+            -e POSTGRES_POOL_PRE_PING=true \
+            -e POSTGRES_USE_NULL_POOL=true \
+            -e VESPA_HOST=index \
+            -e REDIS_HOST=cache \
+            -e API_SERVER_HOST=api_server \
+            -e OPENAI_API_KEY="${OPENAI_API_KEY}" \
+            -e SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN}" \
+            -e CONFLUENCE_TEST_SPACE_URL="${CONFLUENCE_TEST_SPACE_URL}" \
+            -e CONFLUENCE_USER_NAME="${CONFLUENCE_USER_NAME}" \
+            -e CONFLUENCE_ACCESS_TOKEN="${CONFLUENCE_ACCESS_TOKEN}" \
+            -e JIRA_BASE_URL="${JIRA_BASE_URL}" \
+            -e JIRA_USER_EMAIL="${JIRA_USER_EMAIL}" \
+            -e JIRA_API_TOKEN="${JIRA_API_TOKEN}" \
+            -e TEST_WEB_HOSTNAME=test-runner \
+            -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
+            -e MOCK_CONNECTOR_SERVER_PORT=8001 \
+            onyxdotapp/onyx-integration:test \
+            /app/tests/integration/tests \
+            /app/tests/integration/connector_job_tests
+        continue-on-error: true
+        id: run_tests
+
+      - name: Check test results
+        # The test step uses continue-on-error, so its real result is read
+        # from `outcome` and converted into a job failure here.
+        run: |
+          outcome="${{ steps.run_tests.outcome }}"
+          if [ "$outcome" != "failure" ]; then
+            echo "All integration tests passed successfully."
+          else
+            echo "Integration tests failed. Exiting with error."
+            exit 1
+          fi
+
+      # ------------------------------------------------------------
+      # Always gather logs BEFORE "down":
+      # Both dumps are best-effort (`|| true`) so a logging problem never
+      # masks the real test result.
+      - name: Dump API server logs
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > "$GITHUB_WORKSPACE/api_server.log" || true
+
+      - name: Dump all-container logs (optional)
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > "$GITHUB_WORKSPACE/docker-compose.log" || true
+
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-all-logs
+          # Upload both dumps: api_server.log is written above and would
+          # otherwise be discarded with the runner.
+          path: |
+            ${{ github.workspace }}/docker-compose.log
+            ${{ github.workspace }}/api_server.log
+      # ------------------------------------------------------------
+
+      - name: Stop Docker containers
+        if: always()
+        # Remove the stack and its volumes (-v) even when earlier steps failed.
+        working-directory: deployment/docker_compose
+        run: docker compose -f docker-compose.dev.yml -p onyx-stack down -v
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -0,0 +1,245 @@
+# End-to-end browser tests; triggered on every push.
+name: Run Playwright Tests
+# NOTE(review): this workflow only triggers on `push`, where github.head_ref
+# is empty and there is no workflow_run payload, so the group key falls back
+# to github.run_id. Every run then gets its own group and cancel-in-progress
+# never cancels anything - confirm whether github.ref was intended.
+concurrency:
+  group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true
+
+on: push
+
+# Exported to every step; GEN_AI_API_KEY reuses the OpenAI secret.
+env:
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+  GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  MOCK_LLM_RESPONSE: true
+  PYTEST_PLAYWRIGHT_SKIP_INITIAL_RESET: true
+
+jobs:
+  playwright-tests:
+    name: Playwright Tests
+
+    # See https://runs-on.com/runners/linux/
+    runs-on:
+      [
+        runs-on,
+        runner=32cpu-linux-x64,
+        disk=large,
+        "run-id=${{ github.run_id }}",
+      ]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Python 3.11 with pip caching keyed on the three requirements files.
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+          cache-dependency-path: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+            backend/requirements/model_server.txt
+      # Named so the step is identifiable in the run log, matching the
+      # "Install Dependencies" step of the connector tests workflow.
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
+
+      - name: Setup node
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Install node dependencies
+        working-directory: ./web
+        run: npm ci
+
+      - name: Install playwright browsers
+        working-directory: ./web
+        run: npx playwright install --with-deps
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # tag every docker image with "test" so that we can spin up the correct set
+      # of images during testing
+
+      # we use the runs-on cache for docker builds
+      # in conjunction with runs-on runners, it has better speed and unlimited caching
+      # https://runs-on.com/caching/s3-cache-for-github-actions/
+      # https://runs-on.com/caching/docker/
+      # https://github.com/moby/buildkit#s3-cache-experimental
+
+      # images are built and run locally for testing purposes. Not pushed.
+
+      - name: Build Web Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-web-server:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      - name: Build Backend Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-backend:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      - name: Build Model Server Docker image
+        uses: ./.github/actions/custom-build-and-push
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/amd64
+          tags: onyxdotapp/onyx-model-server:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+
+      # Starts the stack from docker-compose.dev.yml under the `danswer-stack`
+      # project name, using the locally built images tagged `test`.
+      # GEN_AI_API_KEY is taken from the workflow-level env (which holds the
+      # same OpenAI secret) rather than interpolating the secret into the
+      # script text, where special characters would be parsed by the shell.
+      - name: Start Docker containers
+        run: |
+          cd deployment/docker_compose
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
+          AUTH_TYPE=basic \
+          GEN_AI_API_KEY="${GEN_AI_API_KEY}" \
+          REQUIRE_EMAIL_VERIFICATION=false \
+          DISABLE_TELEMETRY=true \
+          IMAGE_TAG=test \
+          docker compose -f docker-compose.dev.yml -p danswer-stack up -d
+        id: start_docker
+
+      # Polls the API server health endpoint every 5 seconds until it returns
+      # HTTP 200, failing the step after 5 minutes.
+      - name: Wait for service to be ready
+        run: |
+          echo "Starting wait-for-service script..."
+
+          # Stream the API server logs in the background while polling.
+          docker logs -f danswer-stack-api_server-1 &
+
+          start_time=$(date +%s)
+          timeout=300  # 5 minutes in seconds
+
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+
+            if [ "$elapsed_time" -ge "$timeout" ]; then
+              echo "Timeout reached. Service did not become ready in 5 minutes."
+              exit 1
+            fi
+
+            # On a transport failure (e.g. exit code 56) curl still prints "000"
+            # for %{http_code}. Assign the sentinel outside the substitution;
+            # echoing it inside would produce "000curl_error" and never match.
+            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health) || response="curl_error"
+
+            if [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            elif [ "$response" = "curl_error" ]; then
+              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
+            else
+              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
+            fi
+
+            sleep 5
+          done
+          echo "Finished waiting for service."
+
+      - name: Run pytest playwright test init
+        working-directory: ./backend
+        env:
+          PYTEST_IGNORE_SKIP: true
+        run: pytest -s tests/integration/tests/playwright/test_playwright.py
+
+      - name: Run Playwright tests
+        working-directory: ./web
+        run: npx playwright test
+
+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          # Chromatic automatically defaults to the test-results directory.
+          # Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
+          name: test-results
+          path: ./web/test-results
+          retention-days: 30
+
+      # save before stopping the containers so the logs can be captured
+      - name: Save Docker logs
+        if: success() || failure()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
+          mv docker-compose.log ${{ github.workspace }}/docker-compose.log
+
+      - name: Upload logs
+        if: success() || failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-logs
+          path: ${{ github.workspace }}/docker-compose.log
+
+      # Runs even when the tests fail so the stack and its volumes (-v) are
+      # always removed; the log-saving steps above have already run by then.
+      - name: Stop Docker containers
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.dev.yml -p danswer-stack down -v
+
+# NOTE: Chromatic UI diff testing is currently disabled.
+# We are using Playwright for local and CI testing without visual regression checks.
+# Chromatic may be reintroduced in the future for UI diff testing if needed.
+
+# chromatic-tests:
+#   name: Chromatic Tests
+
+#   needs: playwright-tests
+#   runs-on:
+#     [
+#       runs-on,
+#       runner=32cpu-linux-x64,
+#       disk=large,
+#       "run-id=${{ github.run_id }}",
+#     ]
+#   steps:
+#     - name: Checkout code
+#       uses: actions/checkout@v4
+#       with:
+#         fetch-depth: 0
+
+#     - name: Setup node
+#       uses: actions/setup-node@v4
+#       with:
+#         node-version: 22
+
+#     - name: Install node dependencies
+#       working-directory: ./web
+#       run: npm ci
+
+#     - name: Download Playwright test results
+#       uses: actions/download-artifact@v4
+#       with:
+#         name: test-results
+#         path: ./web/test-results
+
+#     - name: Run Chromatic
+#       uses: chromaui/action@latest
+#       with:
+#         playwright: true
+#         projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
+#         workingDir: ./web
+#       env:
+#         CHROMATIC_ARCHIVE_LOCATION: ./test-results
--- a/.github/workflows/pr-python-checks.yml
+++ b/.github/workflows/pr-python-checks.yml
@@ -0,0 +1,65 @@
+name: Python Checks
+
+on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - 'release/**'
+
+jobs:
+  mypy-check:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+        cache: 'pip'
+        cache-dependency-path: |
+          backend/requirements/default.txt
+          backend/requirements/dev.txt
+          backend/requirements/model_server.txt
+    - run: |
+        python -m pip install --upgrade pip
+        pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+        pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+        pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
+
+    - name: Generate OpenAPI schema
+      working-directory: ./backend
+      env:
+        PYTHONPATH: "."
+      run: |
+        python scripts/onyx_openapi_schema.py --filename generated/openapi.json
+
+    - name: Generate OpenAPI Python client
+      working-directory: ./backend
+      run: |
+        docker run --rm \
+          -v "${{ github.workspace }}/backend/generated:/local" \
+          openapitools/openapi-generator-cli generate \
+          -i /local/openapi.json \
+          -g python \
+          -o /local/onyx_openapi_client \
+          --package-name onyx_openapi_client
+            
+    - name: Run MyPy
+      run: |
+        cd backend
+        mypy .
+
+    # reorder-python-imports rewrites files and exits non-zero when it changed
+    # any, which is what fails this check. File names are passed NUL-delimited
+    # so paths containing whitespace are handled correctly.
+    - name: Check import order with reorder-python-imports
+      run: |
+        cd backend
+        find ./onyx -name "*.py" -print0 | xargs -0 reorder-python-imports --py311-plus
+
+    - name: Check code formatting with Black
+      run: |
+        cd backend
+        black --check .
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -0,0 +1,142 @@
+name: Connector Tests
+
+on:
+  merge_group:
+  pull_request:
+    branches: [main]
+  schedule:
+    # This cron expression runs the job daily at 16:00 UTC (9am PT)
+    - cron: "0 16 * * *"
+
+env:
+  # AWS
+  AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
+  AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_SECRET_ACCESS_KEY_DAILY_CONNECTOR_TESTS }}
+
+  # Confluence
+  CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
+  CONFLUENCE_TEST_SPACE: ${{ secrets.CONFLUENCE_TEST_SPACE }}
+  CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
+  CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }}
+  CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
+  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
+
+  # Jira
+  JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
+  JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
+  JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
+
+  # Gong
+  GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}
+  GONG_ACCESS_KEY_SECRET: ${{ secrets.GONG_ACCESS_KEY_SECRET }}
+
+  # Google
+  GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
+  GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
+  GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
+  GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
+  GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
+
+  # Slab
+  SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
+
+  # Zendesk
+  ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
+  ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
+  ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
+
+  # Salesforce
+  SF_USERNAME: ${{ secrets.SF_USERNAME }}
+  SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
+  SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
+
+  # Hubspot
+  HUBSPOT_ACCESS_TOKEN: ${{ secrets.HUBSPOT_ACCESS_TOKEN }}
+
+  # Airtable
+  AIRTABLE_TEST_BASE_ID: ${{ secrets.AIRTABLE_TEST_BASE_ID }}
+  AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
+  AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
+  AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
+
+  # Sharepoint
+  SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
+  SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
+  SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
+  SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
+
+  # Github
+  ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}
+
+  # Gitlab
+  GITLAB_ACCESS_TOKEN: ${{ secrets.GITLAB_ACCESS_TOKEN }}
+
+  # Gitbook
+  GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
+  GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
+
+  # Notion
+  NOTION_INTEGRATION_TOKEN: ${{ secrets.NOTION_INTEGRATION_TOKEN }}
+
+  # Highspot
+  HIGHSPOT_KEY: ${{ secrets.HIGHSPOT_KEY }}
+  HIGHSPOT_SECRET: ${{ secrets.HIGHSPOT_SECRET }}
+
+  # Slack
+  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+
+  # Teams
+  TEAMS_APPLICATION_ID: ${{ secrets.TEAMS_APPLICATION_ID }}
+  TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
+  TEAMS_SECRET: ${{ secrets.TEAMS_SECRET }}
+
+jobs:
+  connectors-check:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
+
+    env:
+      PYTHONPATH: ./backend
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+          cache-dependency-path: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+          playwright install chromium
+          playwright install-deps chromium
+
+      - name: Run Tests
+        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
+        run: |
+          py.test \
+            -n 8 \
+            --dist loadfile \
+            --durations=8 \
+            -o junit_family=xunit2 \
+            -xv \
+            --ff \
+            backend/tests/daily/connectors
+
+      # Posts a Slack message linking to this run, only when a scheduled
+      # (cron) run failed. The webhook URL is quoted so characters in the
+      # secret cannot split or glob-expand the curl argument.
+      - name: Alert on Failure
+        if: failure() && github.event_name == 'schedule'
+        env:
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+        run: |
+          curl -X POST \
+            -H 'Content-type: application/json' \
+            --data '{"text":"Scheduled Connector Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
+            "$SLACK_WEBHOOK"
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -0,0 +1,149 @@
+name: Model Server Tests
+
+on:
+  schedule:
+    # This cron expression runs the job daily at 16:00 UTC (8am PST / 9am PDT)
+    - cron: "0 16 * * *"
+  workflow_dispatch:
+    inputs:
+      branch:
+        description: 'Branch to run the workflow on'
+        required: false
+        default: 'main'
+        
+env:
+  # Bedrock
+  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
+
+  # API keys for testing
+  COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
+  LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
+  LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+  AZURE_API_URL: ${{ secrets.AZURE_API_URL }}
+
+jobs:
+  model-check:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+
+    env:
+      PYTHONPATH: ./backend
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # Tag the image with "test" so that the compose stack, which is started
+      # below with IMAGE_TAG=test, can select it.
+
+      # This workflow only brings up the model server, so the published `latest`
+      # image is pulled and retagged here; nothing is built and no Web image is
+      # needed.
+      - name: Pull Model Server Docker image
+        run: |
+          docker pull onyxdotapp/onyx-model-server:latest
+          docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
+          
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+          cache-dependency-path: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+
+      - name: Start Docker containers
+        run: |
+          cd deployment/docker_compose
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
+          AUTH_TYPE=basic \
+          REQUIRE_EMAIL_VERIFICATION=false \
+          DISABLE_TELEMETRY=true \
+          IMAGE_TAG=test \
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
+        id: start_docker
+
+      - name: Wait for service to be ready  # polls the health endpoint every 5s, for at most 5 minutes
+        run: |
+          echo "Starting wait-for-service script..."
+
+          start_time=$(date +%s)
+          timeout=300  # 5 minutes in seconds
+
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+
+            if [ "$elapsed_time" -ge "$timeout" ]; then
+              echo "Timeout reached. Service did not become ready in 5 minutes."
+              exit 1
+            fi
+
+            # curl still prints "000" via -w when it fails (e.g. exit code 56), so replace the captured output with the sentinel instead of appending to it
+            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health) || response="curl_error"
+
+            if [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            elif [ "$response" = "curl_error" ]; then
+              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
+            else
+              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
+            fi
+
+            sleep 5
+          done
+          echo "Finished waiting for service."
+          
+      - name: Run Tests
+        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
+        run: |
+          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
+          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
+
+      - name: Alert on Failure
+        if: failure() && github.event_name == 'schedule'  # only alert for cron-triggered runs
+        env:
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+        run: |
+          curl -X POST \
+            -H 'Content-type: application/json' \
+            --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
+            "$SLACK_WEBHOOK"
+            
+      - name: Dump all-container logs (optional)
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-all-logs
+          path: ${{ github.workspace }}/docker-compose.log
+          
+      - name: Stop Docker containers
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v
+          
--- a/.github/workflows/pr-python-tests.yml
+++ b/.github/workflows/pr-python-tests.yml
@@ -0,0 +1,43 @@
+name: Python Unit Tests
+
+on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - 'release/**'
+
+jobs:
+  backend-check:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+
+    env:
+      PYTHONPATH: ./backend
+      REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
+      SF_USERNAME: ${{ secrets.SF_USERNAME }}
+      SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
+      SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
+      
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+        cache: 'pip'
+        cache-dependency-path: |
+          backend/requirements/default.txt
+          backend/requirements/dev.txt
+
+    - name: Install Dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
+        pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+
+    - name: Run Tests
+      shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
+      run: py.test -o junit_family=xunit2 -xv --ff backend/tests/unit
--- a/.github/workflows/pr-quality-checks.yml
+++ b/.github/workflows/pr-quality-checks.yml
@@ -0,0 +1,23 @@
+name: Quality Checks PR
+concurrency:
+  group: Quality-Checks-PR-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true
+
+on:
+  merge_group:
+  pull_request: null
+
+jobs:
+  quality-checks:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - uses: pre-commit/action@v3.0.1
+        with:
+          extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -0,0 +1,53 @@
+name: Nightly Tag Push
+
+on:
+  schedule:
+    - cron: "0 10 * * *" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
+
+permissions:
+  contents: write # Allows pushing tags to the repository
+
+jobs:
+  create-and-push-tag:
+    runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}"]
+
+    steps:
+      # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
+      # see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
+      # implement here which needs an actual user's deploy key
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
+
+      - name: Set up Git user
+        run: |
+          git config user.name "Richard Kuo [bot]"
+          git config user.email "rkuo[bot]@onyx.app"
+
+      - name: Check for existing nightly tag
+        id: check_tag
+        run: |
+          if git tag --points-at HEAD --list "nightly-latest*" | grep -q .; then
+            echo "A tag starting with 'nightly-latest' already exists on HEAD."
+            echo "tag_exists=true" >> $GITHUB_OUTPUT
+          else
+            echo "No tag starting with 'nightly-latest' exists on HEAD."
+            echo "tag_exists=false" >> $GITHUB_OUTPUT
+          fi
+
+      # don't tag again if HEAD already has a nightly-latest tag on it
+      - name: Create Nightly Tag
+        id: create_tag
+        if: steps.check_tag.outputs.tag_exists == 'false'
+        run: |
+          TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
+          echo "Creating tag: $TAG_NAME"
+          git tag "$TAG_NAME"
+          echo "tag_name=$TAG_NAME" >> "$GITHUB_OUTPUT"  # hand the exact name to the push step
+
+      - name: Push Tag
+        if: steps.check_tag.outputs.tag_exists == 'false'
+        env:
+          TAG_NAME: ${{ steps.create_tag.outputs.tag_name }}
+        run: git push origin "$TAG_NAME"
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,28 @@
+# editors
+.vscode
+.zed
+
+# macos (Finder metadata files)
+.DS_Store
+
+# python
+.venv
+.mypy_cache
+.idea
+
+# testing
+/web/test-results/
+backend/onyx/agent_search/main/test_data.json
+backend/tests/regression/answer_quality/test_data.json
+backend/tests/regression/search_quality/eval-*
+backend/tests/regression/search_quality/search_eval_config.yaml
+backend/tests/regression/search_quality/*.json
+
+# secret files
+.env
+jira_test_env
+
+# others
+/deployment/data/nginx/app.conf
+*.sw?
+/backend/tests/regression/answer_quality/search_test_config.yaml
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,66 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 25.1.0
+    hooks:
+      - id: black
+        language_version: python3.11
+
+  # this is a fork which keeps compatibility with black
+  - repo: https://github.com/wimglenn/reorder-python-imports-black
+    rev: v3.14.0
+    hooks:
+      - id: reorder-python-imports
+        args: ['--py311-plus', '--application-directories=backend/']
+        # alembic files must be skipped: reorder-python-imports gets confused
+        # and treats alembic as a local package, because the backend directory
+        # contains a folder called `alembic`
+        exclude: ^backend/alembic/
+
+  # These settings will remove unused imports with side effects
+  # Note: The repo currently does not and should not have imports with side effects
+  - repo: https://github.com/PyCQA/autoflake
+    rev: v2.3.1
+    hooks:
+      - id: autoflake
+        args: ['--remove-all-unused-imports', '--remove-unused-variables', '--in-place', '--recursive']
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.11.4
+    hooks:
+      - id: ruff
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v3.1.0
+    hooks:
+      - id: prettier
+        types_or: [html, css, javascript, ts, tsx]
+        additional_dependencies:
+          - prettier
+
+  # We would like to have a mypy pre-commit hook, but due to the fact that
+  # pre-commit runs in its own isolated environment, we would need to install
+  # and keep in sync all dependencies so mypy has access to the appropriate type
+  # stubs. This does not seem worth it at the moment, so for now we will stick to
+  # having mypy run via GitHub Actions / manually by contributors
+  # - repo: https://github.com/pre-commit/mirrors-mypy
+  #   rev: v1.1.1
+  #   hooks:
+  #     - id: mypy
+  #       exclude: ^tests/
+  #       # below are needed for type stubs since pre-commit runs in its own
+  #       # isolated environment. Unfortunately, this needs to be kept in sync
+  #       # with requirements/dev.txt + requirements/default.txt
+  #       additional_dependencies: [
+  #         alembic==1.10.4,
+  #         types-beautifulsoup4==4.12.0.3,
+  #         types-html5lib==1.1.11.13,
+  #         types-oauthlib==3.2.0.9,
+  #         types-psycopg2==2.9.21.10,
+  #         types-python-dateutil==2.8.19.13,
+  #         types-regex==2023.3.23.1,
+  #         types-requests==2.28.11.17,
+  #         types-retry==0.9.9.3,
+  #         types-urllib3==1.26.25.11
+  #       ]
+  #       # TODO: add back once errors are addressed
+  #       # args: [--strict]
--- a/.prettierignore
+++ b/.prettierignore
@@ -0,0 +1 @@
+backend/tests/integration/tests/pruning/website
--- a/.vscode/env_template.txt
+++ b/.vscode/env_template.txt
@@ -0,0 +1,66 @@
+# Copy this file to .env in the .vscode folder
+# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
+# Also check out backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes
+
+# For local dev, often user Authentication is not needed
+AUTH_TYPE=disabled
+
+# Skip warm up for dev
+SKIP_WARM_UP=True
+
+# Always keep these on for Dev
+# Logs all model prompts to stdout
+LOG_DANSWER_MODEL_INTERACTIONS=True
+# More verbose logging
+LOG_LEVEL=debug
+
+
+# This passes top N results to LLM an additional time for reranking prior to answer generation
+# This step is quite heavy on token usage; set DISABLE_LLM_DOC_RELEVANCE=True to turn it off for dev
+DISABLE_LLM_DOC_RELEVANCE=False
+
+
+# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
+OAUTH_CLIENT_ID=<REPLACE THIS>
+OAUTH_CLIENT_SECRET=<REPLACE THIS>
+# Generally not useful for dev, we don't generally want to set up an SMTP server for dev
+REQUIRE_EMAIL_VERIFICATION=False
+
+
+# Set these so if you wipe the DB, you don't end up having to go through the UI every time
+GEN_AI_API_KEY=<REPLACE THIS>
+OPENAI_API_KEY=<REPLACE THIS>
+# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper
+GEN_AI_MODEL_VERSION=gpt-4o
+FAST_GEN_AI_MODEL_VERSION=gpt-4o
+
+# For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time
+# Only needed if using DanswerBot
+#DANSWER_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
+#DANSWER_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
+
+
+# Python stuff
+PYTHONPATH=../backend
+PYTHONUNBUFFERED=1
+
+
+# Internet Search 
+BING_API_KEY=<REPLACE THIS>
+
+
+# Enable the full set of Danswer Enterprise Edition features
+# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
+ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
+
+# Agent Search configs (TODO: remove or give proper names). Note: the AGENT_RETRIEVAL_STATS setting will incur substantial re-ranking effort
+AGENT_RETRIEVAL_STATS=False
+AGENT_RERANKING_STATS=True
+AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
+AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20
+
+# S3 File Store Configuration (MinIO for local development)
+S3_ENDPOINT_URL=http://localhost:9004
+S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
+S3_AWS_ACCESS_KEY_ID=minioadmin
+S3_AWS_SECRET_ACCESS_KEY=minioadmin
--- a/.vscode/launch.template.jsonc
+++ b/.vscode/launch.template.jsonc
@@ -0,0 +1,464 @@
+/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
+
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "compounds": [
+      {
+        // Dummy entry used to label the group
+        "name": "--- Compound ---",
+        "configurations": ["--- Individual ---"],
+        "presentation": {
+          "group": "1"
+        }
+      },
+      {
+        "name": "Run All Onyx Services",
+        "configurations": [
+          "Web Server",
+          "Model Server",
+          "API Server",
+          "Slack Bot",
+          "Celery primary",
+          "Celery light",
+          "Celery heavy",
+          "Celery indexing",
+          "Celery user files indexing",
+          "Celery beat",
+          "Celery monitoring"
+        ],
+        "presentation": {
+          "group": "1"
+        }
+      },
+      {
+        "name": "Web / Model / API",
+        "configurations": ["Web Server", "Model Server", "API Server"],
+        "presentation": {
+          "group": "1"
+        }
+      },
+      {
+        "name": "Celery (all)",
+        "configurations": [
+          "Celery primary",
+          "Celery light",
+          "Celery heavy",
+          "Celery indexing",
+          "Celery user files indexing",
+          "Celery beat",
+          "Celery monitoring"
+        ],
+        "presentation": {
+          "group": "1"
+        }
+      }
+    ],
+    "configurations": [
+      {
+        // Dummy entry used to label the group
+        "name": "--- Individual ---",
+        "type": "node",
+        "request": "launch",
+        "presentation": {
+          "group": "2",
+          "order": 0
+        }
+      },
+      {
+        "name": "Web Server",
+        "type": "node",
+        "request": "launch",
+        "cwd": "${workspaceFolder}/web",
+        "runtimeExecutable": "npm",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "runtimeArgs": ["run", "dev"], // i.e. `npm run dev`, executed from the web/ directory
+        "presentation": {
+          "group": "2"
+        },
+        "console": "integratedTerminal",
+        "consoleTitle": "Web Server Console"
+      },
+      {
+        "name": "Model Server",
+        "consoleName": "Model Server",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "uvicorn",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_LEVEL": "DEBUG",
+          "PYTHONUNBUFFERED": "1"
+        },
+        "args": ["model_server.main:app", "--reload", "--port", "9000"],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Model Server Console"
+      },
+      {
+        "name": "API Server",
+        "consoleName": "API Server",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "uvicorn",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_DANSWER_MODEL_INTERACTIONS": "True",
+          "LOG_LEVEL": "DEBUG",
+          "PYTHONUNBUFFERED": "1"
+        },
+        "args": ["onyx.main:app", "--reload", "--port", "8080"],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "API Server Console"
+      },
+      // For the listener to access the Slack API,
+      // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in the .env file located in the .vscode folder (the "envFile" used below)
+      {
+        "name": "Slack Bot",
+        "consoleName": "Slack Bot",
+        "type": "debugpy",
+        "request": "launch",
+        "program": "onyx/onyxbot/slack/listener.py",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_LEVEL": "DEBUG",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Slack Bot Console"
+      },
+      {
+        "name": "Celery primary",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "celery",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_LEVEL": "INFO",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "-A",
+          "onyx.background.celery.versioned_apps.primary",
+          "worker",
+          "--pool=threads",
+          "--concurrency=4",
+          "--prefetch-multiplier=1",
+          "--loglevel=INFO",
+          "--hostname=primary@%n",
+          "-Q",
+          "celery"
+        ],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Celery primary Console"
+      },
+      {
+        "name": "Celery light",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "celery",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_LEVEL": "INFO",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "-A",
+          "onyx.background.celery.versioned_apps.light",
+          "worker",
+          "--pool=threads",
+          "--concurrency=64",
+          "--prefetch-multiplier=8",
+          "--loglevel=INFO",
+          "--hostname=light@%n",
+          "-Q",
+          "vespa_metadata_sync,connector_deletion,doc_permissions_upsert"
+        ],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Celery light Console"
+      },
+      {
+        "name": "Celery heavy",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "celery",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_LEVEL": "INFO",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "-A",
+          "onyx.background.celery.versioned_apps.heavy",
+          "worker",
+          "--pool=threads",
+          "--concurrency=4",
+          "--prefetch-multiplier=1",
+          "--loglevel=INFO",
+          "--hostname=heavy@%n",
+          "-Q",
+          "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
+        ],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Celery heavy Console"
+      },
+      {
+        "name": "Celery indexing",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "celery",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "ENABLE_MULTIPASS_INDEXING": "false",
+          "LOG_LEVEL": "DEBUG",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "-A",
+          "onyx.background.celery.versioned_apps.indexing",
+          "worker",
+          "--pool=threads",
+          "--concurrency=1",
+          "--prefetch-multiplier=1",
+          "--loglevel=INFO",
+          "--hostname=indexing@%n",
+          "-Q",
+          "connector_indexing"
+        ],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Celery indexing Console"
+      },
+      {
+        "name": "Celery monitoring",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "celery",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {},
+        "args": [
+          "-A",
+          "onyx.background.celery.versioned_apps.monitoring",
+          "worker",
+          "--pool=solo",
+          "--concurrency=1",
+          "--prefetch-multiplier=1",
+          "--loglevel=INFO",
+          "--hostname=monitoring@%n",
+          "-Q",
+          "monitoring"
+        ],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Celery monitoring Console"
+      },
+      {
+        "name": "Celery beat",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "celery",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_LEVEL": "DEBUG",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "-A",
+          "onyx.background.celery.versioned_apps.beat",
+          "beat",
+          "--loglevel=INFO"
+        ],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Celery beat Console"
+      },
+      {
+        "name": "Celery user files indexing",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "celery",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_LEVEL": "DEBUG",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "-A",
+          "onyx.background.celery.versioned_apps.indexing",
+          "worker",
+          "--pool=threads",
+          "--concurrency=1",
+          "--prefetch-multiplier=1",
+          "--loglevel=INFO",
+          "--hostname=user_files_indexing@%n",
+          "-Q",
+          "user_files_indexing"
+        ],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Celery user files indexing Console"
+      },
+      {
+        "name": "Pytest",
+        "consoleName": "Pytest",
+        "type": "debugpy",
+        "request": "launch",
+        "module": "pytest",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_LEVEL": "DEBUG",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "-v"
+          // Specify a specific module/test to run or provide nothing to run all tests
+          //"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
+        ],
+        "presentation": {
+          "group": "2"
+        },
+        "consoleTitle": "Pytest Console"
+      },
+      {
+        // Dummy entry used to label the group
+        "name": "--- Tasks ---",
+        "type": "node",
+        "request": "launch",
+        "presentation": {
+          "group": "3",
+          "order": 0
+        }
+      },
+      {
+        "name": "Clear and Restart External Volumes and Containers",
+        "type": "node",
+        "request": "launch",
+        "runtimeExecutable": "bash",
+        "runtimeArgs": [
+          "${workspaceFolder}/backend/scripts/restart_containers.sh"
+        ],
+        "cwd": "${workspaceFolder}",
+        "console": "integratedTerminal",
+        "stopOnEntry": true,
+        "presentation": {
+          "group": "3"
+        }
+      },
+      {
+        // Celery jobs launched through a single background script (legacy)
+        // Recommend using the "Celery (all)" compound launch instead.
+        "name": "Background Jobs",
+        "consoleName": "Background Jobs",
+        "type": "debugpy",
+        "request": "launch",
+        "program": "scripts/dev_run_background_jobs.py",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.vscode/.env",
+        "env": {
+          "LOG_DANSWER_MODEL_INTERACTIONS": "True",
+          "LOG_LEVEL": "DEBUG",
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        }
+      },
+      {
+        "name": "Install Python Requirements",
+        "type": "node",
+        "request": "launch",
+        "runtimeExecutable": "bash",
+        "runtimeArgs": [
+          "-c",
+          "pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
+        ],
+        "cwd": "${workspaceFolder}",
+        "console": "integratedTerminal",
+        "presentation": {
+          "group": "3"
+        }
+      },
+      {
+        // script to generate the openapi schema
+        "name": "Onyx OpenAPI Schema Generator",
+        "type": "debugpy",
+        "request": "launch",
+        "program": "scripts/onyx_openapi_schema.py",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.env", // NOTE(review): sibling configurations load .vscode/.env; confirm the root .env is intended
+        "env": {
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "--filename",
+          "generated/openapi.json"
+        ]
+      },
+      {
+        // script to debug multi tenant db issues
+        "name": "Onyx DB Manager (Top Chunks)",
+        "type": "debugpy",
+        "request": "launch",
+        "program": "scripts/debugging/onyx_db.py",
+        "cwd": "${workspaceFolder}/backend",
+        "envFile": "${workspaceFolder}/.env", // NOTE(review): sibling configurations load .vscode/.env; confirm the root .env is intended
+        "env": {
+          "PYTHONUNBUFFERED": "1",
+          "PYTHONPATH": "."
+        },
+        "args": [
+          "--password",
+          "your_password_here",
+          "--port",
+          "5433",
+          "--report",
+          "top-chunks",
+          "--filename",
+          "generated/tenants_by_num_docs.csv"
+        ]
+      },
+      {
+        "name": "Debug React Web App in Chrome",
+        "type": "chrome",
+        "request": "launch",
+        "url": "http://localhost:3000",
+        "webRoot": "${workspaceFolder}/web"
+      }
+    ]
+  }
+  
--- a/.vscode/tasks.template.jsonc
+++ b/.vscode/tasks.template.jsonc
@@ -0,0 +1,101 @@
+{
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "type": "austin",
+            "label": "Profile celery beat",
+            "envFile": "${workspaceFolder}/.env",
+            "options": {
+              "cwd": "${workspaceFolder}/backend"
+            },
+            "command": [
+                "sudo",
+                "-E"
+            ],
+            "args": [
+              "celery",
+              "-A",
+              "onyx.background.celery.versioned_apps.beat",
+              "beat",
+              "--loglevel=INFO"
+            ]
+        },
+        {
+            "type": "shell",
+            "label": "Generate Onyx OpenAPI Python client",
+            // the working directory is set once, under "options" below; "-i" is resolved relative to it
+            "envFile": "${workspaceFolder}/.env",
+            "options": {
+              "cwd": "${workspaceFolder}/backend"
+            },
+            "command": [
+                "openapi-generator"
+            ],
+            "args": [
+                "generate",
+                "-i",
+                "generated/openapi.json",
+                "-g",
+                "python",
+                "-o",
+                "generated/onyx_openapi_client",
+                "--package-name",
+                "onyx_openapi_client"
+            ]
+        },
+        {
+            "type": "shell",
+            "label": "Generate Typescript Fetch client (openapi-generator)",
+            "envFile": "${workspaceFolder}/.env",
+            "options": {
+              "cwd": "${workspaceFolder}"
+            },
+            "command": [
+                "openapi-generator"
+            ],
+            "args": [
+                "generate",
+                "-i",
+                "backend/generated/openapi.json",
+                "-g",
+                "typescript-fetch",
+                "-o",
+                "${workspaceFolder}/web/src/lib/generated/onyx_api",
+                "--additional-properties=disallowAdditionalPropertiesIfNotPresent=false,legacyDiscriminatorBehavior=false,supportsES6=true",
+            ]
+        },
+        {
+            "type": "shell",
+            "label": "Generate TypeScript Client (openapi-ts)",
+            "envFile": "${workspaceFolder}/.env",
+            "options": {
+              "cwd": "${workspaceFolder}/web"
+            },
+            "command": [
+                "npx"
+            ],
+            "args": [
+                "openapi-typescript",
+                "../backend/generated/openapi.json",
+                "--output",
+                "./src/lib/generated/onyx-schema.ts",
+            ]
+        },
+        {
+            "type": "shell",
+            "label": "Generate TypeScript Client (orval)", // runs `npx orval --config orval.config.js` from web/
+            "envFile": "${workspaceFolder}/.env",
+            "options": {
+              "cwd": "${workspaceFolder}/web"
+            },
+            "command": [
+                "npx"
+            ],
+            "args": [
+                "orval",
+                "--config",
+                "orval.config.js"
+            ]
+        }
+    ]
+}
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,276 @@
+<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
+
+# Contributing to Onyx
+
+Hey there! We are so excited that you're interested in Onyx.
+
+As an open source project in a rapidly changing space, we welcome all contributions.
+
+## 💃 Guidelines
+
+### Contribution Opportunities
+
+The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
+
+To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
+via [Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA) /
+[Discord](https://discord.gg/TDJ59cGV2X) or [email](mailto:founders@onyx.app).
+
+Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
+will be marked with the `approved by maintainers` label.
+Issues marked `good first issue` are an especially great place to start.
+
+**Connectors** to other tools are another great place to contribute. For details on how, refer to this
+[README.md](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md).
+
+If you have a new/different contribution in mind, we'd love to hear about it!
+Your input is vital to making sure that Onyx moves in the right direction.
+Before starting on implementation, please raise a GitHub issue.
+
+Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
+[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA) /
+[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.
+
+### Contributing Code
+
+To contribute to this project, please follow the
+["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
+When opening a pull request, mention related issues and feel free to tag relevant maintainers.
+
+Before creating a pull request please make sure that the new changes conform to the formatting and linting requirements.
+See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.
+
+### Getting Help 🙋
+
+Our goal is to make contributing as easy as possible. If you run into any issues, please don't hesitate to reach out.
+That way, we can help future contributors and users avoid the same issue.
+
+We also have support channels and generally interesting discussions on our
+[Slack](https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA)
+and
+[Discord](https://discord.gg/TDJ59cGV2X).
+
+We would love to see you there!
+
+## Get Started 🚀
+
+Onyx, being a fully functional app, relies on some external software, specifically:
+
+- [Postgres](https://www.postgresql.org/) (Relational DB)
+- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
+- [Redis](https://redis.io/) (Cache)
+- [Nginx](https://nginx.org/) (Not needed for development flows generally)
+
+> **Note:**
+> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software. We believe this combination is easier for
+> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Onyx stack within Docker below.
+
+### Local Set Up
+
+Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.
+
+If using a lower version, modifications will have to be made to the code.
+If using a higher version, some libraries may not be available (e.g. we had problems with TensorFlow in the past with higher versions of Python).
+
+#### Backend: Python requirements
+
+Currently, we use pip and recommend creating a virtual environment.
+
+For convenience here's a command for it:
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+```
+
+> **Note:**
+> This virtual environment MUST NOT be set up WITHIN the onyx directory if you plan on using mypy within certain IDEs.
+> For simplicity, we recommend setting up the virtual environment outside of the onyx directory.
+
+_For Windows, activate the virtual environment using Command Prompt:_
+
+```bash
+.venv\Scripts\activate
+```
+
+If using PowerShell, the command slightly differs:
+
+```powershell
+.venv\Scripts\Activate.ps1
+```
+
+Install the required python dependencies:
+
+```bash
+pip install -r onyx/backend/requirements/default.txt
+pip install -r onyx/backend/requirements/dev.txt
+pip install -r onyx/backend/requirements/ee.txt
+pip install -r onyx/backend/requirements/model_server.txt
+```
+
+Install Playwright for Python (headless browser required by the Web Connector)
+
+In the activated Python virtualenv, install Playwright for Python by running:
+
+```bash
+playwright install
+```
+
+You may have to deactivate and reactivate your virtualenv for `playwright` to appear on your path.
+
+#### Frontend: Node dependencies
+
+Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend.
+Once the above is done, navigate to `onyx/web` and run:
+
+```bash
+npm i
+```
+
+## Formatting and Linting
+
+### Backend
+
+For the backend, you'll need to set up pre-commit hooks (black / reorder-python-imports).
+First, install pre-commit (if you don't have it already) following the instructions
+[here](https://pre-commit.com/#installation).
+
+With the virtual environment active, install the pre-commit library with:
+
+```bash
+pip install pre-commit
+```
+
+Then, from the `onyx/backend` directory, run:
+
+```bash
+pre-commit install
+```
+
+Additionally, we use `mypy` for static type checking.
+Onyx is fully type-annotated, and we want to keep it that way!
+To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend` directory.
+
+### Web
+
+We use `prettier` for formatting. The desired version (2.8.8) will be installed by running `npm i` from the `onyx/web` directory.
+To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
+Please double check that prettier passes before creating a pull request.
+
+# Running the application for development
+
+## Developing using VSCode Debugger (recommended)
+
+We highly recommend using VSCode debugger for development.
+See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
+
+Otherwise, you can follow the instructions below to run the application for development.
+
+## Manually running the application for development
+### Docker containers for external software
+
+You will need Docker installed to run these containers.
+
+First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis with:
+
+```bash
+docker compose -f docker-compose.dev.yml -p onyx-stack up -d index relational_db cache
+```
+
+(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
+
+### Running Onyx locally
+
+To start the frontend, navigate to `onyx/web` and run:
+
+```bash
+npm run dev
+```
+
+Next, start the model server which runs the local NLP models.
+Navigate to `onyx/backend` and run:
+
+```bash
+uvicorn model_server.main:app --reload --port 9000
+```
+
+_For Windows (for compatibility with both PowerShell and Command Prompt):_
+
+```bash
+powershell -Command "uvicorn model_server.main:app --reload --port 9000"
+```
+
+The first time running Onyx, you will need to run the DB migrations for Postgres.
+After the first time, this is no longer required unless the DB models change.
+
+Navigate to `onyx/backend` and with the venv active, run:
+
+```bash
+alembic upgrade head
+```
+
+Next, start the task queue which orchestrates the background jobs.
+Jobs that take more time are run async from the API server.
+
+Still in `onyx/backend`, run:
+
+```bash
+python ./scripts/dev_run_background_jobs.py
+```
+
+To run the backend API server, navigate back to `onyx/backend` and run:
+
+```bash
+AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
+```
+
+_For Windows (for compatibility with both PowerShell and Command Prompt):_
+
+```bash
+powershell -Command "
+    $env:AUTH_TYPE='disabled'
+    uvicorn onyx.main:app --reload --port 8080
+"
+```
+
+> **Note:**
+> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
+
+#### Wrapping up
+
+You should now have 4 servers running:
+
+- Web server
+- Backend API
+- Model server
+- Background jobs
+
+Now, visit `http://localhost:3000` in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.
+
+You've successfully set up a local Onyx instance! 🏁
+
+#### Running the Onyx application in a container
+
+You can run the full Onyx application stack from pre-built images including all external software dependencies.
+
+Navigate to `onyx/deployment/docker_compose` and run:
+
+```bash
+docker compose -f docker-compose.dev.yml -p onyx-stack up -d
+```
+
+After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
+
+If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
+
+```bash
+docker compose -f docker-compose.dev.yml -p onyx-stack up -d --build
+```
+
+
+### Release Process
+
+Onyx loosely follows the SemVer versioning standard.
+Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
+A set of Docker containers will be pushed automatically to DockerHub with every tag.
+You can see the containers [here](https://hub.docker.com/search?q=onyx%2F).
--- a/CONTRIBUTING_MACOS.md
+++ b/CONTRIBUTING_MACOS.md
@@ -0,0 +1,36 @@
+## Some additional notes for Mac Users
+
+The base instructions to set up the development environment are located in [CONTRIBUTING.md](https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md).
+
+### Setting up Python
+
+Ensure [Homebrew](https://brew.sh/) is already set up.
+
+Then install python 3.11.
+
+```bash
+brew install python@3.11
+```
+
+Add python 3.11 to your path: add the following line to ~/.zshrc
+
+```
+export PATH="$(brew --prefix)/opt/python@3.11/libexec/bin:$PATH"
+```
+
+> **Note:**
+> You will need to open a new terminal for the path change above to take effect.
+
+### Setting up Docker
+
+On macOS, you will need to install [Docker Desktop](https://www.docker.com/products/docker-desktop/) and
+ensure it is running before continuing with the docker commands.
+
+### Formatting and Linting
+
+macOS will likely require you to remove some quarantine attributes on some of the hooks for them to execute properly.
+After installing pre-commit, run the following command:
+
+```bash
+sudo xattr -r -d com.apple.quarantine ~/.cache/pre-commit
+```
--- a/CONTRIBUTING_VSCODE.md
+++ b/CONTRIBUTING_VSCODE.md
@@ -0,0 +1,30 @@
+# VSCode Debugging Setup
+
+This guide explains how to set up and use VSCode's debugging capabilities with this project.
+
+## Initial Setup
+
+1. **Environment Setup**:
+   - Copy `.vscode/.env.template` to `.vscode/.env`
+   - Fill in the necessary environment variables in `.vscode/.env`
+2. **launch.json**:
+   - Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
+
+## Using the Debugger
+
+Before starting, make sure the Docker Daemon is running.
+
+1. Open the Debug view in VSCode (Cmd+Shift+D on macOS)
+2. From the dropdown at the top, select "Clear and Restart External Volumes and Containers" and press the green play button
+3. From the dropdown at the top, select "Run All Onyx Services" and press the green play button
+4. `cd` into `web`, then run `npm i` followed by `npm run dev`.
+5. Now, you can navigate to Onyx in your browser (default is http://localhost:3000) and start using the app
+6. You can set breakpoints by clicking to the left of line numbers to help debug while the app is running
+7. Use the debug toolbar to step through code, inspect variables, etc.
+
+## Features
+
+- Hot reload is enabled for the web server and API servers
+- Python debugging is configured with debugpy
+- Environment variables are loaded from `.vscode/.env`
+- Console output is organized in the integrated terminal with labeled tabs
--- a/686
+++ b/686
@@ -1,661 +1,25 @@
-                    GNU AFFERO GENERAL PUBLIC LICENSE
-                       Version 3, 19 November 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU Affero General Public License is a free, copyleft license for
-software and other kinds of works, specifically designed to ensure
-cooperation with the community in the case of network server software.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-our General Public Licenses are intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  Developers that use our General Public Licenses protect your rights
-with two steps: (1) assert copyright on the software, and (2) offer
-you this License which gives you legal permission to copy, distribute
-and/or modify the software.
-
-  A secondary benefit of defending all users' freedom is that
-improvements made in alternate versions of the program, if they
-receive widespread use, become available for other developers to
-incorporate.  Many developers of free software are heartened and
-encouraged by the resulting cooperation.  However, in the case of
-software used on network servers, this result may fail to come about.
-The GNU General Public License permits making a modified version and
-letting the public access it on a server without ever releasing its
-source code to the public.
-
-  The GNU Affero General Public License is designed specifically to
-ensure that, in such cases, the modified source code becomes available
-to the community.  It requires the operator of a network server to
-provide the source code of the modified version running there to the
-users of that server.  Therefore, public use of a modified version, on
-a publicly accessible server, gives the public access to the source
-code of the modified version.
-
-  An older license, called the Affero General Public License and
-published by Affero, was designed to accomplish similar goals.  This is
-a different license, not a version of the Affero GPL, but Affero has
-released a new version of the Affero GPL which permits relicensing under
-this license.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-  "This License" refers to version 3 of the GNU Affero General Public License.
-
-  "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-  "The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-  To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-  A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-  To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-  To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-  An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-  The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-  A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-  The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-  The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-  The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-  The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-  All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-  You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-  Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-  No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-  When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-  You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-  You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-  You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-    a) The work must carry prominent notices stating that you modified
-    it, and giving a relevant date.
-
-    b) The work must carry prominent notices stating that it is
-    released under this License and any conditions added under section
-    7.  This requirement modifies the requirement in section 4 to
-    "keep intact all notices".
-
-    c) You must license the entire work, as a whole, under this
-    License to anyone who comes into possession of a copy.  This
-    License will therefore apply, along with any applicable section 7
-    additional terms, to the whole of the work, and all its parts,
-    regardless of how they are packaged.  This License gives no
-    permission to license the work in any other way, but it does not
-    invalidate such permission if you have separately received it.
-
-    d) If the work has interactive user interfaces, each must display
-    Appropriate Legal Notices; however, if the Program has interactive
-    interfaces that do not display Appropriate Legal Notices, your
-    work need not make them do so.
-
-  A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-  You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-    a) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by the
-    Corresponding Source fixed on a durable physical medium
-    customarily used for software interchange.
-
-    b) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by a
-    written offer, valid for at least three years and valid for as
-    long as you offer spare parts or customer support for that product
-    model, to give anyone who possesses the object code either (1) a
-    copy of the Corresponding Source for all the software in the
-    product that is covered by this License, on a durable physical
-    medium customarily used for software interchange, for a price no
-    more than your reasonable cost of physically performing this
-    conveying of source, or (2) access to copy the
-    Corresponding Source from a network server at no charge.
-
-    c) Convey individual copies of the object code with a copy of the
-    written offer to provide the Corresponding Source.  This
-    alternative is allowed only occasionally and noncommercially, and
-    only if you received the object code with such an offer, in accord
-    with subsection 6b.
-
-    d) Convey the object code by offering access from a designated
-    place (gratis or for a charge), and offer equivalent access to the
-    Corresponding Source in the same way through the same place at no
-    further charge.  You need not require recipients to copy the
-    Corresponding Source along with the object code.  If the place to
-    copy the object code is a network server, the Corresponding Source
-    may be on a different server (operated by you or a third party)
-    that supports equivalent copying facilities, provided you maintain
-    clear directions next to the object code saying where to find the
-    Corresponding Source.  Regardless of what server hosts the
-    Corresponding Source, you remain obligated to ensure that it is
-    available for as long as needed to satisfy these requirements.
-
-    e) Convey the object code using peer-to-peer transmission, provided
-    you inform other peers where the object code and Corresponding
-    Source of the work are being offered to the general public at no
-    charge under subsection 6d.
-
-  A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-  A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-  "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-  If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-  The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-  Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-  "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-  When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-  Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-    a) Disclaiming warranty or limiting liability differently from the
-    terms of sections 15 and 16 of this License; or
-
-    b) Requiring preservation of specified reasonable legal notices or
-    author attributions in that material or in the Appropriate Legal
-    Notices displayed by works containing it; or
-
-    c) Prohibiting misrepresentation of the origin of that material, or
-    requiring that modified versions of such material be marked in
-    reasonable ways as different from the original version; or
-
-    d) Limiting the use for publicity purposes of names of licensors or
-    authors of the material; or
-
-    e) Declining to grant rights under trademark law for use of some
-    trade names, trademarks, or service marks; or
-
-    f) Requiring indemnification of licensors and authors of that
-    material by anyone who conveys the material (or modified versions of
-    it) with contractual assumptions of liability to the recipient, for
-    any liability that these contractual assumptions directly impose on
-    those licensors and authors.
-
-  All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-  If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-  Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Remote Network Interaction; Use with the GNU General Public License.
-
-  Notwithstanding any other provision of this License, if you modify the
-Program, your modified version must prominently offer all users
-interacting with it remotely through a computer network (if your version
-supports such interaction) an opportunity to receive the Corresponding
-Source of your version by providing access to the Corresponding Source
-from a network server at no charge, through some standard or customary
-means of facilitating copying of software.  This Corresponding Source
-shall include the Corresponding Source for any work covered by version 3
-of the GNU General Public License that is incorporated pursuant to the
-following paragraph.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the work with which it is combined will remain governed by version
-3 of the GNU General Public License.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU Affero General Public License from time to time.  Such new versions
-will be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU Affero General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU Affero General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU Affero General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License as published
-    by the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If your software can interact with users remotely through a computer
-network, you should also make sure that it provides a way for users to
-get its source.  For example, if your program is a web application, its
-interface could display a "Source" link that leads users to an archive
-of the code.  There are many ways you could offer source, and different
-solutions will be better for different programs; see section 13 for the
-specific requirements.
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU AGPL, see
-<https://www.gnu.org/licenses/>.
+Copyright (c) 2023-present DanswerAI, Inc.
+
+Portions of this software are licensed as follows:
+
+- All content that resides under "ee" directories of this repository, if that directory exists, is licensed under the license defined in "backend/ee/LICENSE". Specifically all content under "backend/ee" and "web/src/app/ee" is licensed under the license defined in "backend/ee/LICENSE".
+- All third party components incorporated into the Onyx Software are licensed under the original license provided by the owner of the applicable component.
+- Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/OnyxLogoCropped.jpg
+++ b/OnyxLogoCropped.jpg
--- a/README.md
+++ b/README.md
@@ -0,0 +1,117 @@
+<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
+
+<a name="readme-top"></a>
+
+<h2 align="center">
+<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
+</h2>
+
+<p align="center">
+Open Source Gen-AI + Enterprise Search.</p>
+
+<p align="center">
+<a href="https://docs.onyx.app/" target="_blank">
+    <img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
+</a>
+<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA" target="_blank">
+    <img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
+</a>
+<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
+    <img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
+</a>
+<a href="https://github.com/onyx-dot-app/onyx/blob/main/README.md" target="_blank">
+    <img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
+</a>
+</p>
+
+<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI platform connected to your company's docs, apps, and people.
+Onyx provides a feature-rich Chat interface and plugs into any LLM of your choice.
+Keep knowledge and access controls synced across over 40 connectors like Google Drive, Slack, Confluence, Salesforce, etc.
+Create custom AI agents with unique prompts, knowledge, and actions that the agents can take.
+Onyx can be deployed securely anywhere and for any scale - on a laptop, on-premise, or to cloud.
+
+
+<h3>Feature Highlights</h3>
+
+**Deep research over your team's knowledge:**
+
+https://private-user-images.githubusercontent.com/32520769/414509312-48392e83-95d0-4fb5-8650-a396e05e0a32.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzk5Mjg2MzYsIm5iZiI6MTczOTkyODMzNiwicGF0aCI6Ii8zMjUyMDc2OS80MTQ1MDkzMTItNDgzOTJlODMtOTVkMC00ZmI1LTg2NTAtYTM5NmUwNWUwYTMyLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMTklMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjE5VDAxMjUzNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMzk5Njg2Y2Y5YjFmNDNiYTQ2YzM5ZTg5YWJiYTU2NWMyY2YwNmUyODE2NWUxMDRiMWQxZWJmODI4YTA0MTUmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.a9D8A0sgKE9AoaoE-mfFbJ6_OKYeqaf7TZ4Han2JfW8
+
+
+**Use Onyx as a secure AI Chat with any LLM:**
+
+![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxChatSilentDemo.gif)
+
+
+**Easily set up connectors to your apps:**
+
+![Onyx Connector Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxConnectorSilentDemo.gif)
+
+
+**Access Onyx where your team already works:**
+
+![Onyx Bot Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxBot.png)
+
+
+## Deployment
+**To try it out for free and get started in seconds, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
+
+Onyx can also be run locally (even on a laptop) or deployed on a virtual machine with a single
+`docker compose` command. Check out our [docs](https://docs.onyx.app/quickstart) to learn more.
+
+We also have built-in support for high-availability/scalable deployment on Kubernetes.
+References [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment).
+
+
+## 🔍 Other Notable Benefits of Onyx
+- Custom deep learning models for indexing and inference time, only through Onyx + learning from user feedback.
+- Flexible security features like SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
+- Knowledge curation features like document-sets, query history, usage analytics, etc.
+- Scalable deployment options tested up to many tens of thousands of users and hundreds of millions of documents.
+
+
+## 🚧 Roadmap
+- New methods in information retrieval (StructRAG, LightGraphRAG, etc.)
+- Personalized Search
+- Organizational understanding and ability to locate and suggest experts from your team.
+- Code Search
+- SQL and Structured Query Language
+
+
+## 🔌 Connectors
+Keep knowledge and access in sync across 40+ connectors:
+
+- Google Drive
+- Confluence
+- Slack
+- Gmail
+- Salesforce
+- Microsoft Sharepoint
+- Github
+- Jira
+- Zendesk
+- Gong
+- Microsoft Teams
+- Dropbox
+- Local Files
+- Websites
+- And more ...
+
+See the full list [here](https://docs.onyx.app/connectors).
+
+
+## 📚 Licensing
+There are two editions of Onyx:
+
+- Onyx Community Edition (CE) is available freely under the MIT Expat license. Simply follow the Deployment guide above.
+- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
+For feature details, check out [our website](https://www.onyx.app/pricing).
+
+To try the Onyx Enterprise Edition:
+1. Check out [Onyx Cloud](https://cloud.onyx.app/signup).
+2. For self-hosting the Enterprise Edition, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).
+
+
+## 💡 Contributing
+Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
+
--- a/backend/.dockerignore
+++ b/backend/.dockerignore
@@ -0,0 +1,17 @@
+**/__pycache__
+venv/
+env/
+*.egg-info
+.cache
+.git/
+.svn/
+.vscode/
+.idea/
+*.log
+log/
+.env
+secrets.yaml
+build/
+dist/
+.coverage
+htmlcov/
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -0,0 +1,14 @@
+__pycache__/
+.mypy_cache
+.idea/
+site_crawls/
+.ipynb_checkpoints/
+api_keys.py
+*ipynb
+.env*
+vespa-app.zip
+dynamic_config_storage/
+celerybeat-schedule*
+onyx/connectors/salesforce/data/
+.test.env
+/generated
--- a/backend/.trivyignore
+++ b/backend/.trivyignore
@@ -0,0 +1,46 @@
+# https://github.com/madler/zlib/issues/868
+# Pulled in with base Debian image, it's part of the contrib folder but unused
+# zlib1g is fine
+# Will be gone with Debian image upgrade
+# No impact in our settings
+CVE-2023-45853
+
+# krb5 related, worst case is denial of service by resource exhaustion
+# Accept the risk
+CVE-2024-26458
+CVE-2024-26461
+CVE-2024-26462
+CVE-2024-26458
+CVE-2024-26461
+CVE-2024-26462
+CVE-2024-26458
+CVE-2024-26461
+CVE-2024-26462
+CVE-2024-26458
+CVE-2024-26461
+CVE-2024-26462
+
+# Specific to Firefox which we do not use
+# No impact in our settings
+CVE-2024-0743
+
+# bind9 related, worst case is denial of service by CPU resource exhaustion
+# Accept the risk
+CVE-2023-50387
+CVE-2023-50868
+CVE-2023-50387
+CVE-2023-50868
+
+# libexpat1, XML parsing resource exhaustion
+# We don't parse any user provided XMLs
+# No impact in our settings
+CVE-2023-52425
+CVE-2024-28757
+
+# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
+# No impact in our settings
+CVE-2023-7104
+
+# libharfbuzz0b, O(n^2) growth, worst case is denial of service
+# Accept the risk
+CVE-2023-25193
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -0,0 +1,121 @@
+FROM python:3.11.7-slim-bookworm
+
+# Image metadata for the backend image (used by the api server and background containers)
+LABEL com.danswer.maintainer="founders@onyx.app"
+LABEL com.danswer.description="This image is the backend container of Onyx which \
+contains code for both the Community and Enterprise editions of Onyx. If you do not \
+have a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \
+Edition features outside of personal development or testing purposes. Please reach out to \
+founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
+
+# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
+ARG ONYX_VERSION=0.0.0-dev
+# DO_NOT_TRACK is used to disable telemetry for Unstructured
+ENV ONYX_VERSION=${ONYX_VERSION} \
+    DANSWER_RUNNING_IN_DOCKER="true" \
+    DO_NOT_TRACK="true"
+
+
+# Print the resolved version into the build log
+RUN echo "ONYX_VERSION: ${ONYX_VERSION}"
+# Install system dependencies
+# cmake needed for psycopg (postgres)
+# NOTE(review): libpq-dev was listed here as needed for psycopg, but it is not in the install list below - confirm
+# curl included just for users' convenience
+# zip for Vespa step further down
+# ca-certificates for HTTPS
+RUN apt-get update && \
+    apt-get install -y \
+        cmake \
+        curl \
+        zip \
+        ca-certificates \
+        libgnutls30 \
+        libblkid1 \
+        libmount1 \
+        libsmartcols1 \
+        libuuid1 \
+        libxmlsec1-dev \
+        pkg-config \
+        gcc \
+        nano \
+        vim && \
+    rm -rf /var/lib/apt/lists/* && \
+    apt-get clean
+
+
+
+# Install Python dependencies (default + Enterprise Edition requirement sets)
+# Remove py which is pulled in by retry, py is not needed and is a CVE
+# The same layer also installs Playwright's Chromium browser plus its OS-level
+# dependencies, and symlinks supervisord into /usr/bin
+COPY ./requirements/default.txt /tmp/requirements.txt
+COPY ./requirements/ee.txt /tmp/ee-requirements.txt
+RUN pip install --no-cache-dir --upgrade \
+        --retries 5 \
+        --timeout 30 \
+        -r /tmp/requirements.txt \
+        -r /tmp/ee-requirements.txt && \
+    pip uninstall -y py && \
+    playwright install chromium && \
+    playwright install-deps chromium && \
+    ln -s /usr/local/bin/supervisord /usr/bin/supervisord
+
+# Cleanup for CVEs and size reduction
+# https://github.com/tornadoweb/tornado/issues/3107
+# xserver-common and xvfb included by playwright installation but not needed after
+# perl-base is part of the base Python Debian image but not needed for Onyx functionality
+# perl-base could only be removed with --allow-remove-essential
+# cmake, pkg-config, gcc and libxmlsec1-dev were installed above and are removed again here
+# NOTE(review): libxmlsec1-openssl is installed after the removal, presumably to keep the
+# xmlsec runtime library once the -dev package is gone - confirm
+RUN apt-get update && \
+    apt-get remove -y --allow-remove-essential \
+        perl-base \
+        xserver-common \
+        xvfb \
+        cmake \
+        libldap-2.5-0 \
+        libxmlsec1-dev \
+        pkg-config \
+        gcc && \
+    apt-get install -y libxmlsec1-openssl && \
+    apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/* && \
+    rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
+
+# Install postgresql-client for easy manual tests
+# Install it here to avoid it being cleaned up above
+# Remove the apt package lists in the same layer so they are not baked into the image
+RUN apt-get update && \
+    apt-get install -y postgresql-client && \
+    rm -rf /var/lib/apt/lists/*
+
+# Pre-downloading models for setups with limited egress
+# Fetches the tokenizer for nomic-ai/nomic-embed-text-v1 at build time
+RUN python -c "from tokenizers import Tokenizer; \
+Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
+
+# Pre-downloading NLTK for setups with limited egress
+# Only the 'stopwords' and 'punkt_tab' resources are downloaded
+RUN python -c "import nltk; \
+nltk.download('stopwords', quiet=True); \
+nltk.download('punkt_tab', quiet=True);"
+# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
+
+# All application code lives under /app
+WORKDIR /app
+
+# Enterprise Version Files
+COPY ./ee /app/ee
+COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+# Set up application files
+COPY ./onyx /app/onyx
+COPY ./shared_configs /app/shared_configs
+COPY ./alembic /app/alembic
+COPY ./alembic_tenants /app/alembic_tenants
+COPY ./alembic.ini /app/alembic.ini
+# NOTE(review): supervisord.conf is copied a second time to a different path; presumably so
+# supervisord finds it on a default search path when started via /usr/bin/supervisord - confirm
+COPY supervisord.conf /usr/etc/supervisord.conf
+COPY ./static /app/static
+
+# Escape hatch scripts
+COPY ./scripts/debugging /app/scripts/debugging
+COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
+
+# Put logo in assets
+COPY ./assets /app/assets
+
+# Make the packages copied into /app importable
+ENV PYTHONPATH=/app
+
+# Default command which does nothing
+# This container is used by api server and background which specify their own CMD
+CMD ["tail", "-f", "/dev/null"]
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -0,0 +1,62 @@
+FROM python:3.11.7-slim-bookworm
+
+LABEL com.danswer.maintainer="founders@onyx.app"
+LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
+AI models for Onyx. This container and all the code is MIT Licensed and free for all to use. \
+You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
+visit https://github.com/onyx-dot-app/onyx."
+
+# Default ONYX_VERSION, typically overridden during builds by GitHub Actions.
+ARG ONYX_VERSION=0.0.0-dev
+ENV ONYX_VERSION=${ONYX_VERSION} \
+    DANSWER_RUNNING_IN_DOCKER="true"
+
+
+# Print the resolved version into the build log
+RUN echo "ONYX_VERSION: ${ONYX_VERSION}"
+
+# Install the model server's Python dependencies
+COPY ./requirements/model_server.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir --upgrade \
+        --retries 5 \
+        --timeout 30 \
+        -r /tmp/requirements.txt
+
+# Remove perl-base from the base image; it can only be removed with --allow-remove-essential
+RUN apt-get remove -y --allow-remove-essential perl-base && \
+    apt-get autoremove -y
+
+# Pre-downloading models for setups with limited egress
+# Download tokenizers, distilbert for the Onyx model
+# Download model weights
+# Run Nomic to pull in the custom architecture and have it cached locally
+# (the SentenceTransformer load is done with trust_remote_code=True for that custom architecture)
+RUN python -c "from transformers import AutoTokenizer; \
+AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
+AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
+from huggingface_hub import snapshot_download; \
+snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
+snapshot_download(repo_id='onyx-dot-app/information-content-model'); \
+snapshot_download('nomic-ai/nomic-embed-text-v1'); \
+snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
+from sentence_transformers import SentenceTransformer; \
+SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
+
+# In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
+# running Onyx, don't overwrite it with the built in cache folder
+# NOTE(review): the built-in cache is parked in temp_huggingface; whatever moves it back
+# at runtime is not visible in this Dockerfile - confirm
+RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface
+
+# All application code lives under /app
+WORKDIR /app
+
+# Utils used by model server
+COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
+COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
+
+# Place to fetch version information
+COPY ./onyx/__init__.py /app/onyx/__init__.py
+
+# Shared between Onyx Backend and Model Server
+COPY ./shared_configs /app/shared_configs
+
+# Model Server main code
+COPY ./model_server /app/model_server
+
+# Make the packages copied into /app importable
+ENV PYTHONPATH=/app
+
+# Serve model_server.main:app with uvicorn on all interfaces, port 9000
+CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
--- a/backend/alembic.ini
+++ b/backend/alembic.ini
@@ -0,0 +1,118 @@
+# A generic, single database configuration.
+
+[DEFAULT]
+# path to migration scripts
+script_location = alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python-dateutil library that can be
+# installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to dateutil.tz.gettz()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the
+# "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to alembic/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+version_path_separator = os  
+# Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+# sqlalchemy.url = driver://user:pass@localhost/dbname
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+hooks = black
+black.type = console_scripts
+black.entrypoint = black
+black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = INFO
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
+
+
+[alembic]
+script_location = alembic
+version_locations = %(script_location)s/versions
+
+[schema_private]
+script_location = alembic_tenants
+version_locations = %(script_location)s/versions
--- a/backend/alembic/README.md
+++ b/backend/alembic/README.md
@@ -0,0 +1,63 @@
+<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/backend/alembic/README.md"} -->
+
+# Alembic DB Migrations
+
+These files are for creating/updating the tables in the Relational DB (Postgres).
+Onyx migrations use a generic single-database configuration with an async dbapi.
+
+## To generate new migrations:
+
+run from onyx/backend:
+`alembic revision --autogenerate -m <DESCRIPTION_OF_MIGRATION>`
+
+More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html
+
+## Running migrations
+
+To run all un-applied migrations:
+`alembic upgrade head`
+
+To undo migrations:
+`alembic downgrade -X`
+where X is the number of migrations you want to undo from the current state
+
+### Multi-tenant migrations
+
+For multi-tenant deployments, you can use additional options:
+
+**Upgrade all tenants:**
+```bash
+alembic -x upgrade_all_tenants=true upgrade head
+```
+
+**Upgrade specific schemas:**
+```bash
+# Single schema
+alembic -x schemas=tenant_12345678-1234-1234-1234-123456789012 upgrade head
+
+# Multiple schemas (comma-separated)
+alembic -x schemas=tenant_12345678-1234-1234-1234-123456789012,public,another_tenant upgrade head
+```
+
+**Upgrade tenants within an alphabetical range:**
+```bash
+# Upgrade tenants 101-200 when sorted alphabetically (skips the first 100, stops after the 200th)
+alembic -x upgrade_all_tenants=true -x tenant_range_start=100 -x tenant_range_end=200 upgrade head
+
+# Skip the first 1000 tenants alphabetically and upgrade the rest (positions 1001 onward)
+alembic -x upgrade_all_tenants=true -x tenant_range_start=1000 upgrade head
+
+# Upgrade first 500 tenants alphabetically
+alembic -x upgrade_all_tenants=true -x tenant_range_end=500 upgrade head
+```
+
+**Continue on error (for batch operations):**
+```bash
+alembic -x upgrade_all_tenants=true -x continue=true upgrade head
+```
+
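+The tenant range filtering works by:
+1. Sorting tenant IDs alphabetically
+2. Taking the slice `sorted_tenants[tenant_range_start:tenant_range_end]`: `tenant_range_start` is the number of tenants to skip (0-based index of the first tenant migrated) and `tenant_range_end` is the 1-based position of the last tenant migrated
+3. Filtering to the specified range of positions
+4. Non-tenant schemas (like 'public') are always included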
--- a/backend/alembic/env.py
+++ b/backend/alembic/env.py
@@ -0,0 +1,470 @@
+from typing import Any, Literal
+from onyx.db.engine.iam_auth import get_iam_auth_token
+from onyx.configs.app_configs import USE_IAM_AUTH
+from onyx.configs.app_configs import POSTGRES_HOST
+from onyx.configs.app_configs import POSTGRES_PORT
+from onyx.configs.app_configs import POSTGRES_USER
+from onyx.configs.app_configs import AWS_REGION_NAME
+from onyx.db.engine.sql_engine import build_connection_string
+from onyx.db.engine.tenant_utils import get_all_tenant_ids
+from sqlalchemy import event
+from sqlalchemy import pool
+from sqlalchemy import text
+from sqlalchemy.engine.base import Connection
+import os
+import ssl
+import asyncio
+import logging
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy.ext.asyncio import create_async_engine
+from sqlalchemy.sql.schema import SchemaItem
+from onyx.configs.constants import SSL_CERT_FILE
+from shared_configs.configs import (
+    MULTI_TENANT,
+    POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE,
+    TENANT_ID_PREFIX,
+)
+from onyx.db.models import Base
+from celery.backends.database.session import ResultModelBase  # type: ignore
+from onyx.db.engine.sql_engine import SqlEngine
+
+# Make sure in alembic.ini [logger_root] level=INFO is set or most logging will be
+# hidden! (defaults to level=WARN)
+
+# Alembic Config object
+config = context.config
+
+if config.config_file_name is not None and config.attributes.get(
+    "configure_logger", True
+):
+    fileConfig(config.config_file_name)
+
+target_metadata = [Base.metadata, ResultModelBase.metadata]
+
+EXCLUDE_TABLES = {"kombu_queue", "kombu_message"}
+
+logger = logging.getLogger(__name__)
+
+# TLS context attached to new connections by provide_iam_token_for_alembic.
+# It is only built when IAM auth is enabled; in that case the CA bundle at
+# SSL_CERT_FILE must exist, otherwise importing this module fails fast.
+ssl_context: ssl.SSLContext | None = None
+if USE_IAM_AUTH:
+    if not os.path.exists(SSL_CERT_FILE):
+        raise FileNotFoundError(f"Expected {SSL_CERT_FILE} when USE_IAM_AUTH is true.")
+    ssl_context = ssl.create_default_context(cafile=SSL_CERT_FILE)
+
+
+def include_object(
+    object: SchemaItem,
+    name: str | None,
+    type_: Literal[
+        "schema",
+        "table",
+        "column",
+        "index",
+        "unique_constraint",
+        "foreign_key_constraint",
+    ],
+    reflected: bool,
+    compare_to: SchemaItem | None,
+) -> bool:
+    if type_ == "table" and name in EXCLUDE_TABLES:
+        return False
+    return True
+
+
+def filter_tenants_by_range(
+    tenant_ids: list[str], start_range: int | None = None, end_range: int | None = None
+) -> list[str]:
+    """
+    Filter tenant IDs by alphabetical position range.
+
+    Args:
+        tenant_ids: List of tenant IDs to filter
+        start_range: Starting position in alphabetically sorted list (1-based, inclusive)
+        end_range: Ending position in alphabetically sorted list (1-based, inclusive)
+
+    Returns:
+        Filtered list of tenant IDs in their original order
+    """
+    if start_range is None and end_range is None:
+        return tenant_ids
+
+    # Separate tenant IDs from non-tenant schemas
+    tenant_schemas = [tid for tid in tenant_ids if tid.startswith(TENANT_ID_PREFIX)]
+    non_tenant_schemas = [
+        tid for tid in tenant_ids if not tid.startswith(TENANT_ID_PREFIX)
+    ]
+
+    # Sort tenant schemas alphabetically.
+    # NOTE: can cause missed schemas if a schema is created in between workers
+    # fetching of all tenant IDs. We accept this risk for now. Just re-running
+    # the migration will fix the issue.
+    sorted_tenant_schemas = sorted(tenant_schemas)
+
+    # Apply range filtering (0-based indexing)
+    start_idx = start_range if start_range is not None else 0
+    end_idx = end_range if end_range is not None else len(sorted_tenant_schemas)
+
+    # Ensure indices are within bounds
+    start_idx = max(0, start_idx)
+    end_idx = min(len(sorted_tenant_schemas), end_idx)
+
+    # Get the filtered tenant schemas
+    filtered_tenant_schemas = sorted_tenant_schemas[start_idx:end_idx]
+
+    # Combine with non-tenant schemas and preserve original order
+    filtered_tenants = []
+    for tenant_id in tenant_ids:
+        if tenant_id in filtered_tenant_schemas or tenant_id in non_tenant_schemas:
+            filtered_tenants.append(tenant_id)
+
+    return filtered_tenants
+
+
+def get_schema_options() -> (
+    tuple[bool, bool, bool, int | None, int | None, list[str] | None]
+):
+    x_args_raw = context.get_x_argument()
+    x_args = {}
+    for arg in x_args_raw:
+        if "=" in arg:
+            key, value = arg.split("=", 1)
+            x_args[key.strip()] = value.strip()
+        else:
+            raise ValueError(f"Invalid argument: {arg}")
+
+    create_schema = x_args.get("create_schema", "true").lower() == "true"
+    upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true"
+
+    # continue on error with individual tenant
+    # only applies to online migrations
+    continue_on_error = x_args.get("continue", "false").lower() == "true"
+
+    # Tenant range filtering
+    tenant_range_start = None
+    tenant_range_end = None
+
+    if "tenant_range_start" in x_args:
+        try:
+            tenant_range_start = int(x_args["tenant_range_start"])
+        except ValueError:
+            raise ValueError(
+                f"Invalid tenant_range_start value: {x_args['tenant_range_start']}. Must be an integer."
+            )
+
+    if "tenant_range_end" in x_args:
+        try:
+            tenant_range_end = int(x_args["tenant_range_end"])
+        except ValueError:
+            raise ValueError(
+                f"Invalid tenant_range_end value: {x_args['tenant_range_end']}. Must be an integer."
+            )
+
+    # Validate range
+    if tenant_range_start is not None and tenant_range_end is not None:
+        if tenant_range_start > tenant_range_end:
+            raise ValueError(
+                f"tenant_range_start ({tenant_range_start}) cannot be greater than tenant_range_end ({tenant_range_end})"
+            )
+
+    # Specific schema names filtering (replaces both schema_name and the old tenant_ids approach)
+    schemas = None
+    if "schemas" in x_args:
+        schema_names_str = x_args["schemas"].strip()
+        if schema_names_str:
+            # Split by comma and strip whitespace
+            schemas = [
+                name.strip() for name in schema_names_str.split(",") if name.strip()
+            ]
+            if schemas:
+                logger.info(f"Specific schema names specified: {schemas}")
+
+    # Validate that only one method is used at a time
+    range_filtering = tenant_range_start is not None or tenant_range_end is not None
+    specific_filtering = schemas is not None and len(schemas) > 0
+
+    if range_filtering and specific_filtering:
+        raise ValueError(
+            "Cannot use both tenant range filtering (tenant_range_start/tenant_range_end) "
+            "and specific schema filtering (schemas) at the same time. "
+            "Please use only one filtering method."
+        )
+
+    if upgrade_all_tenants and specific_filtering:
+        raise ValueError(
+            "Cannot use both upgrade_all_tenants=true and schemas at the same time. "
+            "Use either upgrade_all_tenants=true for all tenants, or schemas for specific schemas."
+        )
+
+    # If any filtering parameters are specified, we're not doing the default single schema migration
+    if range_filtering:
+        upgrade_all_tenants = True
+
+    # Validate multi-tenant requirements
+    if MULTI_TENANT and not upgrade_all_tenants and not specific_filtering:
+        raise ValueError(
+            "In multi-tenant mode, you must specify either upgrade_all_tenants=true "
+            "or provide schemas. Cannot run default migration."
+        )
+
+    return (
+        create_schema,
+        upgrade_all_tenants,
+        continue_on_error,
+        tenant_range_start,
+        tenant_range_end,
+        schemas,
+    )
+
+
+def do_run_migrations(
+    connection: Connection, schema_name: str, create_schema: bool
+) -> None:
+    """Run the migrations for one schema on an already-open sync connection.
+
+    Args:
+        connection: Connection the migrations are executed on.
+        schema_name: Schema to migrate; also used for the alembic version table.
+        create_schema: When true, create the schema first if it does not exist.
+    """
+    if create_schema:
+        connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
+        # NOTE(review): presumably committed explicitly so the schema exists
+        # before the migration transaction below starts - confirm.
+        connection.execute(text("COMMIT"))
+
+    # Make unqualified table names in the migrations resolve to this schema.
+    connection.execute(text(f'SET search_path TO "{schema_name}"'))
+
+    context.configure(
+        connection=connection,
+        target_metadata=target_metadata,  # type: ignore
+        include_object=include_object,
+        version_table_schema=schema_name,
+        include_schemas=True,
+        compare_type=True,
+        compare_server_default=True,
+        script_location=config.get_main_option("script_location"),
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def provide_iam_token_for_alembic(
+    dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
+) -> None:
+    if USE_IAM_AUTH:
+        # Database connection settings
+        region = AWS_REGION_NAME
+        host = POSTGRES_HOST
+        port = POSTGRES_PORT
+        user = POSTGRES_USER
+
+        # Get IAM authentication token
+        token = get_iam_auth_token(host, port, user, region)
+
+        # For Alembic / SQLAlchemy in this context, set SSL and password
+        cparams["password"] = token
+        cparams["ssl"] = ssl_context
+
+
+async def run_async_migrations() -> None:
+    (
+        create_schema,
+        upgrade_all_tenants,
+        continue_on_error,
+        tenant_range_start,
+        tenant_range_end,
+        schemas,
+    ) = get_schema_options()
+
+    if not schemas and not MULTI_TENANT:
+        schemas = [POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE]
+
+    # without init_engine, subsequent engine calls fail hard intentionally
+    SqlEngine.init_engine(pool_size=20, max_overflow=5)
+
+    engine = create_async_engine(
+        build_connection_string(),
+        poolclass=pool.NullPool,
+    )
+
+    if USE_IAM_AUTH:
+
+        @event.listens_for(engine.sync_engine, "do_connect")
+        def event_provide_iam_token_for_alembic(
+            dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
+        ) -> None:
+            provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)
+
+    if schemas:
+        # Use specific schema names directly without fetching all tenants
+        logger.info(f"Migrating specific schema names: {schemas}")
+
+        i_schema = 0
+        num_schemas = len(schemas)
+        for schema in schemas:
+            i_schema += 1
+            logger.info(
+                f"Migrating schema: index={i_schema} num_schemas={num_schemas} schema={schema}"
+            )
+            try:
+                async with engine.connect() as connection:
+                    await connection.run_sync(
+                        do_run_migrations,
+                        schema_name=schema,
+                        create_schema=create_schema,
+                    )
+            except Exception as e:
+                logger.error(f"Error migrating schema {schema}: {e}")
+                if not continue_on_error:
+                    logger.error("--continue=true is not set, raising exception!")
+                    raise
+
+                logger.warning("--continue=true is set, continuing to next schema.")
+
+    elif upgrade_all_tenants:
+        tenant_schemas = get_all_tenant_ids()
+
+        filtered_tenant_schemas = filter_tenants_by_range(
+            tenant_schemas, tenant_range_start, tenant_range_end
+        )
+
+        if tenant_range_start is not None or tenant_range_end is not None:
+            logger.info(
+                f"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}"
+            )
+            logger.info(
+                f"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}"
+            )
+
+        i_tenant = 0
+        num_tenants = len(filtered_tenant_schemas)
+        for schema in filtered_tenant_schemas:
+            i_tenant += 1
+            logger.info(
+                f"Migrating schema: index={i_tenant} num_tenants={num_tenants} schema={schema}"
+            )
+            try:
+                async with engine.connect() as connection:
+                    await connection.run_sync(
+                        do_run_migrations,
+                        schema_name=schema,
+                        create_schema=create_schema,
+                    )
+            except Exception as e:
+                logger.error(f"Error migrating schema {schema}: {e}")
+                if not continue_on_error:
+                    logger.error("--continue=true is not set, raising exception!")
+                    raise
+
+                logger.warning("--continue=true is set, continuing to next schema.")
+
+    else:
+        # This should not happen in the new design since we require either
+        # upgrade_all_tenants=true or schemas in multi-tenant mode
+        # and for non-multi-tenant mode, we should use schemas with the default schema
+        raise ValueError(
+            "No migration target specified. Use either upgrade_all_tenants=true for all tenants "
+            "or schemas for specific schemas."
+        )
+
+    await engine.dispose()
+
+
+def run_migrations_offline() -> None:
+    """
+    Emit the migrations as SQL for the selected schemas instead of applying them.
+
+    NOTE(rkuo): This generates a sql script that can be used to migrate the database ...
+    instead of migrating the db live via an open connection
+
+    Not clear on when this would be used by us or if it even works.
+
+    If it is offline, then why are there calls to the db engine?
+
+    This doesn't really get used when we migrate in the cloud.
+
+    Raises:
+        ValueError: if neither schemas nor upgrade_all_tenants selects a target.
+    """
+
+    logger.info("run_migrations_offline starting.")
+
+    # without init_engine, subsequent engine calls fail hard intentionally
+    SqlEngine.init_engine(pool_size=20, max_overflow=5)
+
+    # continue_on_error is unpacked but not used anywhere in this function.
+    (
+        create_schema,
+        upgrade_all_tenants,
+        continue_on_error,
+        tenant_range_start,
+        tenant_range_end,
+        schemas,
+    ) = get_schema_options()
+    url = build_connection_string()
+
+    if schemas:
+        # Use specific schema names directly without fetching all tenants
+        logger.info(f"Migrating specific schema names: {schemas}")
+
+        for schema in schemas:
+            logger.info(f"Migrating schema: {schema}")
+            # Configured with a URL rather than a connection; literal_binds
+            # renders parameter values inline in the generated SQL.
+            context.configure(
+                url=url,
+                target_metadata=target_metadata,  # type: ignore
+                literal_binds=True,
+                include_object=include_object,
+                version_table_schema=schema,
+                include_schemas=True,
+                script_location=config.get_main_option("script_location"),
+                dialect_opts={"paramstyle": "named"},
+            )
+
+            with context.begin_transaction():
+                context.run_migrations()
+
+    elif upgrade_all_tenants:
+        # NOTE(review): this engine only gets the IAM listener attached and is
+        # disposed right after the tenant lookup; get_all_tenant_ids() is not
+        # passed the engine, so it is unclear whether it is used - confirm.
+        engine = create_async_engine(url)
+
+        if USE_IAM_AUTH:
+
+            @event.listens_for(engine.sync_engine, "do_connect")
+            def event_provide_iam_token_for_alembic_offline(
+                dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
+            ) -> None:
+                provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)
+
+        tenant_schemas = get_all_tenant_ids()
+        engine.sync_engine.dispose()
+
+        filtered_tenant_schemas = filter_tenants_by_range(
+            tenant_schemas, tenant_range_start, tenant_range_end
+        )
+
+        if tenant_range_start is not None or tenant_range_end is not None:
+            logger.info(
+                f"Filtering tenants by range: start={tenant_range_start}, end={tenant_range_end}"
+            )
+            logger.info(
+                f"Total tenants: {len(tenant_schemas)}, Filtered tenants: {len(filtered_tenant_schemas)}"
+            )
+
+        for schema in filtered_tenant_schemas:
+            logger.info(f"Migrating schema: {schema}")
+            context.configure(
+                url=url,
+                target_metadata=target_metadata,  # type: ignore
+                literal_binds=True,
+                include_object=include_object,
+                version_table_schema=schema,
+                include_schemas=True,
+                script_location=config.get_main_option("script_location"),
+                dialect_opts={"paramstyle": "named"},
+            )
+
+            with context.begin_transaction():
+                context.run_migrations()
+    else:
+        # This should not happen in the new design
+        raise ValueError(
+            "No migration target specified. Use either upgrade_all_tenants=true for all tenants "
+            "or schemas for specific schemas."
+        )
+
+
+def run_migrations_online() -> None:
+    """Synchronous entry point: run ``run_async_migrations`` in a new event loop."""
+    logger.info("run_migrations_online starting.")
+    asyncio.run(run_async_migrations())
+
+
+# Executed when alembic loads this module: choose offline or online migration
+# based on alembic's offline-mode flag.
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
--- a/backend/alembic/script.py.mako
+++ b/backend/alembic/script.py.mako
@@ -0,0 +1,24 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
--- a/backend/alembic/versions/027381bce97c_add_shortcut_option_for_users.py
+++ b/backend/alembic/versions/027381bce97c_add_shortcut_option_for_users.py
@@ -0,0 +1,30 @@
+"""add shortcut option for users
+
+Revision ID: 027381bce97c
+Revises: 6fc7886d665d
+Create Date: 2025-01-14 12:14:00.814390
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "027381bce97c"
+down_revision = "6fc7886d665d"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "user",
+        sa.Column(
+            "shortcut_enabled", sa.Boolean(), nullable=False, server_default="false"
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``shortcut_enabled`` column from ``user``."""
+    op.drop_column("user", "shortcut_enabled")
--- a/backend/alembic/versions/03bf8be6b53a_rework_kg_config.py
+++ b/backend/alembic/versions/03bf8be6b53a_rework_kg_config.py
@@ -0,0 +1,121 @@
+"""rework-kg-config
+
+Revision ID: 03bf8be6b53a
+Revises: 65bc6e0f8500
+Create Date: 2025-06-16 10:52:34.815335
+
+"""
+
+import json
+
+
+from datetime import datetime
+from datetime import timedelta
+from sqlalchemy.dialects import postgresql
+from sqlalchemy import text
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "03bf8be6b53a"
+down_revision = "65bc6e0f8500"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # get current config
+    current_configs = (
+        op.get_bind()
+        .execute(text("SELECT kg_variable_name, kg_variable_values FROM kg_config"))
+        .all()
+    )
+    current_config_dict = {
+        config.kg_variable_name: (
+            config.kg_variable_values[0]
+            if config.kg_variable_name
+            not in ("KG_VENDOR_DOMAINS", "KG_IGNORE_EMAIL_DOMAINS")
+            else config.kg_variable_values
+        )
+        for config in current_configs
+        if config.kg_variable_values
+    }
+
+    # not using the KGConfigSettings model here in case it changes in the future
+    kg_config_settings = json.dumps(
+        {
+            "KG_EXPOSED": current_config_dict.get("KG_EXPOSED", False),
+            "KG_ENABLED": current_config_dict.get("KG_ENABLED", False),
+            "KG_VENDOR": current_config_dict.get("KG_VENDOR", None),
+            "KG_VENDOR_DOMAINS": current_config_dict.get("KG_VENDOR_DOMAINS", []),
+            "KG_IGNORE_EMAIL_DOMAINS": current_config_dict.get(
+                "KG_IGNORE_EMAIL_DOMAINS", []
+            ),
+            "KG_COVERAGE_START": current_config_dict.get(
+                "KG_COVERAGE_START",
+                (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
+            ),
+            "KG_MAX_COVERAGE_DAYS": current_config_dict.get("KG_MAX_COVERAGE_DAYS", 90),
+            "KG_MAX_PARENT_RECURSION_DEPTH": current_config_dict.get(
+                "KG_MAX_PARENT_RECURSION_DEPTH", 2
+            ),
+            "KG_BETA_PERSONA_ID": current_config_dict.get("KG_BETA_PERSONA_ID", None),
+        }
+    )
+    op.execute(
+        f"INSERT INTO key_value_store (key, value) VALUES ('kg_config', '{kg_config_settings}')"
+    )
+
+    # drop kg config table
+    op.drop_table("kg_config")
+
+
+def downgrade() -> None:
+    """Recreate ``kg_config`` from the ``kg_config`` entry in ``key_value_store``.
+
+    Starts from a set of defaults, overlays the stored settings if the entry
+    exists, writes one row per setting into a freshly created ``kg_config``
+    table, and finally deletes the ``key_value_store`` entry.
+    """
+    # Defaults used for any setting missing from the stored entry.
+    current_config_dict = {
+        "KG_EXPOSED": False,
+        "KG_ENABLED": False,
+        "KG_VENDOR": [],
+        "KG_VENDOR_DOMAINS": [],
+        "KG_IGNORE_EMAIL_DOMAINS": [],
+        "KG_COVERAGE_START": (datetime.now() - timedelta(days=90)).strftime("%Y-%m-%d"),
+        "KG_MAX_COVERAGE_DAYS": 90,
+        "KG_MAX_PARENT_RECURSION_DEPTH": 2,
+    }
+    current_configs = (
+        op.get_bind()
+        .execute(text("SELECT value FROM key_value_store WHERE key = 'kg_config'"))
+        .one_or_none()
+    )
+    if current_configs is not None:
+        # NOTE(review): assumes the driver returns the value column already
+        # decoded into a dict - confirm.
+        current_config_dict.update(current_configs[0])
+    # Lists are stored as-is; any other value becomes a one-element list holding
+    # its string form (booleans lowercased, so True -> "true").
+    insert_values = [
+        {
+            "kg_variable_name": name,
+            "kg_variable_values": (
+                [str(val).lower() if isinstance(val, bool) else str(val)]
+                if not isinstance(val, list)
+                else val
+            ),
+        }
+        for name, val in current_config_dict.items()
+    ]
+
+    op.create_table(
+        "kg_config",
+        sa.Column("id", sa.Integer(), primary_key=True, nullable=False, index=True),
+        sa.Column("kg_variable_name", sa.String(), nullable=False, index=True),
+        sa.Column("kg_variable_values", postgresql.ARRAY(sa.String()), nullable=False),
+        sa.UniqueConstraint("kg_variable_name", name="uq_kg_config_variable_name"),
+    )
+    # Lightweight table stub listing only the columns being inserted.
+    op.bulk_insert(
+        sa.table(
+            "kg_config",
+            sa.column("kg_variable_name", sa.String),
+            sa.column("kg_variable_values", postgresql.ARRAY(sa.String)),
+        ),
+        insert_values,
+    )
+
+    op.execute("DELETE FROM key_value_store WHERE key = 'kg_config'")
--- a/backend/alembic/versions/0568ccf46a6b_add_thread_specific_model_selection.py
+++ b/backend/alembic/versions/0568ccf46a6b_add_thread_specific_model_selection.py
@@ -0,0 +1,27 @@
+"""Add thread specific model selection
+
+Revision ID: 0568ccf46a6b
+Revises: e209dc5a8156
+Create Date: 2024-06-19 14:25:36.376046
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "0568ccf46a6b"
+down_revision = "e209dc5a8156"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "chat_session",
+        sa.Column("current_alternate_model", sa.String(), nullable=True),
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``current_alternate_model`` column from ``chat_session``."""
+    op.drop_column("chat_session", "current_alternate_model")
--- a/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
+++ b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
@@ -0,0 +1,32 @@
+"""add search doc relevance details
+
+Revision ID: 05c07bf07c00
+Revises: b896bbd0d5a7
+Create Date: 2024-07-10 17:48:15.886653
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "05c07bf07c00"
+down_revision = "b896bbd0d5a7"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "search_doc",
+        sa.Column("is_relevant", sa.Boolean(), nullable=True),
+    )
+    op.add_column(
+        "search_doc",
+        sa.Column("relevance_explanation", sa.String(), nullable=True),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("search_doc", "relevance_explanation")
+    op.drop_column("search_doc", "is_relevant")
--- a/backend/alembic/versions/08a1eda20fe1_add_earliest_indexing_to_connector.py
+++ b/backend/alembic/versions/08a1eda20fe1_add_earliest_indexing_to_connector.py
@@ -0,0 +1,27 @@
+"""add_indexing_start_to_connector
+
+Revision ID: 08a1eda20fe1
+Revises: 8a87bd6ec550
+Create Date: 2024-07-23 11:12:39.462397
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "08a1eda20fe1"
+down_revision = "8a87bd6ec550"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "connector", sa.Column("indexing_start", sa.DateTime(), nullable=True)
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``indexing_start`` column from ``connector``."""
+    op.drop_column("connector", "indexing_start")
--- a/backend/alembic/versions/0a2b51deb0b8_add_starter_prompts.py
+++ b/backend/alembic/versions/0a2b51deb0b8_add_starter_prompts.py
@@ -0,0 +1,32 @@
+"""Add starter prompts
+
+Revision ID: 0a2b51deb0b8
+Revises: 5f4b8568a221
+Create Date: 2024-03-02 23:23:49.960309
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "0a2b51deb0b8"
+down_revision = "5f4b8568a221"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "persona",
+        sa.Column(
+            "starter_messages",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``starter_messages`` column from ``persona``."""
+    op.drop_column("persona", "starter_messages")
--- a/backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py
+++ b/backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py
@@ -0,0 +1,114 @@
+"""Enable Encrypted Fields
+
+Revision ID: 0a98909f2757
+Revises: 570282d33c49
+Create Date: 2024-05-05 19:30:34.317972
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.sql import table
+from sqlalchemy.dialects import postgresql
+import json
+
+from onyx.utils.encryption import encrypt_string_to_bytes
+
+# revision identifiers, used by Alembic.
+revision = "0a98909f2757"
+down_revision = "570282d33c49"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    connection = op.get_bind()
+
+    op.alter_column("key_value_store", "value", nullable=True)
+    op.add_column(
+        "key_value_store",
+        sa.Column(
+            "encrypted_value",
+            sa.LargeBinary,
+            nullable=True,
+        ),
+    )
+
+    # Need a temporary column to translate the JSONB to binary
+    op.add_column("credential", sa.Column("temp_column", sa.LargeBinary()))
+
+    creds_table = table(
+        "credential",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column(
+            "credential_json",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=False,
+        ),
+        sa.Column(
+            "temp_column",
+            sa.LargeBinary(),
+            nullable=False,
+        ),
+    )
+
+    results = connection.execute(sa.select(creds_table))
+
+    # This uses the MIT encrypt which does not actually encrypt the credentials
+    # In other words, this upgrade does not apply the encryption. Porting existing sensitive data
+    # and key rotation currently is not supported and will come out in the future
+    for row_id, creds, _ in results:
+        creds_binary = encrypt_string_to_bytes(json.dumps(creds))
+        connection.execute(
+            creds_table.update()
+            .where(creds_table.c.id == row_id)
+            .values(temp_column=creds_binary)
+        )
+
+    op.drop_column("credential", "credential_json")
+    op.alter_column("credential", "temp_column", new_column_name="credential_json")
+
+    op.add_column("llm_provider", sa.Column("temp_column", sa.LargeBinary()))
+
+    llm_table = table(
+        "llm_provider",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column(
+            "api_key",
+            sa.String(),
+            nullable=False,
+        ),
+        sa.Column(
+            "temp_column",
+            sa.LargeBinary(),
+            nullable=False,
+        ),
+    )
+    results = connection.execute(sa.select(llm_table))
+
+    for row_id, api_key, _ in results:
+        llm_key = encrypt_string_to_bytes(api_key)
+        connection.execute(
+            llm_table.update()
+            .where(llm_table.c.id == row_id)
+            .values(temp_column=llm_key)
+        )
+
+    op.drop_column("llm_provider", "api_key")
+    op.alter_column("llm_provider", "temp_column", new_column_name="api_key")
+
+
+def downgrade() -> None:
+    # Some information loss but this is ok. Should not allow decryption via downgrade.
+    op.drop_column("credential", "credential_json")
+    op.drop_column("llm_provider", "api_key")
+
+    op.add_column("llm_provider", sa.Column("api_key", sa.String()))
+    op.add_column(
+        "credential",
+        sa.Column("credential_json", postgresql.JSONB(astext_type=sa.Text())),
+    )
+
+    op.execute("DELETE FROM key_value_store WHERE value IS NULL")
+    op.alter_column("key_value_store", "value", nullable=False)
+    op.drop_column("key_value_store", "encrypted_value")
--- a/backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py
+++ b/backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py
@@ -0,0 +1,28 @@
+"""add ccpair deletion failure message
+
+Revision ID: 0ebb1d516877
+Revises: 52a219fb5233
+Create Date: 2024-09-10 15:03:48.233926
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "0ebb1d516877"
+down_revision = "52a219fb5233"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "connector_credential_pair",
+        sa.Column("deletion_failure_message", sa.String(), nullable=True),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("connector_credential_pair", "deletion_failure_message")
--- a/backend/alembic/versions/0f7ff6d75b57_add_index_to_index_attempt_time_created.py
+++ b/backend/alembic/versions/0f7ff6d75b57_add_index_to_index_attempt_time_created.py
@@ -0,0 +1,37 @@
+"""add index to index_attempt.time_created
+
+Revision ID: 0f7ff6d75b57
+Revises: fec3db967bf7
+Create Date: 2025-01-10 14:01:14.067144
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "0f7ff6d75b57"
+down_revision = "fec3db967bf7"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        op.f("ix_index_attempt_status"),
+        "index_attempt",
+        ["status"],
+        unique=False,
+    )
+
+    op.create_index(
+        op.f("ix_index_attempt_time_created"),
+        "index_attempt",
+        ["time_created"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(op.f("ix_index_attempt_time_created"), table_name="index_attempt")
+
+    op.drop_index(op.f("ix_index_attempt_status"), table_name="index_attempt")
--- a/backend/alembic/versions/15326fcec57e_introduce_onyx_apis.py
+++ b/backend/alembic/versions/15326fcec57e_introduce_onyx_apis.py
@@ -0,0 +1,38 @@
+"""Introduce Onyx APIs
+
+Revision ID: 15326fcec57e
+Revises: 77d07dffae64
+Create Date: 2023-11-11 20:51:24.228999
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+from onyx.configs.constants import DocumentSource
+
+# revision identifiers, used by Alembic.
+revision = "15326fcec57e"
+down_revision = "77d07dffae64"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.alter_column("credential", "is_admin", new_column_name="admin_public")
+    op.add_column(
+        "document",
+        sa.Column("from_ingestion_api", sa.Boolean(), nullable=True),
+    )
+    op.alter_column(
+        "connector",
+        "source",
+        type_=sa.String(length=50),
+        existing_type=sa.Enum(DocumentSource, native_enum=False),
+        existing_nullable=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("document", "from_ingestion_api")
+    op.alter_column("credential", "admin_public", new_column_name="is_admin")
--- a/backend/alembic/versions/173cae5bba26_port_config_store.py
+++ b/backend/alembic/versions/173cae5bba26_port_config_store.py
@@ -0,0 +1,30 @@
+"""Port Config Store
+
+Revision ID: 173cae5bba26
+Revises: e50154680a5c
+Create Date: 2024-03-19 15:30:44.425436
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "173cae5bba26"
+down_revision = "e50154680a5c"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "key_value_store",
+        sa.Column("key", sa.String(), nullable=False),
+        sa.Column("value", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
+        sa.PrimaryKeyConstraint("key"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("key_value_store")
--- a/backend/alembic/versions/177de57c21c9_display_custom_llm_models.py
+++ b/backend/alembic/versions/177de57c21c9_display_custom_llm_models.py
@@ -0,0 +1,60 @@
+"""display custom llm models
+
+Revision ID: 177de57c21c9
+Revises: 4ee1287bd26a
+Create Date: 2024-11-21 11:49:04.488677
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+from sqlalchemy import and_
+
+revision = "177de57c21c9"
+down_revision = "4ee1287bd26a"
+branch_labels = None
+depends_on = None
+depends_on = None
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+    llm_provider = sa.table(
+        "llm_provider",
+        sa.column("id", sa.Integer),
+        sa.column("provider", sa.String),
+        sa.column("model_names", postgresql.ARRAY(sa.String)),
+        sa.column("display_model_names", postgresql.ARRAY(sa.String)),
+    )
+
+    excluded_providers = ["openai", "bedrock", "anthropic", "azure"]
+
+    providers_to_update = sa.select(
+        llm_provider.c.id,
+        llm_provider.c.model_names,
+        llm_provider.c.display_model_names,
+    ).where(
+        and_(
+            ~llm_provider.c.provider.in_(excluded_providers),
+            llm_provider.c.model_names.isnot(None),
+        )
+    )
+
+    results = conn.execute(providers_to_update).fetchall()
+
+    for provider_id, model_names, display_model_names in results:
+        if display_model_names is None:
+            display_model_names = []
+
+        combined_model_names = list(set(display_model_names + model_names))
+        update_stmt = (
+            llm_provider.update()
+            .where(llm_provider.c.id == provider_id)
+            .values(display_model_names=combined_model_names)
+        )
+        conn.execute(update_stmt)
+
+
+def downgrade() -> None:
+    pass
--- a/backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
+++ b/backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
@@ -0,0 +1,28 @@
+"""Add indexes to document__tag
+
+Revision ID: 1a03d2c2856b
+Revises: 9c00a2bccb83
+Create Date: 2025-02-18 10:45:13.957807
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "1a03d2c2856b"
+down_revision = "9c00a2bccb83"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        op.f("ix_document__tag_tag_id"),
+        "document__tag",
+        ["tag_id"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(op.f("ix_document__tag_tag_id"), table_name="document__tag")
--- a/backend/alembic/versions/1b10e1fda030_add_additional_data_to_notifications.py
+++ b/backend/alembic/versions/1b10e1fda030_add_additional_data_to_notifications.py
@@ -0,0 +1,27 @@
+"""add additional data to notifications
+
+Revision ID: 1b10e1fda030
+Revises: 6756efa39ada
+Create Date: 2024-10-15 19:26:44.071259
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "1b10e1fda030"
+down_revision = "6756efa39ada"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "notification", sa.Column("additional_data", postgresql.JSONB(), nullable=True)
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("notification", "additional_data")
--- a/backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py
+++ b/backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py
@@ -0,0 +1,103 @@
+"""add_user_delete_cascades
+
+Revision ID: 1b8206b29c5d
+Revises: 35e6853a51d5
+Create Date: 2024-09-18 11:48:59.418726
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "1b8206b29c5d"
+down_revision = "35e6853a51d5"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.drop_constraint("credential_user_id_fkey", "credential", type_="foreignkey")
+    op.create_foreign_key(
+        "credential_user_id_fkey",
+        "credential",
+        "user",
+        ["user_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    op.drop_constraint("chat_session_user_id_fkey", "chat_session", type_="foreignkey")
+    op.create_foreign_key(
+        "chat_session_user_id_fkey",
+        "chat_session",
+        "user",
+        ["user_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    op.drop_constraint("chat_folder_user_id_fkey", "chat_folder", type_="foreignkey")
+    op.create_foreign_key(
+        "chat_folder_user_id_fkey",
+        "chat_folder",
+        "user",
+        ["user_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    op.drop_constraint("prompt_user_id_fkey", "prompt", type_="foreignkey")
+    op.create_foreign_key(
+        "prompt_user_id_fkey", "prompt", "user", ["user_id"], ["id"], ondelete="CASCADE"
+    )
+
+    op.drop_constraint("notification_user_id_fkey", "notification", type_="foreignkey")
+    op.create_foreign_key(
+        "notification_user_id_fkey",
+        "notification",
+        "user",
+        ["user_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    op.drop_constraint("inputprompt_user_id_fkey", "inputprompt", type_="foreignkey")
+    op.create_foreign_key(
+        "inputprompt_user_id_fkey",
+        "inputprompt",
+        "user",
+        ["user_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+
+def downgrade() -> None:
+    op.drop_constraint("credential_user_id_fkey", "credential", type_="foreignkey")
+    op.create_foreign_key(
+        "credential_user_id_fkey", "credential", "user", ["user_id"], ["id"]
+    )
+
+    op.drop_constraint("chat_session_user_id_fkey", "chat_session", type_="foreignkey")
+    op.create_foreign_key(
+        "chat_session_user_id_fkey", "chat_session", "user", ["user_id"], ["id"]
+    )
+
+    op.drop_constraint("chat_folder_user_id_fkey", "chat_folder", type_="foreignkey")
+    op.create_foreign_key(
+        "chat_folder_user_id_fkey", "chat_folder", "user", ["user_id"], ["id"]
+    )
+
+    op.drop_constraint("prompt_user_id_fkey", "prompt", type_="foreignkey")
+    op.create_foreign_key("prompt_user_id_fkey", "prompt", "user", ["user_id"], ["id"])
+
+    op.drop_constraint("notification_user_id_fkey", "notification", type_="foreignkey")
+    op.create_foreign_key(
+        "notification_user_id_fkey", "notification", "user", ["user_id"], ["id"]
+    )
+
+    op.drop_constraint("inputprompt_user_id_fkey", "inputprompt", type_="foreignkey")
+    op.create_foreign_key(
+        "inputprompt_user_id_fkey", "inputprompt", "user", ["user_id"], ["id"]
+    )
--- a/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py
+++ b/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py
@@ -0,0 +1,135 @@
+"""embedding model -> search settings
+
+Revision ID: 1f60f60c3401
+Revises: f17bf3b0d9f1
+Create Date: 2024-08-25 12:39:51.731632
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+from onyx.configs.chat_configs import NUM_POSTPROCESSED_RESULTS
+
+# revision identifiers, used by Alembic.
+revision = "1f60f60c3401"
+down_revision = "f17bf3b0d9f1"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.drop_constraint(
+        "index_attempt__embedding_model_fk", "index_attempt", type_="foreignkey"
+    )
+    # Rename the table
+    op.rename_table("embedding_model", "search_settings")
+
+    # Add new columns
+    op.add_column(
+        "search_settings",
+        sa.Column(
+            "multipass_indexing", sa.Boolean(), nullable=False, server_default="false"
+        ),
+    )
+    op.add_column(
+        "search_settings",
+        sa.Column(
+            "multilingual_expansion",
+            postgresql.ARRAY(sa.String()),
+            nullable=False,
+            server_default="{}",
+        ),
+    )
+    op.add_column(
+        "search_settings",
+        sa.Column(
+            "disable_rerank_for_streaming",
+            sa.Boolean(),
+            nullable=False,
+            server_default="false",
+        ),
+    )
+    op.add_column(
+        "search_settings", sa.Column("rerank_model_name", sa.String(), nullable=True)
+    )
+    op.add_column(
+        "search_settings", sa.Column("rerank_provider_type", sa.String(), nullable=True)
+    )
+    op.add_column(
+        "search_settings", sa.Column("rerank_api_key", sa.String(), nullable=True)
+    )
+    op.add_column(
+        "search_settings",
+        sa.Column(
+            "num_rerank",
+            sa.Integer(),
+            nullable=False,
+            server_default=str(NUM_POSTPROCESSED_RESULTS),
+        ),
+    )
+
+    # Add the new column as nullable initially
+    op.add_column(
+        "index_attempt", sa.Column("search_settings_id", sa.Integer(), nullable=True)
+    )
+
+    # Populate the new column with data from the existing embedding_model_id
+    op.execute("UPDATE index_attempt SET search_settings_id = embedding_model_id")
+
+    # Create the foreign key constraint
+    op.create_foreign_key(
+        "fk_index_attempt_search_settings",
+        "index_attempt",
+        "search_settings",
+        ["search_settings_id"],
+        ["id"],
+    )
+
+    # Make the new column non-nullable
+    op.alter_column("index_attempt", "search_settings_id", nullable=False)
+
+    # Drop the old embedding_model_id column
+    op.drop_column("index_attempt", "embedding_model_id")
+
+
+def downgrade() -> None:
+    # Add back the embedding_model_id column
+    op.add_column(
+        "index_attempt", sa.Column("embedding_model_id", sa.Integer(), nullable=True)
+    )
+
+    # Populate the old column with data from search_settings_id
+    op.execute("UPDATE index_attempt SET embedding_model_id = search_settings_id")
+
+    # Make the old column non-nullable
+    op.alter_column("index_attempt", "embedding_model_id", nullable=False)
+
+    # Drop the foreign key constraint
+    op.drop_constraint(
+        "fk_index_attempt_search_settings", "index_attempt", type_="foreignkey"
+    )
+
+    # Drop the new search_settings_id column
+    op.drop_column("index_attempt", "search_settings_id")
+
+    # Rename the table back
+    op.rename_table("search_settings", "embedding_model")
+
+    # Remove added columns
+    op.drop_column("embedding_model", "num_rerank")
+    op.drop_column("embedding_model", "rerank_api_key")
+    op.drop_column("embedding_model", "rerank_provider_type")
+    op.drop_column("embedding_model", "rerank_model_name")
+    op.drop_column("embedding_model", "disable_rerank_for_streaming")
+    op.drop_column("embedding_model", "multilingual_expansion")
+    op.drop_column("embedding_model", "multipass_indexing")
+
+    op.create_foreign_key(
+        "index_attempt__embedding_model_fk",
+        "index_attempt",
+        "embedding_model",
+        ["embedding_model_id"],
+        ["id"],
+    )
--- a/backend/alembic/versions/213fd978c6d8_notifications.py
+++ b/backend/alembic/versions/213fd978c6d8_notifications.py
@@ -0,0 +1,45 @@
+"""notifications
+
+Revision ID: 213fd978c6d8
+Revises: 5fc1f54cc252
+Create Date: 2024-08-10 11:13:36.070790
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "213fd978c6d8"
+down_revision = "5fc1f54cc252"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "notification",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column(
+            "notif_type",
+            sa.String(),
+            nullable=False,
+        ),
+        sa.Column(
+            "user_id",
+            sa.UUID(),
+            nullable=True,
+        ),
+        sa.Column("dismissed", sa.Boolean(), nullable=False),
+        sa.Column("last_shown", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("first_shown", sa.DateTime(timezone=True), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["user.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("notification")
--- a/backend/alembic/versions/238b84885828_add_foreign_key_to_user__external_user_.py
+++ b/backend/alembic/versions/238b84885828_add_foreign_key_to_user__external_user_.py
@@ -0,0 +1,45 @@
+"""Add foreign key to user__external_user_group_id
+
+Revision ID: 238b84885828
+Revises: a7688ab35c45
+Create Date: 2025-05-19 17:15:33.424584
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "238b84885828"
+down_revision = "a7688ab35c45"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # First, clean up any entries that don't have a valid cc_pair_id
+    op.execute(
+        """
+        DELETE FROM user__external_user_group_id
+        WHERE cc_pair_id NOT IN (SELECT id FROM connector_credential_pair)
+        """
+    )
+
+    # Add foreign key constraint with cascade delete
+    op.create_foreign_key(
+        "fk_user__external_user_group_id_cc_pair_id",
+        "user__external_user_group_id",
+        "connector_credential_pair",
+        ["cc_pair_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+
+def downgrade() -> None:
+    # Drop the foreign key constraint
+    op.drop_constraint(
+        "fk_user__external_user_group_id_cc_pair_id",
+        "user__external_user_group_id",
+        type_="foreignkey",
+    )
--- a/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py
+++ b/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py
@@ -0,0 +1,87 @@
+"""remove-feedback-foreignkey-constraint
+
+Revision ID: 23957775e5f5
+Revises: bc9771dccadf
+Create Date: 2024-06-27 16:04:51.480437
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "23957775e5f5"
+down_revision = "bc9771dccadf"
+branch_labels = None  # type: ignore
+depends_on = None  # type: ignore
+
+
+def upgrade() -> None:
+    op.drop_constraint(
+        "chat_feedback__chat_message_fk", "chat_feedback", type_="foreignkey"
+    )
+    op.create_foreign_key(
+        "chat_feedback__chat_message_fk",
+        "chat_feedback",
+        "chat_message",
+        ["chat_message_id"],
+        ["id"],
+        ondelete="SET NULL",
+    )
+    op.alter_column(
+        "chat_feedback", "chat_message_id", existing_type=sa.Integer(), nullable=True
+    )
+    op.drop_constraint(
+        "document_retrieval_feedback__chat_message_fk",
+        "document_retrieval_feedback",
+        type_="foreignkey",
+    )
+    op.create_foreign_key(
+        "document_retrieval_feedback__chat_message_fk",
+        "document_retrieval_feedback",
+        "chat_message",
+        ["chat_message_id"],
+        ["id"],
+        ondelete="SET NULL",
+    )
+    op.alter_column(
+        "document_retrieval_feedback",
+        "chat_message_id",
+        existing_type=sa.Integer(),
+        nullable=True,
+    )
+
+
+def downgrade() -> None:
+    op.alter_column(
+        "chat_feedback", "chat_message_id", existing_type=sa.Integer(), nullable=False
+    )
+    op.drop_constraint(
+        "chat_feedback__chat_message_fk", "chat_feedback", type_="foreignkey"
+    )
+    op.create_foreign_key(
+        "chat_feedback__chat_message_fk",
+        "chat_feedback",
+        "chat_message",
+        ["chat_message_id"],
+        ["id"],
+    )
+
+    op.alter_column(
+        "document_retrieval_feedback",
+        "chat_message_id",
+        existing_type=sa.Integer(),
+        nullable=False,
+    )
+    op.drop_constraint(
+        "document_retrieval_feedback__chat_message_fk",
+        "document_retrieval_feedback",
+        type_="foreignkey",
+    )
+    op.create_foreign_key(
+        "document_retrieval_feedback__chat_message_fk",
+        "document_retrieval_feedback",
+        "chat_message",
+        ["chat_message_id"],
+        ["id"],
+    )
--- a/backend/alembic/versions/2666d766cb9b_google_oauth2.py
+++ b/backend/alembic/versions/2666d766cb9b_google_oauth2.py
@@ -0,0 +1,56 @@
+"""Google OAuth2
+
+Revision ID: 2666d766cb9b
+Revises: 6d387b3196c2
+Create Date: 2023-05-05 15:49:35.716016
+
+"""
+
+import fastapi_users_db_sqlalchemy
+import sqlalchemy as sa
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "2666d766cb9b"
+down_revision = "6d387b3196c2"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "oauth_account",
+        sa.Column("id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False),
+        sa.Column(
+            "user_id",
+            fastapi_users_db_sqlalchemy.generics.GUID(),
+            nullable=False,
+        ),
+        sa.Column("oauth_name", sa.String(length=100), nullable=False),
+        sa.Column("access_token", sa.String(length=1024), nullable=False),
+        sa.Column("expires_at", sa.Integer(), nullable=True),
+        sa.Column("refresh_token", sa.String(length=1024), nullable=True),
+        sa.Column("account_id", sa.String(length=320), nullable=False),
+        sa.Column("account_email", sa.String(length=320), nullable=False),
+        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="cascade"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        op.f("ix_oauth_account_account_id"),
+        "oauth_account",
+        ["account_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_oauth_account_oauth_name"),
+        "oauth_account",
+        ["oauth_name"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(op.f("ix_oauth_account_oauth_name"), table_name="oauth_account")
+    op.drop_index(op.f("ix_oauth_account_account_id"), table_name="oauth_account")
+    op.drop_table("oauth_account")
--- a/backend/alembic/versions/26b931506ecb_default_chosen_assistants_to_none.py
+++ b/backend/alembic/versions/26b931506ecb_default_chosen_assistants_to_none.py
@@ -0,0 +1,69 @@
+"""default chosen assistants to none
+
+Revision ID: 26b931506ecb
+Revises: 2daa494a0851
+Create Date: 2024-11-12 13:23:29.858995
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "26b931506ecb"
+down_revision = "2daa494a0851"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "user", sa.Column("chosen_assistants_new", postgresql.JSONB(), nullable=True)
+    )
+
+    op.execute(
+        """
+    UPDATE "user"
+    SET chosen_assistants_new =
+        CASE
+            WHEN chosen_assistants = '[-2, -1, 0]' THEN NULL
+            ELSE chosen_assistants
+        END
+    """
+    )
+
+    op.drop_column("user", "chosen_assistants")
+
+    op.alter_column(
+        "user", "chosen_assistants_new", new_column_name="chosen_assistants"
+    )
+
+
+def downgrade() -> None:
+    op.add_column(
+        "user",
+        sa.Column(
+            "chosen_assistants_old",
+            postgresql.JSONB(),
+            nullable=False,
+            server_default="[-2, -1, 0]",
+        ),
+    )
+
+    op.execute(
+        """
+    UPDATE "user"
+    SET chosen_assistants_old =
+        CASE
+            WHEN chosen_assistants IS NULL THEN '[-2, -1, 0]'::jsonb
+            ELSE chosen_assistants
+        END
+    """
+    )
+
+    op.drop_column("user", "chosen_assistants")
+
+    op.alter_column(
+        "user", "chosen_assistants_old", new_column_name="chosen_assistants"
+    )
--- a/backend/alembic/versions/27c6ecc08586_permission_framework.py
+++ b/backend/alembic/versions/27c6ecc08586_permission_framework.py
@@ -0,0 +1,190 @@
+"""Permission Framework
+
+Revision ID: 27c6ecc08586
+Revises: 2666d766cb9b
+Create Date: 2023-05-24 18:45:17.244495
+
+"""
+
+import fastapi_users_db_sqlalchemy
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "27c6ecc08586"
+down_revision = "2666d766cb9b"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.execute("TRUNCATE TABLE index_attempt")
+    op.create_table(
+        "connector",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column(
+            "source",
+            sa.Enum(
+                "SLACK",
+                "WEB",
+                "GOOGLE_DRIVE",
+                "GITHUB",
+                "CONFLUENCE",
+                name="documentsource",
+                native_enum=False,
+            ),
+            nullable=False,
+        ),
+        sa.Column(
+            "input_type",
+            sa.Enum(
+                "LOAD_STATE",
+                "POLL",
+                "EVENT",
+                name="inputtype",
+                native_enum=False,
+            ),
+            nullable=True,
+        ),
+        sa.Column(
+            "connector_specific_config",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=False,
+        ),
+        sa.Column("refresh_freq", sa.Integer(), nullable=True),
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "time_updated",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("disabled", sa.Boolean(), nullable=False),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_table(
+        "credential",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column(
+            "credential_json",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=False,
+        ),
+        sa.Column(
+            "user_id",
+            fastapi_users_db_sqlalchemy.generics.GUID(),
+            nullable=True,
+        ),
+        sa.Column("public_doc", sa.Boolean(), nullable=False),
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "time_updated",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["user.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_table(
+        "connector_credential_pair",
+        sa.Column("connector_id", sa.Integer(), nullable=False),
+        sa.Column("credential_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["connector_id"],
+            ["connector.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["credential_id"],
+            ["credential.id"],
+        ),
+        sa.PrimaryKeyConstraint("connector_id", "credential_id"),
+    )
+    op.add_column(
+        "index_attempt",
+        sa.Column("connector_id", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "index_attempt",
+        sa.Column("credential_id", sa.Integer(), nullable=True),
+    )
+    op.create_foreign_key(
+        "fk_index_attempt_credential_id",
+        "index_attempt",
+        "credential",
+        ["credential_id"],
+        ["id"],
+    )
+    op.create_foreign_key(
+        "fk_index_attempt_connector_id",
+        "index_attempt",
+        "connector",
+        ["connector_id"],
+        ["id"],
+    )
+    op.drop_column("index_attempt", "connector_specific_config")
+    op.drop_column("index_attempt", "source")
+    op.drop_column("index_attempt", "input_type")
+
+
+def downgrade() -> None:
+    op.execute("TRUNCATE TABLE index_attempt")
+    op.add_column(
+        "index_attempt",
+        sa.Column("input_type", sa.VARCHAR(), autoincrement=False, nullable=False),
+    )
+    op.add_column(
+        "index_attempt",
+        sa.Column("source", sa.VARCHAR(), autoincrement=False, nullable=False),
+    )
+    op.add_column(
+        "index_attempt",
+        sa.Column(
+            "connector_specific_config",
+            postgresql.JSONB(astext_type=sa.Text()),
+            autoincrement=False,
+            nullable=False,
+        ),
+    )
+
+    # Check if the constraint exists before dropping
+    conn = op.get_bind()
+    inspector = sa.inspect(conn)
+    constraints = inspector.get_foreign_keys("index_attempt")
+
+    if any(
+        constraint["name"] == "fk_index_attempt_credential_id"
+        for constraint in constraints
+    ):
+        op.drop_constraint(
+            "fk_index_attempt_credential_id", "index_attempt", type_="foreignkey"
+        )
+
+    if any(
+        constraint["name"] == "fk_index_attempt_connector_id"
+        for constraint in constraints
+    ):
+        op.drop_constraint(
+            "fk_index_attempt_connector_id", "index_attempt", type_="foreignkey"
+        )
+
+    op.drop_column("index_attempt", "credential_id")
+    op.drop_column("index_attempt", "connector_id")
+    op.drop_table("connector_credential_pair")
+    op.drop_table("credential")
+    op.drop_table("connector")
--- a/backend/alembic/versions/2955778aa44c_add_chunk_count_to_document.py
+++ b/backend/alembic/versions/2955778aa44c_add_chunk_count_to_document.py
@@ -0,0 +1,25 @@
+"""add chunk count to document
+
+Revision ID: 2955778aa44c
+Revises: c0aab6edb6dd
+Create Date: 2025-01-04 11:39:43.268612
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "2955778aa44c"
+down_revision = "c0aab6edb6dd"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column("document", sa.Column("chunk_count", sa.Integer(), nullable=True))
+
+
+def downgrade() -> None:
+    op.drop_column("document", "chunk_count")
--- a/backend/alembic/versions/2cdeff6d8c93_set_built_in_to_default.py
+++ b/backend/alembic/versions/2cdeff6d8c93_set_built_in_to_default.py
@@ -0,0 +1,33 @@
+"""set built in to default
+
+Revision ID: 2cdeff6d8c93
+Revises: f5437cc136c5
+Create Date: 2025-02-11 14:57:51.308775
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "2cdeff6d8c93"
+down_revision = "f5437cc136c5"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Prior to this migration / point in the codebase history,
+    # built in personas were implicitly treated as default personas (with no option to change this)
+    # This migration makes that explicit
+    op.execute(
+        """
+        UPDATE persona
+        SET is_default_persona = TRUE
+        WHERE builtin_persona = TRUE
+    """
+    )
+
+
+def downgrade() -> None:
+    # No-op: the rows updated by upgrade() are left as they are.
+    pass
--- a/backend/alembic/versions/2d2304e27d8c_add_above_below_to_persona.py
+++ b/backend/alembic/versions/2d2304e27d8c_add_above_below_to_persona.py
@@ -0,0 +1,33 @@
+"""Add Above Below to Persona
+
+Revision ID: 2d2304e27d8c
+Revises: 4b08d97e175a
+Create Date: 2024-08-21 19:15:15.762948
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "2d2304e27d8c"
+down_revision = "4b08d97e175a"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column("persona", sa.Column("chunks_above", sa.Integer(), nullable=True))
+    op.add_column("persona", sa.Column("chunks_below", sa.Integer(), nullable=True))
+
+    op.execute(
+        "UPDATE persona SET chunks_above = 1, chunks_below = 1 WHERE chunks_above IS NULL AND chunks_below IS NULL"
+    )
+
+    op.alter_column("persona", "chunks_above", nullable=False)
+    op.alter_column("persona", "chunks_below", nullable=False)
+
+
+def downgrade() -> None:
+    op.drop_column("persona", "chunks_below")
+    op.drop_column("persona", "chunks_above")
--- a/backend/alembic/versions/2daa494a0851_add_group_sync_time.py
+++ b/backend/alembic/versions/2daa494a0851_add_group_sync_time.py
@@ -0,0 +1,31 @@
+"""add-group-sync-time
+
+Revision ID: 2daa494a0851
+Revises: c0fd6e4da83a
+Create Date: 2024-11-11 10:57:22.991157
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "2daa494a0851"
+down_revision = "c0fd6e4da83a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "connector_credential_pair",
+        sa.Column(
+            "last_time_external_group_sync",
+            sa.DateTime(timezone=True),
+            nullable=True,
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("connector_credential_pair", "last_time_external_group_sync")
--- a/backend/alembic/versions/2f80c6a2550f_add_chat_session_specific_temperature_.py
+++ b/backend/alembic/versions/2f80c6a2550f_add_chat_session_specific_temperature_.py
@@ -0,0 +1,37 @@
+"""add chat session specific temperature override
+
+Revision ID: 2f80c6a2550f
+Revises: 33ea50e88f24
+Create Date: 2025-01-31 10:30:27.289646
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "2f80c6a2550f"
+down_revision = "33ea50e88f24"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "chat_session", sa.Column("temperature_override", sa.Float(), nullable=True)
+    )
+    op.add_column(
+        "user",
+        sa.Column(
+            "temperature_override_enabled",
+            sa.Boolean(),
+            nullable=False,
+            server_default=sa.false(),
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("chat_session", "temperature_override")
+    op.drop_column("user", "temperature_override_enabled")
--- a/backend/alembic/versions/30c1d5744104_persona_datetime_aware.py
+++ b/backend/alembic/versions/30c1d5744104_persona_datetime_aware.py
@@ -0,0 +1,38 @@
+"""Persona Datetime Aware
+
+Revision ID: 30c1d5744104
+Revises: 7f99be1cb9f5
+Create Date: 2023-10-16 23:21:01.283424
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "30c1d5744104"
+down_revision = "7f99be1cb9f5"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column("persona", sa.Column("datetime_aware", sa.Boolean(), nullable=True))
+    op.execute("UPDATE persona SET datetime_aware = TRUE")
+    op.alter_column("persona", "datetime_aware", nullable=False)
+    op.create_index(
+        "_default_persona_name_idx",
+        "persona",
+        ["name"],
+        unique=True,
+        postgresql_where=sa.text("default_persona = true"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(
+        "_default_persona_name_idx",
+        table_name="persona",
+        postgresql_where=sa.text("default_persona = true"),
+    )
+    op.drop_column("persona", "datetime_aware")
--- a/backend/alembic/versions/325975216eb3_add_icon_color_and_icon_shape_to_persona.py
+++ b/backend/alembic/versions/325975216eb3_add_icon_color_and_icon_shape_to_persona.py
@@ -0,0 +1,71 @@
+"""Add icon_color and icon_shape to Persona
+
+Revision ID: 325975216eb3
+Revises: 91ffac7e65b3
+Create Date: 2024-07-24 21:29:31.784562
+
+"""
+
+import random
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.sql import table, column, select
+
+# revision identifiers, used by Alembic.
+revision = "325975216eb3"
+down_revision = "91ffac7e65b3"
+branch_labels: None = None
+depends_on: None = None
+
+
+colorOptions = [
+    "#FF6FBF",
+    "#6FB1FF",
+    "#B76FFF",
+    "#FFB56F",
+    "#6FFF8D",
+    "#FF6F6F",
+    "#6FFFFF",
+]
+
+
+# Build a random 16-bit shape mask that always has exactly 10 bits set.
+# The seed is one of eight patterns over bits 0-3, each with at least two of
+# those four bits set; the remaining bits are picked at random.
+# NOTE(review): this was previously described as "at least 3 of the middle 4
+# squares", but the seed list includes two-bit patterns (12, 10, 6). How bit
+# positions map to grid squares is not visible here - confirm against the
+# code that renders icon_shape.
+def generate_random_shape() -> int:
+    center_squares = [12, 10, 6, 14, 13, 11, 7, 15]
+    center_fill = random.choice(center_squares)
+    # Bit positions (0-15) that are not already set in the seed.
+    remaining_squares = [i for i in range(16) if not (center_fill & (1 << i))]
+    random.shuffle(remaining_squares)
+    # Set additional random bits until the mask holds 10 bits in total.
+    for i in range(10 - bin(center_fill).count("1")):
+        center_fill |= 1 << remaining_squares[i]
+    return center_fill
+
+
+def upgrade() -> None:
+    op.add_column("persona", sa.Column("icon_color", sa.String(), nullable=True))
+    op.add_column("persona", sa.Column("icon_shape", sa.Integer(), nullable=True))
+    op.add_column("persona", sa.Column("uploaded_image_id", sa.String(), nullable=True))
+
+    persona = table(
+        "persona",
+        column("id", sa.Integer),
+        column("icon_color", sa.String),
+        column("icon_shape", sa.Integer),
+    )
+
+    conn = op.get_bind()
+    personas = conn.execute(select(persona.c.id))
+
+    for persona_id in personas:
+        random_color = random.choice(colorOptions)
+        random_shape = generate_random_shape()
+        conn.execute(
+            persona.update()
+            .where(persona.c.id == persona_id[0])
+            .values(icon_color=random_color, icon_shape=random_shape)
+        )
+
+
+def downgrade() -> None:
+    op.drop_column("persona", "icon_shape")
+    op.drop_column("persona", "uploaded_image_id")
+    op.drop_column("persona", "icon_color")
--- a/backend/alembic/versions/33cb72ea4d80_single_tool_call_per_message.py
+++ b/backend/alembic/versions/33cb72ea4d80_single_tool_call_per_message.py
@@ -0,0 +1,51 @@
+"""single tool call per message
+
+Revision ID: 33cb72ea4d80
+Revises: 5b29123cd710
+Create Date: 2024-11-01 12:51:01.535003
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "33cb72ea4d80"
+down_revision = "5b29123cd710"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Reduce ``tool_call`` to one row per message, then enforce it.
+
+    Destructive: rows removed by the DELETE below are not restored by
+    ``downgrade``.
+    """
+    # Step 1: Delete extraneous ToolCall entries
+    # Keep only the ToolCall with the smallest 'id' for each 'message_id'
+    # NOTE(review): the subquery only returns ids of rows whose message_id is
+    # NOT NULL, so any row with a NULL message_id fails the NOT IN test and is
+    # deleted as well. Confirm that is intended.
+    op.execute(
+        sa.text(
+            """
+            DELETE FROM tool_call
+            WHERE id NOT IN (
+                SELECT MIN(id)
+                FROM tool_call
+                WHERE message_id IS NOT NULL
+                GROUP BY message_id
+            );
+        """
+        )
+    )
+
+    # Step 2: Add a unique constraint on message_id
+    op.create_unique_constraint(
+        constraint_name="uq_tool_call_message_id",
+        table_name="tool_call",
+        columns=["message_id"],
+    )
+
+
+def downgrade() -> None:
+    # Step 1: Drop the unique constraint on message_id
+    op.drop_constraint(
+        constraint_name="uq_tool_call_message_id",
+        table_name="tool_call",
+        type_="unique",
+    )
--- a/backend/alembic/versions/33ea50e88f24_foreign_key_input_prompts.py
+++ b/backend/alembic/versions/33ea50e88f24_foreign_key_input_prompts.py
@@ -0,0 +1,81 @@
+"""foreign key input prompts
+
+Revision ID: 33ea50e88f24
+Revises: a6df6b88ef81
+Create Date: 2025-01-29 10:54:22.141765
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "33ea50e88f24"
+down_revision = "a6df6b88ef81"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Safely drop constraints if exists
+    op.execute(
+        """
+        ALTER TABLE inputprompt__user
+        DROP CONSTRAINT IF EXISTS inputprompt__user_input_prompt_id_fkey
+        """
+    )
+    op.execute(
+        """
+        ALTER TABLE inputprompt__user
+        DROP CONSTRAINT IF EXISTS inputprompt__user_user_id_fkey
+        """
+    )
+
+    # Recreate with ON DELETE CASCADE
+    op.create_foreign_key(
+        "inputprompt__user_input_prompt_id_fkey",
+        "inputprompt__user",
+        "inputprompt",
+        ["input_prompt_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    op.create_foreign_key(
+        "inputprompt__user_user_id_fkey",
+        "inputprompt__user",
+        "user",
+        ["user_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+
+def downgrade() -> None:
+    # Drop the new FKs with ondelete
+    op.drop_constraint(
+        "inputprompt__user_input_prompt_id_fkey",
+        "inputprompt__user",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "inputprompt__user_user_id_fkey",
+        "inputprompt__user",
+        type_="foreignkey",
+    )
+
+    # Recreate them without cascading
+    op.create_foreign_key(
+        "inputprompt__user_input_prompt_id_fkey",
+        "inputprompt__user",
+        "inputprompt",
+        ["input_prompt_id"],
+        ["id"],
+    )
+    op.create_foreign_key(
+        "inputprompt__user_user_id_fkey",
+        "inputprompt__user",
+        "user",
+        ["user_id"],
+        ["id"],
+    )
--- a/backend/alembic/versions/351faebd379d_add_curator_fields.py
+++ b/backend/alembic/versions/351faebd379d_add_curator_fields.py
@@ -0,0 +1,91 @@
+"""Add curator fields
+
+Revision ID: 351faebd379d
+Revises: ee3f4b47fad5
+Create Date: 2024-08-15 22:37:08.397052
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "351faebd379d"
+down_revision = "ee3f4b47fad5"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    """Introduce curator support.
+
+    Adds ``user__user_group.is_curator``, widens the allowed ``user.role``
+    values with CURATOR and GLOBAL_CURATOR, creates the
+    ``credential__user_group`` association table, and adds
+    ``credential.curator_public``.
+    """
+    # Add is_curator column to User__UserGroup table
+    op.add_column(
+        "user__user_group",
+        sa.Column("is_curator", sa.Boolean(), nullable=False, server_default="false"),
+    )
+
+    # Use batch mode to modify the enum type
+    # The enum is declared with native_enum=False on both the old and new type.
+    with op.batch_alter_table("user", schema=None) as batch_op:
+        batch_op.alter_column(  # type: ignore[attr-defined]
+            "role",
+            type_=sa.Enum(
+                "BASIC",
+                "ADMIN",
+                "CURATOR",
+                "GLOBAL_CURATOR",
+                name="userrole",
+                native_enum=False,
+            ),
+            existing_type=sa.Enum("BASIC", "ADMIN", name="userrole", native_enum=False),
+            existing_nullable=False,
+        )
+    # Create the association table
+    # Composite primary key: one row per (credential, user group) pair.
+    op.create_table(
+        "credential__user_group",
+        sa.Column("credential_id", sa.Integer(), nullable=False),
+        sa.Column("user_group_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["credential_id"],
+            ["credential.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_group_id"],
+            ["user_group.id"],
+        ),
+        sa.PrimaryKeyConstraint("credential_id", "user_group_id"),
+    )
+    op.add_column(
+        "credential",
+        sa.Column(
+            "curator_public", sa.Boolean(), nullable=False, server_default="false"
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Remove curator support.
+
+    Rewrites curator roles, drops ``user__user_group.is_curator``, narrows
+    ``user.role`` back to BASIC/ADMIN, and drops the
+    ``credential__user_group`` table and ``credential.curator_public``.
+    """
+    # Update existing records to ensure they fit within the BASIC/ADMIN roles
+    # NOTE(review): CURATOR and GLOBAL_CURATOR users are rewritten to ADMIN,
+    # i.e. they gain the ADMIN role on downgrade. Confirm this is preferred
+    # over mapping them to BASIC.
+    op.execute(
+        "UPDATE \"user\" SET role = 'ADMIN' WHERE role IN ('CURATOR', 'GLOBAL_CURATOR')"
+    )
+
+    # Remove is_curator column from User__UserGroup table
+    op.drop_column("user__user_group", "is_curator")
+
+    # The target type passes length=20, which the types used in upgrade() do not.
+    with op.batch_alter_table("user", schema=None) as batch_op:
+        batch_op.alter_column(  # type: ignore[attr-defined]
+            "role",
+            type_=sa.Enum(
+                "BASIC", "ADMIN", name="userrole", native_enum=False, length=20
+            ),
+            existing_type=sa.Enum(
+                "BASIC",
+                "ADMIN",
+                "CURATOR",
+                "GLOBAL_CURATOR",
+                name="userrole",
+                native_enum=False,
+            ),
+            existing_nullable=False,
+        )
+    # Drop the association table
+    op.drop_table("credential__user_group")
+    op.drop_column("credential", "curator_public")
--- a/backend/alembic/versions/35e518e0ddf4_properly_cascade.py
+++ b/backend/alembic/versions/35e518e0ddf4_properly_cascade.py
@@ -0,0 +1,122 @@
+"""properly_cascade
+
+Revision ID: 35e518e0ddf4
+Revises: 91a0a4d62b14
+Create Date: 2024-09-20 21:24:04.891018
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "35e518e0ddf4"
+down_revision = "91a0a4d62b14"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Update chat_message foreign key constraint
+    op.drop_constraint(
+        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
+    )
+    op.create_foreign_key(
+        "chat_message_chat_session_id_fkey",
+        "chat_message",
+        "chat_session",
+        ["chat_session_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    # Update chat_message__search_doc foreign key constraints
+    op.drop_constraint(
+        "chat_message__search_doc_chat_message_id_fkey",
+        "chat_message__search_doc",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "chat_message__search_doc_search_doc_id_fkey",
+        "chat_message__search_doc",
+        type_="foreignkey",
+    )
+
+    op.create_foreign_key(
+        "chat_message__search_doc_chat_message_id_fkey",
+        "chat_message__search_doc",
+        "chat_message",
+        ["chat_message_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+    op.create_foreign_key(
+        "chat_message__search_doc_search_doc_id_fkey",
+        "chat_message__search_doc",
+        "search_doc",
+        ["search_doc_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+    # Add CASCADE delete for tool_call foreign key
+    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
+    op.create_foreign_key(
+        "tool_call_message_id_fkey",
+        "tool_call",
+        "chat_message",
+        ["message_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+
+
+def downgrade() -> None:
+    # Revert chat_message foreign key constraint
+    op.drop_constraint(
+        "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey"
+    )
+    op.create_foreign_key(
+        "chat_message_chat_session_id_fkey",
+        "chat_message",
+        "chat_session",
+        ["chat_session_id"],
+        ["id"],
+    )
+
+    # Revert chat_message__search_doc foreign key constraints
+    op.drop_constraint(
+        "chat_message__search_doc_chat_message_id_fkey",
+        "chat_message__search_doc",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "chat_message__search_doc_search_doc_id_fkey",
+        "chat_message__search_doc",
+        type_="foreignkey",
+    )
+
+    op.create_foreign_key(
+        "chat_message__search_doc_chat_message_id_fkey",
+        "chat_message__search_doc",
+        "chat_message",
+        ["chat_message_id"],
+        ["id"],
+    )
+    op.create_foreign_key(
+        "chat_message__search_doc_search_doc_id_fkey",
+        "chat_message__search_doc",
+        "search_doc",
+        ["search_doc_id"],
+        ["id"],
+    )
+
+    # Revert tool_call foreign key constraint
+    op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
+    op.create_foreign_key(
+        "tool_call_message_id_fkey",
+        "tool_call",
+        "chat_message",
+        ["message_id"],
+        ["id"],
+    )
--- a/backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py
+++ b/backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py
@@ -0,0 +1,65 @@
+"""server default chosen assistants
+
+Revision ID: 35e6853a51d5
+Revises: c99d76fcd298
+Create Date: 2024-09-13 13:20:32.885317
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "35e6853a51d5"
+down_revision = "c99d76fcd298"
+branch_labels = None
+depends_on = None
+
+DEFAULT_ASSISTANTS = [-2, -1, 0]
+
+
+def upgrade() -> None:
+    # Step 1: Update any NULL values to the default value
+    # This upgrades existing users without ordered assistant
+    # to have default assistants set to visible assistants which are
+    # accessible by them.
+    op.execute(
+        """
+        UPDATE "user" u
+        SET chosen_assistants = (
+            SELECT jsonb_agg(
+                p.id ORDER BY
+                    COALESCE(p.display_priority, 2147483647) ASC,
+                    p.id ASC
+            )
+            FROM persona p
+            LEFT JOIN persona__user pu ON p.id = pu.persona_id AND pu.user_id = u.id
+            WHERE p.is_visible = true
+            AND (p.is_public = true OR pu.user_id IS NOT NULL)
+        )
+        WHERE chosen_assistants IS NULL
+        OR chosen_assistants = 'null'
+        OR jsonb_typeof(chosen_assistants) = 'null'
+        OR (jsonb_typeof(chosen_assistants) = 'string' AND chosen_assistants = '"null"')
+    """
+    )
+
+    # Step 2: Alter the column to make it non-nullable
+    op.alter_column(
+        "user",
+        "chosen_assistants",
+        type_=postgresql.JSONB(astext_type=sa.Text()),
+        nullable=False,
+        server_default=sa.text(f"'{DEFAULT_ASSISTANTS}'::jsonb"),
+    )
+
+
+def downgrade() -> None:
+    op.alter_column(
+        "user",
+        "chosen_assistants",
+        type_=postgresql.JSONB(astext_type=sa.Text()),
+        nullable=True,
+        server_default=None,
+    )
--- a/backend/alembic/versions/369644546676_add_composite_index_for_index_attempt_.py
+++ b/backend/alembic/versions/369644546676_add_composite_index_for_index_attempt_.py
@@ -0,0 +1,36 @@
+"""add composite index for index attempt time updated
+
+Revision ID: 369644546676
+Revises: 2955778aa44c
+Create Date: 2025-01-08 15:38:17.224380
+
+"""
+
+from alembic import op
+from sqlalchemy import text
+
+# revision identifiers, used by Alembic.
+revision = "369644546676"
+down_revision = "2955778aa44c"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        "ix_index_attempt_ccpair_search_settings_time_updated",
+        "index_attempt",
+        [
+            "connector_credential_pair_id",
+            "search_settings_id",
+            text("time_updated DESC"),
+        ],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(
+        "ix_index_attempt_ccpair_search_settings_time_updated",
+        table_name="index_attempt",
+    )
--- a/backend/alembic/versions/36e9220ab794_update_kg_trigger_functions.py
+++ b/backend/alembic/versions/36e9220ab794_update_kg_trigger_functions.py
@@ -0,0 +1,136 @@
+"""update_kg_trigger_functions
+
+Revision ID: 36e9220ab794
+Revises: c9e2cd766c29
+Create Date: 2025-06-22 17:33:25.833733
+
+"""
+
+from alembic import op
+from sqlalchemy.orm import Session
+from sqlalchemy import text
+from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE
+
+# revision identifiers, used by Alembic.
+revision = "36e9220ab794"
+down_revision = "c9e2cd766c29"
+branch_labels = None
+depends_on = None
+
+
+def _get_tenant_contextvar(session: Session) -> str:
+    """Get the current schema for the migration"""
+    current_tenant = session.execute(text("SELECT current_schema()")).scalar()
+    if isinstance(current_tenant, str):
+        return current_tenant
+    else:
+        raise ValueError("Current tenant is not a string")
+
+
+def upgrade() -> None:
+    """(Re)create the two knowledge-graph name triggers in the current schema.
+
+    1. ``update_kg_entity_name`` runs BEFORE INSERT OR UPDATE OF name on
+       ``kg_entity`` and sets ``name`` and ``name_trigrams`` on the new row.
+    2. ``update_kg_entity_name_from_doc`` runs AFTER UPDATE OF semantic_id on
+       ``document`` and rewrites ``name`` / ``name_trigrams`` of every
+       ``kg_entity`` row that references the updated document.
+
+    Functions use CREATE OR REPLACE and triggers are dropped before being
+    created, so existing definitions are overwritten.
+    """
+
+    bind = op.get_bind()
+    # The session is only used to look up the current schema name.
+    session = Session(bind=bind)
+
+    # Create kg_entity trigger to update kg_entity.name and its trigrams
+    tenant_id = _get_tenant_contextvar(session)
+    # Characters matching this pattern are stripped before computing trigrams.
+    alphanum_pattern = r"[^a-z0-9]+"
+    # Maximum length of the cleaned name passed to show_trgm.
+    truncate_length = 1000
+    function = "update_kg_entity_name"
+    op.execute(
+        text(
+            f"""
+            CREATE OR REPLACE FUNCTION "{tenant_id}".{function}()
+            RETURNS TRIGGER AS $$
+            DECLARE
+                name text;
+                cleaned_name text;
+            BEGIN
+                -- Set name to semantic_id if document_id is not NULL
+                IF NEW.document_id IS NOT NULL THEN
+                    SELECT lower(semantic_id) INTO name
+                    FROM "{tenant_id}".document
+                    WHERE id = NEW.document_id;
+                ELSE
+                    name = lower(NEW.name);
+                END IF;
+
+                -- Clean name and truncate if too long
+                cleaned_name = regexp_replace(
+                    name,
+                    '{alphanum_pattern}', '', 'g'
+                );
+                IF length(cleaned_name) > {truncate_length} THEN
+                    cleaned_name = left(cleaned_name, {truncate_length});
+                END IF;
+
+                -- Set name and name trigrams
+                NEW.name = name;
+                NEW.name_trigrams = {POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE}.show_trgm(cleaned_name);
+                RETURN NEW;
+            END;
+            $$ LANGUAGE plpgsql;
+            """
+        )
+    )
+    trigger = f"{function}_trigger"
+    # Drop first so the CREATE TRIGGER below cannot collide with an existing one.
+    op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON "{tenant_id}".kg_entity')
+    op.execute(
+        f"""
+        CREATE TRIGGER {trigger}
+            BEFORE INSERT OR UPDATE OF name
+            ON "{tenant_id}".kg_entity
+            FOR EACH ROW
+            EXECUTE FUNCTION "{tenant_id}".{function}();
+        """
+    )
+
+    # Create document trigger that propagates semantic_id changes to
+    # kg_entity.name and its trigrams for entities referencing the document
+    function = "update_kg_entity_name_from_doc"
+    op.execute(
+        text(
+            f"""
+            CREATE OR REPLACE FUNCTION "{tenant_id}".{function}()
+            RETURNS TRIGGER AS $$
+            DECLARE
+                doc_name text;
+                cleaned_name text;
+            BEGIN
+                doc_name = lower(NEW.semantic_id);
+
+                -- Clean name and truncate if too long
+                cleaned_name = regexp_replace(
+                    doc_name,
+                    '{alphanum_pattern}', '', 'g'
+                );
+                IF length(cleaned_name) > {truncate_length} THEN
+                    cleaned_name = left(cleaned_name, {truncate_length});
+                END IF;
+
+                -- Set name and name trigrams for all entities referencing this document
+                UPDATE "{tenant_id}".kg_entity
+                SET
+                    name = doc_name,
+                    name_trigrams = {POSTGRES_DEFAULT_SCHEMA_STANDARD_VALUE}.show_trgm(cleaned_name)
+                WHERE document_id = NEW.id;
+                RETURN NEW;
+            END;
+            $$ LANGUAGE plpgsql;
+            """
+        )
+    )
+    trigger = f"{function}_trigger"
+    # Drop first so the CREATE TRIGGER below cannot collide with an existing one.
+    op.execute(f'DROP TRIGGER IF EXISTS {trigger} ON "{tenant_id}".document')
+    op.execute(
+        f"""
+        CREATE TRIGGER {trigger}
+            AFTER UPDATE OF semantic_id
+            ON "{tenant_id}".document
+            FOR EACH ROW
+            EXECUTE FUNCTION "{tenant_id}".{function}();
+        """
+    )
+
+
+def downgrade() -> None:
+    # No-op: the functions and triggers (re)created by upgrade() are left in place.
+    pass
--- a/backend/alembic/versions/3781a5eb12cb_add_chunk_stats_table.py
+++ b/backend/alembic/versions/3781a5eb12cb_add_chunk_stats_table.py
@@ -0,0 +1,52 @@
+"""add chunk stats table
+
+Revision ID: 3781a5eb12cb
+Revises: df46c75b714e
+Create Date: 2025-03-10 10:02:30.586666
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "3781a5eb12cb"
+down_revision = "df46c75b714e"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "chunk_stats",
+        sa.Column("id", sa.String(), primary_key=True, index=True),
+        sa.Column(
+            "document_id",
+            sa.String(),
+            sa.ForeignKey("document.id"),
+            nullable=False,
+            index=True,
+        ),
+        sa.Column("chunk_in_doc_id", sa.Integer(), nullable=False),
+        sa.Column("information_content_boost", sa.Float(), nullable=True),
+        sa.Column(
+            "last_modified",
+            sa.DateTime(timezone=True),
+            nullable=False,
+            index=True,
+            server_default=sa.func.now(),
+        ),
+        sa.Column("last_synced", sa.DateTime(timezone=True), nullable=True, index=True),
+        sa.UniqueConstraint(
+            "document_id", "chunk_in_doc_id", name="uq_chunk_stats_doc_chunk"
+        ),
+    )
+
+    op.create_index(
+        "ix_chunk_sync_status", "chunk_stats", ["last_modified", "last_synced"]
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("ix_chunk_sync_status", table_name="chunk_stats")
+    op.drop_table("chunk_stats")
--- a/backend/alembic/versions/3879338f8ba1_add_tool_table.py
+++ b/backend/alembic/versions/3879338f8ba1_add_tool_table.py
@@ -0,0 +1,46 @@
+"""Add tool table
+
+Revision ID: 3879338f8ba1
+Revises: f1c6478c3fd8
+Create Date: 2024-05-11 16:11:23.718084
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "3879338f8ba1"
+down_revision = "f1c6478c3fd8"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "tool",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("description", sa.Text(), nullable=True),
+        sa.Column("in_code_tool_id", sa.String(), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_table(
+        "persona__tool",
+        sa.Column("persona_id", sa.Integer(), nullable=False),
+        sa.Column("tool_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["persona_id"],
+            ["persona.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["tool_id"],
+            ["tool.id"],
+        ),
+        sa.PrimaryKeyConstraint("persona_id", "tool_id"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("persona__tool")
+    op.drop_table("tool")
--- a/backend/alembic/versions/38eda64af7fe_add_chat_session_sharing.py
+++ b/backend/alembic/versions/38eda64af7fe_add_chat_session_sharing.py
@@ -0,0 +1,42 @@
+"""Add chat session sharing
+
+Revision ID: 38eda64af7fe
+Revises: 776b3bbe9092
+Create Date: 2024-03-27 19:41:29.073594
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "38eda64af7fe"
+down_revision = "776b3bbe9092"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "chat_session",
+        sa.Column(
+            "shared_status",
+            sa.Enum(
+                "PUBLIC",
+                "PRIVATE",
+                name="chatsessionsharedstatus",
+                native_enum=False,
+            ),
+            nullable=True,
+        ),
+    )
+    op.execute("UPDATE chat_session SET shared_status='PRIVATE'")
+    op.alter_column(
+        "chat_session",
+        "shared_status",
+        nullable=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("chat_session", "shared_status")
--- a/backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
+++ b/backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
@@ -0,0 +1,126 @@
+"""Update GitHub connector repo_name to repositories
+
+Revision ID: 3934b1bc7b62
+Revises: b7c2b63c4a03
+Create Date: 2025-03-05 10:50:30.516962
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+import json
+import logging
+
+# revision identifiers, used by Alembic.
+revision = "3934b1bc7b62"
+down_revision = "b7c2b63c4a03"
+branch_labels = None
+depends_on = None
+
+logger = logging.getLogger("alembic.runtime.migration")
+
+
+def upgrade() -> None:
+    # Get all GitHub connectors
+    conn = op.get_bind()
+
+    # First get all GitHub connectors
+    github_connectors = conn.execute(
+        sa.text(
+            """
+            SELECT id, connector_specific_config
+            FROM connector
+            WHERE source = 'GITHUB'
+            """
+        )
+    ).fetchall()
+
+    # Update each connector's config
+    updated_count = 0
+    for connector_id, config in github_connectors:
+        try:
+            if not config:
+                logger.warning(f"Connector {connector_id} has no config, skipping")
+                continue
+
+            # Parse the config if it's a string
+            if isinstance(config, str):
+                config = json.loads(config)
+
+            if "repo_name" not in config:
+                continue
+
+            # Create new config with repositories instead of repo_name
+            new_config = dict(config)
+            repo_name_value = new_config.pop("repo_name")
+            new_config["repositories"] = repo_name_value
+
+            # Update the connector with the new config
+            conn.execute(
+                sa.text(
+                    """
+                    UPDATE connector
+                    SET connector_specific_config = :new_config
+                    WHERE id = :connector_id
+                    """
+                ),
+                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
+            )
+            updated_count += 1
+        except Exception as e:
+            logger.error(f"Error updating connector {connector_id}: {str(e)}")
+
+
+def downgrade() -> None:
+    # Get all GitHub connectors
+    conn = op.get_bind()
+
+    logger.debug(
+        "Starting rollback of GitHub connectors from repositories to repo_name"
+    )
+
+    github_connectors = conn.execute(
+        sa.text(
+            """
+            SELECT id, connector_specific_config
+            FROM connector
+            WHERE source = 'GITHUB'
+            """
+        )
+    ).fetchall()
+
+    logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")
+
+    # Revert each GitHub connector to use repo_name instead of repositories
+    reverted_count = 0
+    for connector_id, config in github_connectors:
+        try:
+            if not config:
+                continue
+
+            # Parse the config if it's a string
+            if isinstance(config, str):
+                config = json.loads(config)
+
+            if "repositories" not in config:
+                continue
+
+            # Create new config with repo_name instead of repositories
+            new_config = dict(config)
+            repositories_value = new_config.pop("repositories")
+            new_config["repo_name"] = repositories_value
+
+            # Update the connector with the new config
+            conn.execute(
+                sa.text(
+                    """
+                    UPDATE connector
+                    SET connector_specific_config = :new_config
+                    WHERE id = :connector_id
+                    """
+                ),
+                {"new_config": json.dumps(new_config), "connector_id": connector_id},
+            )
+            reverted_count += 1
+        except Exception as e:
+            logger.error(f"Error reverting connector {connector_id}: {str(e)}")
--- a/backend/alembic/versions/3a7802814195_add_alternate_assistant_to_chat_message.py
+++ b/backend/alembic/versions/3a7802814195_add_alternate_assistant_to_chat_message.py
@@ -0,0 +1,35 @@
+"""add alternate assistant to chat message
+
+Revision ID: 3a7802814195
+Revises: 23957775e5f5
+Create Date: 2024-06-05 11:18:49.966333
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "3a7802814195"
+down_revision = "23957775e5f5"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True)
+    )
+    op.create_foreign_key(
+        "fk_chat_message_persona",
+        "chat_message",
+        "persona",
+        ["alternate_assistant_id"],
+        ["id"],
+    )
+
+
+def downgrade() -> None:
+    op.drop_constraint("fk_chat_message_persona", "chat_message", type_="foreignkey")
+    op.drop_column("chat_message", "alternate_assistant_id")
--- a/backend/alembic/versions/3b25685ff73c_move_is_public_to_cc_pair.py
+++ b/backend/alembic/versions/3b25685ff73c_move_is_public_to_cc_pair.py
@@ -0,0 +1,50 @@
+"""Move is_public to cc_pair
+
+Revision ID: 3b25685ff73c
+Revises: e0a68a81d434
+Create Date: 2023-10-05 18:47:09.582849
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "3b25685ff73c"
+down_revision = "e0a68a81d434"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "connector_credential_pair",
+        sa.Column("is_public", sa.Boolean(), nullable=True),
+    )
+    # fill in is_public for existing rows
+    op.execute(
+        "UPDATE connector_credential_pair SET is_public = true WHERE is_public IS NULL"
+    )
+    op.alter_column("connector_credential_pair", "is_public", nullable=False)
+
+    op.add_column(
+        "credential",
+        sa.Column("is_admin", sa.Boolean(), nullable=True),
+    )
+    op.execute("UPDATE credential SET is_admin = true WHERE is_admin IS NULL")
+    op.alter_column("credential", "is_admin", nullable=False)
+
+    op.drop_column("credential", "public_doc")
+
+
+def downgrade() -> None:
+    op.add_column(
+        "credential",
+        sa.Column("public_doc", sa.Boolean(), nullable=True),
+    )
+    # setting public_doc to false for all existing rows to be safe
+    # NOTE: this is likely not the correct state of the world but it's the best we can do
+    op.execute("UPDATE credential SET public_doc = false WHERE public_doc IS NULL")
+    op.alter_column("credential", "public_doc", nullable=False)
+    op.drop_column("connector_credential_pair", "is_public")
+    op.drop_column("credential", "is_admin")
--- a/backend/alembic/versions/3bd4c84fe72f_improved_index.py
+++ b/backend/alembic/versions/3bd4c84fe72f_improved_index.py
@@ -0,0 +1,82 @@
+"""improved index
+
+Revision ID: 3bd4c84fe72f
+Revises: 8f43500ee275
+Create Date: 2025-02-26 13:07:56.217791
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "3bd4c84fe72f"
+down_revision = "8f43500ee275"
+branch_labels = None
+depends_on = None
+
+
+# NOTE:
+# This migration addresses issues with the previous migration (8f43500ee275) which caused
+# an outage by creating an index without using CONCURRENTLY. This migration:
+#
+# 1. Creates more efficient full-text search capabilities using tsvector columns and GIN indexes
+# 2. Adds indexes to both chat_message and chat_session tables for comprehensive search
+# 3. Does not use CONCURRENTLY for index creation; it was removed due to operational issues
+
+
+def upgrade() -> None:
+    """Rebuild full-text search support for chat messages and chat sessions.
+
+    Any pre-existing search indexes and tsvector columns are dropped, then a
+    stored generated ``tsvector`` column plus a GIN index is created on
+    ``chat_message`` (from ``message``) and on ``chat_session`` (from
+    ``description``, with NULL treated as an empty string).
+    """
+    # First, drop any existing indexes to avoid conflicts
+    op.execute("DROP INDEX IF EXISTS idx_chat_message_tsv;")
+    op.execute("DROP INDEX IF EXISTS idx_chat_session_desc_tsv;")
+    op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")
+
+    # Drop existing columns if they exist
+    op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;")
+    op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;")
+
+    # Add a stored, generated tsvector column derived from chat_message.message
+    op.execute(
+        """
+        ALTER TABLE chat_message
+        ADD COLUMN message_tsv tsvector
+        GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
+        """
+    )
+
+    # GIN index over the new column (created without CONCURRENTLY)
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_chat_message_tsv
+        ON chat_message
+        USING GIN (message_tsv)
+        """
+    )
+
+    # Also add a stored tsvector column for chat_session.description
+    op.execute(
+        """
+        ALTER TABLE chat_session
+        ADD COLUMN description_tsv tsvector
+        GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;
+        """
+    )
+
+    # GIN index over chat_session.description_tsv
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_chat_session_desc_tsv
+        ON chat_session
+        USING GIN (description_tsv)
+        """
+    )
+
+
+def downgrade() -> None:
+    # Drop the indexes first
+    op.execute("DROP INDEX IF EXISTS idx_chat_message_tsv;")
+    op.execute("DROP INDEX IF EXISTS idx_chat_session_desc_tsv;")
+
+    # Then drop the columns
+    op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;")
+    op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;")
+
+    op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")
--- a/backend/alembic/versions/3c5e35aa9af0_polling_document_count.py
+++ b/backend/alembic/versions/3c5e35aa9af0_polling_document_count.py
@@ -0,0 +1,53 @@
+"""Polling Document Count
+
+Revision ID: 3c5e35aa9af0
+Revises: 27c6ecc08586
+Create Date: 2023-06-14 23:45:51.760440
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "3c5e35aa9af0"
+down_revision = "27c6ecc08586"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "connector_credential_pair",
+        sa.Column(
+            "last_successful_index_time",
+            sa.DateTime(timezone=True),
+            nullable=True,
+        ),
+    )
+    op.add_column(
+        "connector_credential_pair",
+        sa.Column(
+            "last_attempt_status",
+            sa.Enum(
+                "NOT_STARTED",
+                "IN_PROGRESS",
+                "SUCCESS",
+                "FAILED",
+                name="indexingstatus",
+                native_enum=False,
+            ),
+            nullable=False,
+        ),
+    )
+    op.add_column(
+        "connector_credential_pair",
+        sa.Column("total_docs_indexed", sa.Integer(), nullable=False),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("connector_credential_pair", "total_docs_indexed")
+    op.drop_column("connector_credential_pair", "last_attempt_status")
+    op.drop_column("connector_credential_pair", "last_successful_index_time")
--- a/backend/alembic/versions/3c6531f32351_add_back_input_prompts.py
+++ b/backend/alembic/versions/3c6531f32351_add_back_input_prompts.py
@@ -0,0 +1,60 @@
+"""add back input prompts
+
+Revision ID: 3c6531f32351
+Revises: aeda5f2df4f6
+Create Date: 2025-01-13 12:49:51.705235
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+import fastapi_users_db_sqlalchemy
+
+# revision identifiers, used by Alembic.
+revision = "3c6531f32351"
+down_revision = "aeda5f2df4f6"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "inputprompt",
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column("prompt", sa.String(), nullable=False),
+        sa.Column("content", sa.String(), nullable=False),
+        sa.Column("active", sa.Boolean(), nullable=False),
+        sa.Column("is_public", sa.Boolean(), nullable=False),
+        sa.Column(
+            "user_id",
+            fastapi_users_db_sqlalchemy.generics.GUID(),
+            nullable=True,
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["user.id"],
+        ),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_table(
+        "inputprompt__user",
+        sa.Column("input_prompt_id", sa.Integer(), nullable=False),
+        sa.Column(
+            "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False
+        ),
+        sa.Column("disabled", sa.Boolean(), nullable=False, default=False),
+        sa.ForeignKeyConstraint(
+            ["input_prompt_id"],
+            ["inputprompt.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["user_id"],
+            ["user.id"],
+        ),
+        sa.PrimaryKeyConstraint("input_prompt_id", "user_id"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("inputprompt__user")
+    op.drop_table("inputprompt")
--- a/backend/alembic/versions/401c1ac29467_add_tables_for_ui_based_llm_.py
+++ b/backend/alembic/versions/401c1ac29467_add_tables_for_ui_based_llm_.py
@@ -0,0 +1,50 @@
+"""Add tables for UI-based LLM configuration
+
+Revision ID: 401c1ac29467
+Revises: 703313b75876
+Create Date: 2024-04-13 18:07:29.153817
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "401c1ac29467"
+down_revision = "703313b75876"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "llm_provider",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("api_key", sa.String(), nullable=True),
+        sa.Column("api_base", sa.String(), nullable=True),
+        sa.Column("api_version", sa.String(), nullable=True),
+        sa.Column(
+            "custom_config",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+        ),
+        sa.Column("default_model_name", sa.String(), nullable=False),
+        sa.Column("fast_default_model_name", sa.String(), nullable=True),
+        sa.Column("is_default_provider", sa.Boolean(), unique=True, nullable=True),
+        sa.Column("model_names", postgresql.ARRAY(sa.String()), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("name"),
+    )
+
+    op.add_column(
+        "persona",
+        sa.Column("llm_model_provider_override", sa.String(), nullable=True),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("persona", "llm_model_provider_override")
+
+    op.drop_table("llm_provider")
--- a/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py
+++ b/backend/alembic/versions/43cbbb3f5e6a_rename_index_origin_to_index_recursively.py
@@ -0,0 +1,43 @@
+"""Rename index_origin to index_recursively
+
+Revision ID: 1d6ad76d1f37
+Revises: e1392f05e840
+Create Date: 2024-08-01 12:38:54.466081
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "1d6ad76d1f37"
+down_revision = "e1392f05e840"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.execute(
+        """
+        UPDATE connector
+        SET connector_specific_config = jsonb_set(
+            connector_specific_config,
+            '{index_recursively}',
+            'true'::jsonb
+        ) - 'index_origin'
+        WHERE connector_specific_config ? 'index_origin'
+    """
+    )
+
+
+def downgrade() -> None:
+    op.execute(
+        """
+        UPDATE connector
+        SET connector_specific_config = jsonb_set(
+            connector_specific_config,
+            '{index_origin}',
+            connector_specific_config->'index_recursively'
+        ) - 'index_recursively'
+        WHERE connector_specific_config ? 'index_recursively'
+    """
+    )
--- a/backend/alembic/versions/44f856ae2a4a_add_cloud_embedding_model.py
+++ b/backend/alembic/versions/44f856ae2a4a_add_cloud_embedding_model.py
@@ -0,0 +1,66 @@
+"""add cloud embedding model and update embedding_model
+
+Revision ID: 44f856ae2a4a
+Revises: d716b0791ddd
+Create Date: 2024-06-28 20:01:05.927647
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "44f856ae2a4a"
+down_revision = "d716b0791ddd"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    # Create embedding_provider table
+    op.create_table(
+        "embedding_provider",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("api_key", sa.LargeBinary(), nullable=True),
+        sa.Column("default_model_id", sa.Integer(), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("name"),
+    )
+
+    # Add cloud_provider_id to embedding_model table
+    op.add_column(
+        "embedding_model", sa.Column("cloud_provider_id", sa.Integer(), nullable=True)
+    )
+
+    # Add foreign key constraints
+    op.create_foreign_key(
+        "fk_embedding_model_cloud_provider",
+        "embedding_model",
+        "embedding_provider",
+        ["cloud_provider_id"],
+        ["id"],
+    )
+    op.create_foreign_key(
+        "fk_embedding_provider_default_model",
+        "embedding_provider",
+        "embedding_model",
+        ["default_model_id"],
+        ["id"],
+    )
+
+
+def downgrade() -> None:
+    # Remove foreign key constraints
+    op.drop_constraint(
+        "fk_embedding_model_cloud_provider", "embedding_model", type_="foreignkey"
+    )
+    op.drop_constraint(
+        "fk_embedding_provider_default_model", "embedding_provider", type_="foreignkey"
+    )
+
+    # Remove cloud_provider_id column
+    op.drop_column("embedding_model", "cloud_provider_id")
+
+    # Drop embedding_provider table
+    op.drop_table("embedding_provider")
--- a/backend/alembic/versions/4505fd7302e1_added_is_internet_to_dbdoc.py
+++ b/backend/alembic/versions/4505fd7302e1_added_is_internet_to_dbdoc.py
@@ -0,0 +1,24 @@
+"""added is_internet to DBDoc
+
+Revision ID: 4505fd7302e1
+Revises: c18cdf4b497e
+Create Date: 2024-06-18 20:46:09.095034
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "4505fd7302e1"
+down_revision = "c18cdf4b497e"
+
+
+def upgrade() -> None:
+    op.add_column("search_doc", sa.Column("is_internet", sa.Boolean(), nullable=True))
+    op.add_column("tool", sa.Column("display_name", sa.String(), nullable=True))
+
+
+def downgrade() -> None:
+    op.drop_column("tool", "display_name")
+    op.drop_column("search_doc", "is_internet")
--- a/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py
+++ b/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py
@@ -0,0 +1,25 @@
+"""Larger Access Tokens for OAUTH
+
+Revision ID: 465f78d9b7f9
+Revises: 3c5e35aa9af0
+Create Date: 2023-07-18 17:33:40.365034
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "465f78d9b7f9"
+down_revision = "3c5e35aa9af0"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.alter_column("oauth_account", "access_token", type_=sa.Text())
+
+
+def downgrade() -> None:
+    op.alter_column("oauth_account", "access_token", type_=sa.String(length=1024))
--- a/backend/alembic/versions/46625e4745d4_remove_native_enum.py
+++ b/backend/alembic/versions/46625e4745d4_remove_native_enum.py
@@ -0,0 +1,32 @@
+"""Remove Native Enum
+
+Revision ID: 46625e4745d4
+Revises: 9d97fecfab7f
+Create Date: 2023-10-27 11:38:33.803145
+
+"""
+
+from alembic import op
+from sqlalchemy import String
+
+# revision identifiers, used by Alembic.
+revision = "46625e4745d4"
+down_revision = "9d97fecfab7f"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    # At this point, we directly changed some previous migrations,
+    # https://github.com/onyx-dot-app/onyx/pull/637
+    # Due to using Postgres native Enums, it caused some complications for first time users.
+    # To remove those complications, all Enums are only handled application side moving forward.
+    # This migration exists to ensure that existing users don't run into upgrade issues.
+    op.alter_column("index_attempt", "status", type_=String)
+    op.alter_column("connector_credential_pair", "last_attempt_status", type_=String)
+    op.execute("DROP TYPE IF EXISTS indexingstatus")
+
+
+def downgrade() -> None:
+    # We don't want Native Enums, do nothing
+    pass
--- a/backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py
+++ b/backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py
@@ -0,0 +1,47 @@
+"""fix_user__external_user_group_id_fk
+
+Revision ID: 46b7a812670f
+Revises: f32615f71aeb
+Create Date: 2024-09-23 12:58:03.894038
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "46b7a812670f"
+down_revision = "f32615f71aeb"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Drop the existing primary key
+    op.drop_constraint(
+        "user__external_user_group_id_pkey",
+        "user__external_user_group_id",
+        type_="primary",
+    )
+
+    # Add the new composite primary key
+    op.create_primary_key(
+        "user__external_user_group_id_pkey",
+        "user__external_user_group_id",
+        ["user_id", "external_user_group_id", "cc_pair_id"],
+    )
+
+
+def downgrade() -> None:
+    # Drop the composite primary key
+    op.drop_constraint(
+        "user__external_user_group_id_pkey",
+        "user__external_user_group_id",
+        type_="primary",
+    )
+    # Delete all entries from the table
+    op.execute("DELETE FROM user__external_user_group_id")
+
+    # Recreate the original primary key on user_id
+    op.create_primary_key(
+        "user__external_user_group_id_pkey", "user__external_user_group_id", ["user_id"]
+    )
--- a/backend/alembic/versions/4738e4b3bae1_pg_file_store.py
+++ b/backend/alembic/versions/4738e4b3bae1_pg_file_store.py
@@ -0,0 +1,29 @@
+"""PG File Store
+
+Revision ID: 4738e4b3bae1
+Revises: e91df4e935ef
+Create Date: 2024-03-20 18:53:32.461518
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "4738e4b3bae1"
+down_revision = "e91df4e935ef"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "file_store",
+        sa.Column("file_name", sa.String(), nullable=False),
+        sa.Column("lobj_oid", sa.Integer(), nullable=False),
+        sa.PrimaryKeyConstraint("file_name"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("file_store")
--- a/backend/alembic/versions/473a1a7ca408_add_display_model_names_to_llm_provider.py
+++ b/backend/alembic/versions/473a1a7ca408_add_display_model_names_to_llm_provider.py
@@ -0,0 +1,49 @@
+"""Add display_model_names to llm_provider
+
+Revision ID: 473a1a7ca408
+Revises: 325975216eb3
+Create Date: 2024-07-25 14:31:02.002917
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "473a1a7ca408"
+down_revision = "325975216eb3"
+branch_labels: None = None
+depends_on: None = None
+
+# Maps a provider key to the list of model names written into
+# llm_provider.display_model_names by upgrade() for rows of that provider.
+default_models_by_provider = {
+    "openai": ["gpt-4", "gpt-4o", "gpt-4o-mini"],
+    "bedrock": [
+        "meta.llama3-1-70b-instruct-v1:0",
+        "meta.llama3-1-8b-instruct-v1:0",
+        "anthropic.claude-3-opus-20240229-v1:0",
+        "mistral.mistral-large-2402-v1:0",
+        "anthropic.claude-3-5-sonnet-20240620-v1:0",
+    ],
+    "anthropic": ["claude-3-opus-20240229", "claude-3-5-sonnet-20240620"],
+}
+
+
+def upgrade() -> None:
+    op.add_column(
+        "llm_provider",
+        sa.Column("display_model_names", postgresql.ARRAY(sa.String()), nullable=True),
+    )
+
+    connection = op.get_bind()
+    for provider, models in default_models_by_provider.items():
+        connection.execute(
+            sa.text(
+                "UPDATE llm_provider SET display_model_names = :models WHERE provider = :provider"
+            ),
+            {"models": models, "provider": provider},
+        )
+
+
+def downgrade() -> None:
+    op.drop_column("llm_provider", "display_model_names")
--- a/backend/alembic/versions/47433d30de82_create_indexattempt_table.py
+++ b/backend/alembic/versions/47433d30de82_create_indexattempt_table.py
@@ -0,0 +1,74 @@
+"""Create IndexAttempt table
+
+Revision ID: 47433d30de82
+Revises:
+Create Date: 2023-05-04 00:55:32.971991
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "47433d30de82"
+down_revision: None = None
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    """Create the ``index_attempt`` table.
+
+    This revision has no ``down_revision``, i.e. it is the root of the
+    migration chain. The table holds one row per attempt with its source,
+    input type, connector config (JSONB), creation/update timestamps, status,
+    the list of document ids and an optional error message.
+    """
+    op.create_table(
+        "index_attempt",
+        sa.Column("id", sa.Integer(), nullable=False),
+        # String type since python enum will change often
+        sa.Column(
+            "source",
+            sa.String(),
+            nullable=False,
+        ),
+        # String type to easily accommodate new ways of pulling
+        # in documents
+        sa.Column(
+            "input_type",
+            sa.String(),
+            nullable=False,
+        ),
+        sa.Column(
+            "connector_specific_config",
+            postgresql.JSONB(),
+            nullable=False,
+        ),
+        # Both timestamps default to now() on the server side
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=True,
+        ),
+        sa.Column(
+            "time_updated",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            server_onupdate=sa.text("now()"),  # type: ignore
+            nullable=True,
+        ),
+        # Declared with native_enum=False
+        sa.Column(
+            "status",
+            sa.Enum(
+                "NOT_STARTED",
+                "IN_PROGRESS",
+                "SUCCESS",
+                "FAILED",
+                name="indexingstatus",
+                native_enum=False,
+            ),
+            nullable=False,
+        ),
+        sa.Column("document_ids", postgresql.ARRAY(sa.String()), nullable=True),
+        sa.Column("error_msg", sa.String(), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("index_attempt")
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`backend/tests/integration/tests/pruning/website`