Compare commits


543 Commits

Author SHA1 Message Date
Weves
5da0751ac6 Move web Dockerfile to single stage since multi-stage has been causing issues w/ github actions 2024-05-17 00:34:18 -07:00
Weves
c934ed78f4 Upgrade FE packages 2024-05-16 22:13:02 -07:00
Weves
0369ddef58 Fix document set editor refresh 2024-05-16 14:42:28 -07:00
Weves
8c17c77ed9 Small fixes to LLM configuration screen 2024-05-14 22:51:28 -07:00
Weves
f087d3eac0 Fix sleep + handle duplicates 2024-05-14 17:44:33 -07:00
Weves
10232c7c54 Bump openai version 2024-05-14 17:28:50 -07:00
Weves
7928ea2fff Improve document set page UX 2024-05-14 15:43:26 -07:00
Weves
05bc6b1c65 Add pagination to document set syncing + improve speed 2024-05-14 15:43:26 -07:00
Weves
6f90308278 Add gpt-4o support 2024-05-14 13:32:00 -07:00
Weves
d0850a0288 Fix model names for enabled LLM providers 2024-05-14 13:32:00 -07:00
Weves
e573ba80b9 Revert black bump 2024-05-14 00:20:03 -07:00
Weves
5d1a81001e bump litellm 2024-05-13 18:29:33 -07:00
Weves
8b95395f34 Remove chunk limit of 20 2024-05-13 11:35:16 -07:00
Weves
e8b38d5f63 Hide search tool if no connectors exist 2024-05-13 01:22:37 -07:00
Weves
c2cdce4d49 Tool calling framework 2024-05-13 00:47:39 -07:00
Yuhong Sun
546815dc8c Consolidate File Processing (#1449) 2024-05-11 23:11:22 -07:00
Yuhong Sun
e89c81de76 Make User Promotion Demotion sync calls (#1448) 2024-05-11 16:25:56 -07:00
Ryan Gordon
5bf123da53 Added user demotion functionality. (#1444) 2024-05-11 15:59:47 -07:00
Yuhong Sun
7a02fd7ad7 Touchups from Contributor PRs (#1447) 2024-05-11 15:58:33 -07:00
Weves
4e759717ab Fix mypy 2024-05-11 12:36:49 -07:00
Weves
2e0be9f2da Folder support 2024-05-11 12:29:35 -07:00
mattboret
eb1b604b8c Allow defining custom conditions for the answer validation prompt (#1347)
Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2024-05-11 10:43:05 -07:00
Shravan Vishwanathan
b8af38bb95 Refactor comment extraction in JIRA connector to handle nested content (#1329)
- Implement `extract_text_from_content` to parse nested text elements from comment bodies.
- Modify `_get_comment_strs` to use the new text extraction method, improving handling of various content structures.
2024-05-11 10:41:27 -07:00
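A rough sketch of the recursive extraction this commit describes, using the extract_text_from_content name from the commit message; the nested comment schema (nodes with optional "text" and "content" fields, as in Atlassian Document Format) is an assumption for illustration:

from typing import Any

def extract_text_from_content(content: list[dict[str, Any]]) -> list[str]:
    # Each node may carry its own "text" and/or a nested "content" list
    # (paragraphs, bullet lists, mentions, ...), so recurse through both.
    texts: list[str] = []
    for node in content:
        if "text" in node:
            texts.append(node["text"])
        if "content" in node:
            texts.extend(extract_text_from_content(node["content"]))
    return texts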
dependabot[bot]
cfd9159b27 Bump aiohttp from 3.9.2 to 3.9.4 in /backend/requirements (#1349) 2024-05-11 10:33:08 -07:00
dependabot[bot]
52fd18d3bd Bump pydantic from 1.10.7 to 1.10.13 in /backend/requirements (#1377) 2024-05-11 10:32:31 -07:00
EdmundKorley
b72e6861e7 Add handling for unsupported block types in NotionConnector (#1231) 2024-05-11 10:32:02 -07:00
Davy Peter Braun
20a22e2bc0 fix(config): password auth to be url-encoded to avoid some deployment errors (#1422) 2024-05-11 10:29:51 -07:00
mattboret
a467999984 Add Slack feedback reminder (#1262)
---------

Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2024-05-11 10:27:09 -07:00
mattboret
1729f78930 set follow-up emoji on an invalid answer (#1263)
Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2024-05-11 10:16:42 -07:00
dependabot[bot]
94a6db51c8 Bump black from 23.3.0 to 24.3.0 in /backend/requirements (#1236) 2024-05-11 10:14:52 -07:00
Matthew Holland
d729066194 Feature: Added File connector support for .docx, .pptx, .xlsx, .csv, .eml, and .epub file types (#1284) 2024-05-10 19:06:13 -07:00
Yuhong Sun
c6b45a550f Update Launch Json (#1443) 2024-05-10 16:52:10 -07:00
Yuhong Sun
34d05f4599 Mypy fixes for default configs (#1442) 2024-05-10 16:46:28 -07:00
Weves
7f1ffa3921 Add predefined feedback option 2024-05-09 19:11:17 -07:00
Weves
957d3625c2 Add autorefresh for document sets page 2024-05-09 14:33:55 -07:00
Weves
683addc390 Use Vespa Visit to handle long documents 2024-05-09 14:33:55 -07:00
Moshe Zada
2952b1dd96 Split slack messages up to 3K messages (#1379) 2024-05-09 11:05:02 -07:00
Moshe Zada
9e08ab98a0 show error when warming up encoders (#1314) 2024-05-09 10:52:54 -07:00
Bijay Regmi
436806f2e3 add gpu support and README for documentation (#1398) 2024-05-09 10:51:37 -07:00
JayGhiya
ffea041398 Helm Chart Support (#1177) 2024-05-08 18:06:00 -07:00
Weves
eef54c8a86 Add non-ee fallback to fetch_versioned_implementation 2024-05-08 16:26:45 -07:00
Weves
7ed176b7cc Lock improvement 2024-05-08 16:11:56 -07:00
Weves
8cbf7c8097 Custom LLM provider fix 2024-05-07 17:17:38 -07:00
Weves
76a5f26fe1 Add display names to LLMProvider + allow multiple configs from the same provider 2024-05-07 16:26:04 -07:00
Weves
d6522426c9 Make access key and secret optional for AWS Bedrock 2024-05-07 01:11:09 -07:00
Yuhong Sun
45d5d7af4a Ingestion API Additions (#1424) 2024-05-06 21:53:35 -07:00
Yuhong Sun
01476a37c3 Encrypted Sensitive Fields (#1423) 2024-05-06 18:02:42 -07:00
Yuhong Sun
060a8d0aad Discourse Connector (#1420) 2024-05-05 16:54:08 -07:00
Yuhong Sun
03911de8b2 Danswerbot Stats (#1421) 2024-05-04 18:12:54 -07:00
Weves
1d3d84456a Small cleanup 2024-05-03 17:41:51 -07:00
Yuhong Sun
745f68241d Chat Folders Backend (#1419) 2024-05-03 16:37:18 -07:00
Mehmet Bektas
6cbfe1bcdb support for passing extra headers to litellm using env variables 2024-05-03 14:42:22 -07:00
mattboret
2ff207218e Confluence: Add config to index only active pages (#1348)
Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2024-05-03 09:04:09 -07:00
Vikas Neha Ojha
143b50c519 Save correct document url from document360 (#1413) 2024-05-01 18:28:34 -07:00
Yuhong Sun
577c870acb Manage me endpoint key rename (#1410) 2024-04-30 23:27:33 -07:00
Yuhong Sun
7b94159115 Api key email display for manage/me endpoint (#1409) 2024-04-30 22:05:40 -07:00
Weves
96762cfe44 Misc UI improvements 2024-04-30 20:54:59 -07:00
Weves
b89e9127d7 Fix double message send 2024-04-30 13:10:40 -07:00
Weves
3fb68af405 Address rate limiting for Notion 2024-04-30 01:43:08 -07:00
Weves
5b93e786ad Add image upload capabilities 2024-04-29 01:53:34 -07:00
Davy Peter Braun
350e548b2d fix(gitignore): properly ignore celery-generated file 2024-04-28 11:23:07 -07:00
Weves
a2156dd836 Cancel scheduled indexing attempts on deletion request 2024-04-27 16:02:30 -07:00
Weves
a19290cb27 Address 'PGRES_TUPLES_OK and no message from the libpq' issues 2024-04-27 14:42:18 -07:00
Weves
f5b3333df3 Add UI-based LLM selection 2024-04-27 01:41:26 -07:00
Yuhong Sun
4c740060aa Fix Citation Sort Tiebreak (#1397) 2024-04-26 18:33:09 -07:00
Yuhong Sun
6f2d6fc5f2 Set message logging to debug (#1396) 2024-04-26 17:27:56 -07:00
Weves
73d94086d6 Fix compose file dependencies 2024-04-26 16:57:20 -07:00
Weves
9211334597 Fix document360 mypy issue 2024-04-26 00:49:00 -07:00
Weves
60d5abae3c Add token rate limit tables to MIT 2024-04-26 00:37:28 -07:00
Weves
85a8f9926c Fix credential form refresh 2024-04-25 17:46:21 -07:00
Vikas Neha Ojha
fe03747a1a Support looping through all nested subcategories (#1382)
* Fix for parser failing if doc is blank

* Support looping through all nested child categories
2024-04-25 17:27:30 -07:00
Yuhong Sun
ead7a80297 Fix Tag Integer Enums (#1388) 2024-04-25 17:20:35 -07:00
Yuhong Sun
d756ad34f3 Connectors Whitelist Fix (#1387) 2024-04-25 16:43:25 -07:00
Yuhong Sun
b4842e3a0d Rework Disabled Connector options to Whitelist instead (#1386) 2024-04-25 16:23:11 -07:00
Mehmet Bektas
ee6b8b7f50 properly yield non-streaming responses 2024-04-25 14:49:25 -07:00
Weves
648f2d06bf Add env variable to disable streaming for the DefaultMultiLLM class 2024-04-25 09:22:14 -07:00
Yuhong Sun
66d95690cb Disabled Connectors List (#1376) 2024-04-24 17:24:17 -07:00
Yuhong Sun
d2774f8979 k 2024-04-24 16:50:40 -07:00
Jignesh Solanki
0b1695f616 fix error: NextRouter was not mounted 2024-04-23 11:05:21 -07:00
Yuhong Sun
8b4e55ca82 Vespa Batch Size (#1368) 2024-04-22 23:45:54 -07:00
Yuhong Sun
7044cae0e2 Remove Nested DB Sessions (#1367) 2024-04-22 23:05:43 -07:00
Weves
832d40e490 Allow separate vespa config server host 2024-04-22 21:36:54 -07:00
Weves
df216eafa5 Assistant rework fixes 2024-04-22 13:16:13 -07:00
Weves
b407edbe49 Personal assistants 2024-04-21 21:06:16 -07:00
Yuhong Sun
f616b7e6e5 Web Connector to only allow Global IPs (#1357) 2024-04-20 15:24:00 -07:00
Yuhong Sun
7d51549b1b Remove Unused Volumes (#1356) 2024-04-20 10:27:41 -07:00
Yuhong Sun
4e9605e652 Only Log Index Attempt CC Pair Miscount (#1355) 2024-04-20 09:25:08 -07:00
Yuhong Sun
58545ccf3a Pre download models (#1354) 2024-04-19 21:52:53 -07:00
Yuhong Sun
87f304dfd0 Swap Index Early (#1353) 2024-04-19 10:38:15 -07:00
Weves
82b9cb4cc1 Add check to ensure auth is enabled for every endpoint unless explicitly whitelisted 2024-04-19 01:26:24 -07:00
Yuhong Sun
e361e92230 Healthcheck for model server (#1350) 2024-04-18 16:22:38 -07:00
Yuhong Sun
89ff07a96b Slack improvement 2024-04-17 21:38:00 -07:00
Yuhong Sun
be12e4fa64 Double Check Files/URLs (#1344) 2024-04-17 21:34:29 -07:00
Yuhong Sun
26f8d884e1 Allow NLTK Failures (#1340) 2024-04-17 21:34:29 -07:00
Alan Hagedorn
654c103f36 test 2024-04-15 12:20:04 -07:00
Yuhong Sun
599db71238 Permission Sync Models (#1334) 2024-04-14 23:29:32 -07:00
Yuhong Sun
1b41ec2b50 Remove Search Only Model (#1331) 2024-04-14 18:49:07 -07:00
Yuhong Sun
a17060af5a Provide Additional Context for Chunk Options in APIs (#1330) 2024-04-14 18:32:22 -07:00
Alan Hagedorn
b9b1e22fac Add name to API Key (#1327) 2024-04-13 12:36:46 -07:00
Yuhong Sun
d2d042a2cc Add Container Descriptions (#1326) 2024-04-13 12:10:46 -07:00
2pac
7810e931f3 fix: Consider Hubspot ticket notes body 2024-04-12 10:14:18 -07:00
2pac
6be5f51440 fix: Stop reading Notion pages on polling 2024-04-12 09:27:45 -07:00
Yuhong Sun
b59912884b Fix Model Server (#1320) 2024-04-10 23:13:22 -07:00
Yuhong Sun
f346c2fc86 Axero Link Fix (#1317) 2024-04-10 09:44:12 -07:00
Weves
714a3c867d Add option to skip Jira tickets with a certain label 2024-04-09 19:40:36 -07:00
Weves
31bfbe5d16 Fix chat sharing 2024-04-09 11:57:09 -07:00
Weves
dac4be62e0 Fix prod compose files 2024-04-08 16:01:56 -07:00
Yuhong Sun
b432d42205 Mypy Fix (#1308) 2024-04-08 00:52:14 -07:00
Yuhong Sun
2db906b7a2 Always Use Model Server (#1306) 2024-04-07 21:25:06 -07:00
Chris Weaver
795243283d Update README.md
Remove 'Danswer is the ChatGPT for teams'
2024-04-07 14:30:26 -07:00
Weves
eb367de44d Small token budget tweaks 2024-04-04 20:58:45 -07:00
Chris Weaver
447791b455 Token budgets (#1302)
---------

Co-authored-by: Nick Donohue <ndonohue@gmail.com>
2024-04-04 20:43:24 -07:00
Weves
7ba7224929 Allow seeding of chat sessions via POST 2024-04-04 12:59:39 -07:00
Yuhong Sun
33da86c802 Reranker Warning Log (#1299) 2024-04-04 06:59:48 -07:00
Yuhong Sun
58dc620c28 Add Check for Enabling Reranking (#1298) 2024-04-04 06:29:00 -07:00
Yuhong Sun
7298cc2835 Add verbose logging in case of query failure (#1297) 2024-04-04 05:30:23 -07:00
Yuhong Sun
4abf5f27a0 Axero Forums Support (#1287) 2024-04-04 03:51:10 -07:00
Weves
c7efce3bde Enable bedrock models in dev compose file 2024-04-03 23:21:10 -07:00
ThomaciousD
d329061f92 Fixed: Web connector - documents deleted when no internet #1161 (#1292)
* fixing check connection before scrape in web connector #1161

* reformat

---------

Co-authored-by: ThomaciousD <ThomaciousD@me>
2024-04-02 23:17:53 -07:00
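A minimal sketch of the idea behind this fix, with an assumed helper name and probe URL: fail the indexing attempt loudly when offline instead of scraping zero pages, which downstream cleanup would read as "every document was deleted".

import requests

def check_internet_connection(url: str = "https://docs.danswer.dev") -> None:
    # Raising here aborts the indexing attempt; silently returning no
    # documents would instead look like every indexed page disappeared.
    try:
        requests.get(url, timeout=3).raise_for_status()
    except requests.RequestException as e:
        raise ConnectionError(f"Unable to reach {url} - check your internet connection") from e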
Weves
b06b95dc3a Bump litellm version to support latest Anthropic models 2024-04-02 19:32:19 -07:00
Yuhong Sun
b0e0557d63 Update Contributing (#1288) 2024-04-01 22:41:40 -07:00
Weves
87019fc18e Notion 404 graceful error handling 2024-04-01 17:37:35 -07:00
Weves
e82061a5ec Add support for specifying title via search params 2024-04-01 11:25:57 -07:00
Weves
0b0fc785a1 Fix fetch settings SS 2024-03-31 23:51:11 -07:00
Yuhong Sun
5b8cdd4eee Gpt-3.5-0125 Option (#1282) 2024-03-31 21:58:00 -07:00
Weves
a4869b727d Add ability to control available pages 2024-03-31 21:49:34 -07:00
Yuhong Sun
15f7b42e2b More Options for Search APIs (#1280) 2024-03-31 21:45:46 -07:00
Weves
32f55ddb8f URL-based chat seeding 2024-03-31 18:10:49 -07:00
Yuhong Sun
b8af1377ba Trivy Ignore Path (#1278) 2024-03-31 16:22:48 -07:00
Yuhong Sun
29f251660b Trivy Security Scan (#1277) 2024-03-31 15:32:22 -07:00
Yuhong Sun
783696a671 Axero Spaces (#1276) 2024-03-31 14:45:20 -07:00
Yuhong Sun
22477b1aca Chunker Gmail Issue Logging (#1274) 2024-03-30 00:18:57 -07:00
Weves
49acde0a8f URL-based chat sharing 2024-03-29 00:51:17 -07:00
Yuhong Sun
055cab2944 Public Slack Feedback Option (#1270) 2024-03-28 18:21:28 -07:00
Yuhong Sun
f46e65be92 Save One Shot Docs (#1269) 2024-03-28 12:48:01 -07:00
Yuhong Sun
d46b475410 Make porting from persistent volumes optional (#1268) 2024-03-28 11:26:11 -07:00
Yuhong Sun
fd69203be8 More accurate input token count for LLM (#1267) 2024-03-28 11:11:37 -07:00
Yuhong Sun
9757fbee90 Axero Connector (#1253)
---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
2024-03-27 11:12:01 -07:00
Weves
5a967322fd Add ability to specify custom embedding models 2024-03-27 00:02:51 -07:00
Yuhong Sun
fbff5b5784 Save Retrieved Docs for One Shot Flows (#1259) 2024-03-26 22:48:40 -07:00
Weves
efc7d6e098 Add support for Github Flavored Markdown 2024-03-26 11:15:06 -07:00
Weves
f135ba9c0c Rework LLM answering flow 2024-03-25 13:34:03 -07:00
Weves
1ba74ee4df Refactor search pipeline 2024-03-25 13:34:03 -07:00
Yuhong Sun
7a861ecec4 Session Dependency for Chat Streaming (#1256) 2024-03-24 19:40:06 -07:00
Johannes Vass
3107edc921 Do not obtain DB session via Depends() (#1238)
Endpoints that use Depends(get_session) with a StreamingResponse have
the problem that Depends() releases the session again after the endpoint
function returns. At that point, the streaming response is not
finished yet but still holds a reference to the session and uses it.
However, there is no cleanup of the session after the answer stream
finishes which leads to the connections accumulating in state "idle in
transaction".

This was due to a breaking change in FastAPI 0.106.0
https://fastapi.tiangolo.com/release-notes/#01060

Co-authored-by: Johannes Vass <johannes.vass@cloudflight.io>
2024-03-24 19:31:07 -07:00
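A minimal sketch of the resulting pattern, with illustrative names (router, run_answer_stream) rather than Danswer's actual code: the session is opened inside the generator, so it is released when the stream finishes instead of when the endpoint function returns.

from collections.abc import Iterator

from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

router = APIRouter()
engine = create_engine("sqlite://")  # placeholder; the real app uses its Postgres engine

def run_answer_stream(session: Session) -> Iterator[str]:
    # Stand-in for the real answer pipeline, which reads via the session.
    yield "packet-1\n"
    yield "packet-2\n"

@router.post("/stream-answer")
def stream_answer() -> StreamingResponse:
    def packet_generator() -> Iterator[str]:
        # Opened here instead of via Depends(get_session): the session now
        # lives exactly as long as the stream and is closed afterwards,
        # avoiding connections stuck "idle in transaction".
        with Session(engine) as session:
            yield from run_answer_stream(session)

    return StreamingResponse(packet_generator(), media_type="application/json")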
Yuhong Sun
49263ed146 Linting (#1255) 2024-03-24 19:07:57 -07:00
Matthew Holland
bd1df9649b Added check for internet connection (#1214) 2024-03-24 19:04:40 -07:00
Yuhong Sun
d3674b02e6 Add Llama2 Prompt Option (#1254) 2024-03-24 19:01:38 -07:00
Arslan
b28b3cfa37 Make searching docs the default option (#904) 2024-03-24 18:54:38 -07:00
Arnaud Ritti
12e8fd852c feat: add Helm chart (#1186) 2024-03-24 18:37:27 -07:00
Weves
b8f767adf2 Fix persona client side error 2024-03-23 14:58:01 -07:00
Arthur De Kimpe
920d059da5 Bugfix: Support more Confluence Cloud hostname (*.jira.com) (#1244) 2024-03-23 14:04:26 -07:00
Yuhong Sun
aaa7b26a4d Remove All Enums from Postgres (#1247) 2024-03-22 23:01:05 -07:00
Weves
89e72783a7 Add some private Persona / Document Set stuff 2024-03-22 21:44:31 -07:00
Weves
ec48142a2d Move some of the user re-work stuff to MIT repo 2024-03-22 16:29:24 -07:00
Yuhong Sun
c28a95e367 Port File Store from Volume to PG (#1241) 2024-03-21 20:10:08 -07:00
Weves
8dbe5cbaa6 Add private Persona / Document Set migration 2024-03-21 19:57:51 -07:00
Yuhong Sun
d66b6c0559 Fix Tag Document Source Enum (#1240) 2024-03-21 12:27:56 -07:00
Weves
6a776648b3 Fix LLM max tokens 2024-03-19 18:02:28 -07:00
Yuhong Sun
3a6d32da7c Port KV Store to Postgres (#1227) 2024-03-19 16:21:22 -07:00
Yuhong Sun
fab2be510a Update README (#1226) 2024-03-19 00:22:40 -07:00
Weves
04ae8b1bf9 Increase SQLAlchemy pool size 2024-03-18 12:19:49 -07:00
Weves
4b9c4667f6 Increase connection pool size 2024-03-18 12:19:49 -07:00
Weves
8e89d00e32 Improve Confluence rate limit handling 2024-03-14 19:33:16 -07:00
Yuhong Sun
f45e2476d0 Sharepoint Logging (#1218) 2024-03-14 18:35:31 -07:00
Yuhong Sun
4036e7c6c6 Remove DocumentSource Enum from postgres (#1217) 2024-03-14 18:19:40 -07:00
Kevin Shi
2a8e53c94f Skip draft zendesk articles 2024-03-11 12:00:18 -07:00
Yuhong Sun
90a6e23546 Jira Version Option (#1205) 2024-03-10 12:30:47 -07:00
teocns
19c7ebdc26 connector: ensure absolute URL integrity (#1196) 2024-03-10 01:04:05 -08:00
George 
f292ede85a Jira connector improvements (#1199)
* Jira connector:
- Add feature exclude comments from particular users
- Add feature common and custom fields to jira tasks

Fix bug on web in ConfigDisplay.tsx

* Mypy fixes

* move to metadata field

* k

---------

Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2024-03-10 01:00:19 -08:00
Riccardo Schirone
0442513539 backend: remove duplicated word in ANSWER_VALIDITY_PROMPT 2024-03-09 16:14:39 -08:00
Yuhong Sun
db77d8d7cc File connector metadata (#1203) 2024-03-09 16:13:37 -08:00
Dan Brown
fd5294ed82 Fix broken link when specified in metadata (#1200) 2024-03-09 12:49:15 -08:00
Weves
e752e6d671 Fix bug with persona creation caused by starter messages 2024-03-08 00:57:14 -08:00
Yuhong Sun
3f1cd1ad12 Better description of the document index interfaces (#1188) 2024-03-06 00:07:12 -08:00
Chris Weaver
2ace03081c More fetch_versioned_implementation logging (#1187)
---------

Co-authored-by: Kevin Shi <kevinshisvf@gmail.com>
2024-03-05 09:46:02 -08:00
Weves
40c420f845 Fix disappearing chat sessions 2024-03-04 21:05:55 -08:00
Weves
7869f23e12 Improve slack flow 2024-03-04 19:22:46 -08:00
Kevin Shi
0b0665044f @lru_cache on fetch_versioned_implementation (#1178) 2024-03-04 14:50:51 -08:00
Chris Weaver
a7c820147e Add confluence rate limit handling (#1174) 2024-03-04 01:02:57 -08:00
Weves
563df1f952 Add env variable to allow people to control what clicking on New Chat does 2024-03-03 15:32:37 -08:00
Weves
a8cc3d5a07 Add ability to add starter messages 2024-03-03 14:23:34 -08:00
Yuhong Sun
9051ebfed7 Map to local network for EC2 deployments (#1167) 2024-03-03 13:30:44 -08:00
Weves
197392a95f Change AI Message name to Persona name 2024-03-02 23:28:24 -08:00
Yuhong Sun
81cb1ae399 Fix fail case when conf empty (#1163) 2024-03-02 18:10:37 -08:00
Yuhong Sun
f934e0a5ce Fix Ollama (#1162) 2024-03-02 17:35:41 -08:00
Weves
0366f3313a Fix gen ai model correctness check frequency 2024-03-01 21:59:18 -08:00
Weves
5df2f00e80 Upgrade nextjs version 2024-02-29 17:31:43 -08:00
Matthew Holland
ddc8640504 Skip indexing pages returning HTTP 4XX & 5XX codes 2024-02-29 16:54:26 -08:00
dependabot[bot]
5e7d740814 Bump aiohttp from 3.9.0rc0 to 3.9.2 in /backend/requirements
Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.9.0rc0 to 3.9.2.
- [Release notes](https://github.com/aio-libs/aiohttp/releases)
- [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst)
- [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.0rc0...v3.9.2)

---
updated-dependencies:
- dependency-name: aiohttp
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-02-29 16:52:31 -08:00
Bijay Regmi
cfcc1db338 add back line 2024-02-29 16:44:41 -08:00
Bijay Regmi
05f0ed6414 fix #955 2024-02-29 16:44:41 -08:00
Weves
c65e862adc Add option to disable document cleanup 2024-02-29 16:33:03 -08:00
Thomas Ritaine
86215238fc fix(nginx): adjust config to correctly route /openapi.json and /api/* 2024-02-29 16:32:42 -08:00
Weves
9308ba02a1 Small bug with persona prompt selection 2024-02-29 15:15:44 -08:00
Weves
7b7561533f Fix early breakout causing us to not update ConnectorByCredentialPair 2024-02-29 13:54:37 -08:00
Weves
2331bf9b36 Add trace for db session creation 2024-02-29 13:54:11 -08:00
Yuhong Sun
31d3ae0e3e Fix Slack Document Only Persona (#1150) 2024-02-29 13:53:37 -08:00
Weves
10cb4ab1d2 Revert "Trace sqla get_session"
This reverts commit d07345c533.
2024-02-29 13:41:28 -08:00
Kevin Shi
d07345c533 Trace sqla get_session 2024-02-29 13:33:04 -08:00
Yuhong Sun
c7d228e292 Trim Chunks if LLM tokenizer differs from Embedding tokenizer (#1143) 2024-02-28 13:01:32 -08:00
Weves
cd198ba368 mypy fixes 2024-02-27 16:02:24 -08:00
Weves
3941111685 Update mypy version 2024-02-27 16:02:24 -08:00
Weves
78f2e07d23 Improve tag handling 2024-02-27 16:02:24 -08:00
Weves
02d81c4be5 Bump up packages + add ddtrace 2024-02-27 12:18:11 -08:00
robertoamoreno
59c416b777 Update Document360 Connector (#1113)
expects primary_owners to be a list of dictionaries, but it's being provided with a list of strings instead.

https://github.com/danswer-ai/danswer/issues/1111
2024-02-27 08:40:52 -08:00
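An illustrative sketch of the shape mismatch this commit fixes; the field name below is assumed, not Document360's actual schema:

owner_names = ["Jane Doe", "John Smith"]

# Before: a list of plain strings, which downstream validation rejects
# primary_owners = owner_names

# After: each owner wrapped in the expected dictionary shape
primary_owners = [{"display_name": name} for name in owner_names]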
Weves
b38be416b7 Make init-letsencrypt bring up everything 2024-02-25 22:10:34 -08:00
Yuhong Sun
6d5340ae07 Default Values (#1130) 2024-02-24 14:01:59 -08:00
Yuhong Sun
0f23effe7e Change Endpoint Assumption (#1127) 2024-02-23 20:19:23 -08:00
Weves
9dac17d3e1 Add support for overriding semantic_identifier for file connector 2024-02-23 14:53:24 -08:00
Yuhong Sun
eed45f8410 Update README.md 2024-02-23 10:29:07 -08:00
Yuhong Sun
0e3894f27b Bump Reqs (#1116) 2024-02-22 07:08:00 -08:00
Yuhong Sun
7874eadb00 Bump Requirements (#1114) 2024-02-21 23:15:20 -08:00
Yuhong Sun
cfad36b828 Update CONTRIBUTING.md 2024-02-21 17:44:13 -08:00
Lawyered
76092a5cf0 Improve Virtual Env Activation Instructions for Windows (#1082)
This update enhances the CONTRIBUTING.md guide by providing clear, separate instructions for activating the Python virtual environment on Windows, tailored for both Command Prompt and PowerShell users. Previously, the guide only included a generic command, which might not work across different shells without slight modifications. This change aims to make the setup process more accessible and straightforward for contributors using Windows, ensuring they have the correct commands for their specific environment. By reducing potential setup hurdles, we hope to streamline the initial contribution process for new developers.
2024-02-21 17:15:23 -08:00
Ikko Eltociear Ashimine
0e4677e3db Update connector.py
recieved -> received
2024-02-21 16:54:40 -08:00
Weves
3a9d5b4d90 Style change for the docs_removed_from_index field in the admin UI 2024-02-21 16:53:58 -08:00
Weves
4c7c1b468b Fix mypy errors 2024-02-21 16:53:58 -08:00
Yuhong Sun
7748f4df94 Auto-Detect if Better Default LLM available for OpenAI (#1106) 2024-02-21 16:10:22 -08:00
Johannes Vass
918bc385a2 Remove documents from index which are not returned by connector 2024-02-21 16:09:04 -08:00
Weves
cc69ba03a6 Make WelcomeModal only appear for admins + only if no connectors are setup 2024-02-20 08:28:58 -08:00
Weves
db21d82ea2 Bump tf version 2024-02-19 20:17:20 -08:00
Weves
e246ea9d3b Fix embedding model migration with existing index_attempts 2024-02-19 18:23:59 -08:00
Weves
4eaf2b1200 Add more logging to run-nginx.sh 2024-02-19 16:52:29 -08:00
Weves
9ede8b727d Fix init-letsencrypt script 2024-02-19 16:52:29 -08:00
Weves
d20d2b0970 Bump up fastapi-users version 2024-02-19 15:57:51 -08:00
Weves
6b3ad15c90 Fix persona id change 2024-02-19 15:20:45 -08:00
dependabot[bot]
aa6d86accd Bump python-multipart from 0.0.6 to 0.0.7 in /backend/requirements (#1075)
Bumps [python-multipart](https://github.com/andrew-d/python-multipart) from 0.0.6 to 0.0.7.
- [Release notes](https://github.com/andrew-d/python-multipart/releases)
- [Changelog](https://github.com/Kludex/python-multipart/blob/master/CHANGELOG.md)
- [Commits](https://github.com/andrew-d/python-multipart/compare/0.0.6...0.0.7)

---
updated-dependencies:
- dependency-name: python-multipart
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-19 14:28:04 -08:00
dependabot[bot]
33c1cc491f Bump fastapi from 0.103.0 to 0.109.1 in /backend/requirements (#1043)
Bumps [fastapi](https://github.com/tiangolo/fastapi) from 0.103.0 to 0.109.1.
- [Release notes](https://github.com/tiangolo/fastapi/releases)
- [Commits](https://github.com/tiangolo/fastapi/compare/0.103.0...0.109.1)

---
updated-dependencies:
- dependency-name: fastapi
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-19 14:27:39 -08:00
George
54fb7792c8 Add Jira Server connector form (#1046) 2024-02-19 14:17:36 -08:00
Yuhong Sun
c1d1651b43 Option to stop sync-ing primary index when building secondary one (#1096) 2024-02-18 22:53:26 -08:00
Yuhong Sun
15335dcd7d Standardize Chat Message Stream (#1098) 2024-02-18 22:48:28 -08:00
Yuhong Sun
31278fc52a k 2024-02-18 18:42:45 -08:00
Yuhong Sun
46ee5b2071 k 2024-02-18 18:42:45 -08:00
Weves
6059339e61 Improve initial flow 2024-02-18 18:40:44 -08:00
Weves
f9733f9870 Handle missing ri:userkey gracefully in Confluence connector 2024-02-18 15:20:45 -08:00
Weves
4d2959f1cc Fix 'View Full Trace' 2024-02-18 01:20:40 -08:00
Weves
61e2e68cf9 Improve FE for no-retrieval personas 2024-02-17 23:42:36 -08:00
Yuhong Sun
927e85319c Memory Reduction (#1092) 2024-02-17 21:20:34 -08:00
Yuhong Sun
d2ce3033a2 Enable Chat Without Connectors (#1090) 2024-02-17 21:12:44 -08:00
Yuhong Sun
6e8acdb20d Fix Remove Index Attempt (#1091) 2024-02-17 19:53:17 -08:00
Yuhong Sun
e505486ca4 Zendesk Tags (#1089) 2024-02-17 10:40:19 -08:00
Yuhong Sun
514e7f6e41 Kill Index Attempts for previous model (#1088) 2024-02-16 18:35:01 -08:00
Weves
269431cc9d Remove accidental console.log 2024-02-16 16:51:32 -08:00
Yuhong Sun
92500d448c Guru Error Logging (#1085) 2024-02-16 12:41:18 -08:00
Weves
10ad9babef Fix paused connectors 2024-02-15 15:46:32 -08:00
Weves
064d129592 Allow specifying Postgres User / Password / DB for dev compose file 2024-02-15 14:52:25 -08:00
Yuhong Sun
5fb688df02 Dont Always Warm Up Slackbot Model (#1080) 2024-02-13 22:28:46 -08:00
Yuhong Sun
23bf6ad4c7 Sample API Script (#1079) 2024-02-13 14:47:28 -08:00
Weves
aa7c811a9a Fix view full trace styling 2024-02-12 10:38:01 -08:00
Weves
3c2fb21c11 Fix source display for Persona intro 2024-02-11 22:09:33 -08:00
Yuhong Sun
1b55e617ad Offset Github by 3 hours to not lose updates (#1073) 2024-02-11 17:08:43 -08:00
Yuhong Sun
1c4f7fe7ef Pass Tags to LLM (#1071) 2024-02-11 15:58:42 -08:00
Weves
4629df06ef Fix force search when selecting docs to chat with + fix selected document de-selection on chat switch 2024-02-11 15:58:14 -08:00
Weves
7d11f5ffb8 Fix initial session creation + add Force Search 2024-02-11 13:57:08 -08:00
Weves
591e9831e7 Fix black for Notion connector 2024-02-10 21:09:14 -08:00
Eugene Yaroslavtsev
01bd1a84c4 fix: Notion connector now skips parsing ai_block blocks instead of erroring out (ai_blocks are currently unsupported by Notion API) 2024-02-10 21:01:19 -08:00
Weves
236fa947ee Add full exception trace to UI 2024-02-10 20:52:10 -08:00
Weves
6b5c20dd54 Don't get rid of answer if something goes wrong during quote generation 2024-02-10 19:10:22 -08:00
Weves
d5168deac8 Fix feedback display 2024-02-10 00:19:20 -08:00
Weves
37110df2de Increase session timeout default 2024-02-09 21:17:20 -08:00
Yuhong Sun
517c27c5ed Dev Script to Restart Containers (#1063) 2024-02-08 17:34:15 -08:00
Weves
81f53ff3d8 Fix run-nginx script when initiated from a windows machine 2024-02-08 15:19:25 -08:00
Yuhong Sun
1a1c91a7d9 Support Detection of LLM Max Context for Non OpenAI Models (#1060) 2024-02-08 15:15:58 -08:00
Yuhong Sun
cd8d8def1e Reformat Slack Message Display (#1056) 2024-02-08 14:37:46 -08:00
Yuhong Sun
5a056f1c0c Bump Vulnerable Libs (#1055) 2024-02-07 21:06:40 -08:00
Weves
0fb3fb8a1f Improve Google Drive connector naming 2024-02-07 01:25:47 -08:00
Yuhong Sun
35fe86e931 Option to only include Domain/Org wide docs (#1052) 2024-02-07 01:16:14 -08:00
Weves
4d6b3c8f08 FE to allow full re-indexing 2024-02-07 00:10:19 -08:00
Yuhong Sun
2362c2bdcc Reindex All Backend (#1049) 2024-02-06 23:07:24 -08:00
Weves
62000c1e46 Misc frontend fixes 2024-02-06 23:06:05 -08:00
Weves
c903d92fcc Fix issue with empty issues 2024-02-05 15:47:29 -08:00
Yuhong Sun
988e9aa682 Change Vespa Query to Post from Get (#1044) 2024-02-05 13:40:39 -08:00
Yuhong Sun
6768c24723 Default LLM Update (#1042) 2024-02-05 01:25:51 -08:00
Yuhong Sun
b3b88f05d3 Anonymous User Telem (#1041) 2024-02-04 13:41:19 -08:00
Weves
e54ce779fd Enable selection of long documents 2024-02-03 17:55:24 -08:00
Yuhong Sun
4c9709ae4a Chat History Docs sometimes wrongly ordered (#1039) 2024-02-03 13:25:24 -08:00
Itay
c435bf3854 CI: new pre-commit check (#1037) 2024-02-03 11:23:31 -08:00
Yuhong Sun
bb2b517124 Relari Test Script (#1033) 2024-02-02 09:50:48 -08:00
Szymon Planeta
dc2f4297b5 Add return contexts (#1018) 2024-02-01 22:22:22 -08:00
Yuhong Sun
0060a1dd58 Immediate Mark Resolved SlackBot Option and Respond to Bots Option (#1031) 2024-02-01 22:18:15 -08:00
Weves
29e74c0877 Fix nginx startup issues 2024-02-01 21:54:49 -08:00
Yuhong Sun
779c2829bf Update Base README (#1027) 2024-02-01 16:33:41 -08:00
Yuhong Sun
6a2b7514fe Miscount but it's not used (#1025) 2024-01-30 22:48:45 -08:00
Weves
8b9e6a91a4 Fix change model popup 2024-01-30 10:15:58 -08:00
Weves
b076c3d1ea Add regex support for Slack channels 2024-01-29 20:18:49 -08:00
Weves
d75ca0542a Support Slack channel regex 2024-01-29 20:18:49 -08:00
Yuhong Sun
ce12dd4a5a Fix Secondary Index Polling (#1020) 2024-01-29 19:34:25 -08:00
Weves
0a9b854667 Make final add connector modal slightly prettier 2024-01-29 18:12:13 -08:00
Weves
159453f8d7 Fix SwitchModelModal 2024-01-29 00:33:25 -08:00
Weves
2138c0b69d UI for model selection 2024-01-29 00:14:46 -08:00
Yuhong Sun
4b45164496 Background Index Attempt Creation (#1010) 2024-01-28 23:14:20 -08:00
Moshe Zada
c0c9c67534 Moshe.download nltk data on start (#1014) 2024-01-28 13:05:42 -08:00
Itay
a4053501d0 CI: adding prettier to pre-commit (#1009) 2024-01-28 13:03:39 -08:00
Moshe Zada
60a16fa46d Add space before new line in order to fix typo (#1013) 2024-01-28 13:00:53 -08:00
Itay
0ce992e22e CI: Run Python tests (#1001) 2024-01-28 12:59:51 -08:00
Bill Yang
35105f951b Add launch.json to gitignore (#961)
Co-authored-by: Bill Yang <bill@Bills-MacBook-Pro.local>
2024-01-28 12:57:33 -08:00
Weves
f1a5460739 Fix connected sources display 2024-01-27 12:00:24 -08:00
Weves
824677ca75 Add option to add citations to Personas + allow for more chunks if an LLM model override is specified 2024-01-27 10:16:17 -08:00
Yuhong Sun
cf4ede2130 Embedding Models Table (#1006) 2024-01-26 18:40:53 -08:00
Weves
81c33cc325 Fix import order 2024-01-25 17:26:34 -08:00
Weves
ec93ad9e6d Sharepoint fixes 2024-01-25 17:24:26 -08:00
Yuhong Sun
d0fa02c8dc Multiple Indices in Vespa (#1000) 2024-01-25 13:56:29 -08:00
Hagen O'Neill
d6d83e79f1 Added sharepoint connector (#963) 2024-01-25 13:16:10 -08:00
Chris Weaver
e94fd8b022 Remove un-needed imports (#999) 2024-01-25 12:10:19 -08:00
Yuhong Sun
92628357df Prevent Scheduling Multiple Queued Indexings (#997) 2024-01-24 16:31:29 -08:00
Yuhong Sun
50086526e2 Fix Vespa Title Overly Punished when Missing (#995) 2024-01-24 15:13:36 -08:00
Weves
7174ea3908 Fix hubspot connector 2024-01-24 15:10:08 -08:00
Jeremi Joslin
d07647c597 Fix typo in gmail test connector (#981) 2024-01-24 12:01:26 -08:00
Yuhong Sun
3a6712e3a0 Default Embedding Size (#993) 2024-01-24 12:00:25 -08:00
Yuhong Sun
bcc40224fa Embed Dim Env Var (#988) 2024-01-23 19:32:51 -08:00
Yuhong Sun
5d26290c5d Vespa Hyperparameter Changes (#986) 2024-01-23 17:57:19 -08:00
Yuhong Sun
9d1aa7401e Variable Embedding Dim for Vespa (#985) 2024-01-23 17:38:50 -08:00
Weves
c2b34f623c Handle github rate limiting + fix Slack rate limiting bug + change frozen indexing time to 3 hours 2024-01-23 00:37:33 -08:00
Itay
692fdb4597 Gmail Connector (#946)
---------

Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2024-01-22 16:25:10 -08:00
Weves
2c38033ef5 More latency logging + add limit/offset support 2024-01-21 18:52:55 -08:00
Weves
777521a437 Move delete to the right for consistency + disabled -> paused 2024-01-20 20:04:16 -08:00
Yuhong Sun
0e793e972b Slack Give Resolver Name (#973) 2024-01-20 20:01:23 -08:00
Yuhong Sun
a2a171999a Guru Metadata (#967) 2024-01-18 23:14:26 -08:00
Yuhong Sun
5504c9f289 GitLab Connector Logic Fixes (#966) 2024-01-18 16:44:07 -08:00
Yuhong Sun
5edc464c9a Fix GitLabs CI (#965) 2024-01-18 16:12:46 -08:00
Rutik Thakre
1670d923aa Gitlab Connector (#931) 2024-01-18 15:43:17 -08:00
Weves
1981a02473 Add tags to file connector 2024-01-18 12:12:11 -08:00
Yuhong Sun
4dc8eab014 Fix Linting (#962) 2024-01-17 22:49:37 -08:00
Weves
3a8d89afd3 Fix newlines in answers 2024-01-17 02:10:52 -08:00
Weves
fa879f7d7f Add new APIs specifically for GPTs 2024-01-15 22:35:58 -08:00
Yuhong Sun
f5be0cc2c0 Tiny Mail From Fix (#953) 2024-01-15 22:03:38 -08:00
Roman
621967d2b6 Add MAIL_FROM env variable (#949)
Co-authored-by: Roman Tyshyk <roman.tyshyk@711media.de>
2024-01-15 21:53:03 -08:00
Yuhong Sun
44905d36e5 Slack Rate Limit Options (#952) 2024-01-15 21:47:24 -08:00
mattboret
53add2c801 Add support to limit the number of Slack questions per minute (#908)
Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2024-01-15 21:26:35 -08:00
Yuhong Sun
d17426749d Group support for Slack Followup (#951) 2024-01-15 19:21:06 -08:00
Chris Weaver
d099b931d8 Slack confirmation UI (#950) 2024-01-15 15:33:55 -08:00
Yuhong Sun
4cd9122ba5 Slack Followup Option (#948) 2024-01-15 14:26:20 -08:00
Yuhong Sun
22fb7c3352 Slack LLM Filter Enabled by Default (#943) 2024-01-13 17:37:51 -08:00
dependabot[bot]
4ff3bee605 Bump pycryptodome from 3.19.0 to 3.19.1 in /backend/requirements (#909)
Bumps [pycryptodome](https://github.com/Legrandin/pycryptodome) from 3.19.0 to 3.19.1.
- [Release notes](https://github.com/Legrandin/pycryptodome/releases)
- [Changelog](https://github.com/Legrandin/pycryptodome/blob/master/Changelog.rst)
- [Commits](https://github.com/Legrandin/pycryptodome/compare/v3.19.0...v3.19.1)

---
updated-dependencies:
- dependency-name: pycryptodome
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-01-13 15:42:37 -08:00
Yuhong Sun
7029bdb291 Remove unused doc endpoint (#942) 2024-01-13 15:22:23 -08:00
Yuhong Sun
cf4c3c57ed Log size limit to prevent more disk usage (#941) 2024-01-13 15:22:01 -08:00
Weves
1b6eb0a52f Add API Key table 2024-01-12 17:48:25 -08:00
Yuhong Sun
503a709e37 Update Slack Link (#936) 2024-01-11 17:42:26 -08:00
Yuhong Sun
0fd36c3120 Remove Sweep Issues (#935) 2024-01-11 12:31:38 -08:00
Yuhong Sun
8d12c7c202 Remove Sweep Conf (#934) 2024-01-11 11:38:17 -08:00
Sam Jakos
a4d5ac816e Add metadata file loader to ZIP file connector (#920) 2024-01-11 11:14:30 -08:00
Yuhong Sun
2a139fd529 Poll Connector Window Overlap (#930) 2024-01-11 11:10:01 -08:00
Weves
54347e100f Fix Notion recursive 2024-01-10 17:17:56 -08:00
Yuhong Sun
936e69bc2b Stop Streaming Pattern (#923) 2024-01-09 23:52:21 -08:00
Yuhong Sun
0056cdcf44 GitHub Base URL (#922) 2024-01-09 22:31:07 -08:00
Yuhong Sun
1791edec03 Fix Occasionally dropped Issues (#921) 2024-01-09 22:16:02 -08:00
Yuhong Sun
6201c1b585 Update CONTRIBUTING.md 2024-01-08 22:44:49 -08:00
Yuhong Sun
c8f34e3103 Update CONTRIBUTING.md 2024-01-08 22:44:11 -08:00
Weves
77b0d76f53 Small improvement to persona table 2024-01-08 19:17:39 -08:00
Weves
733626f277 Fix modal z scores 2024-01-08 18:26:36 -08:00
Weves
1da79c8627 Fix mypy errors on loopio connector 2024-01-08 16:19:07 -08:00
Weves
4e3d57b1b9 Disallow Google Drive credential delete if connectors exist 2024-01-08 15:57:18 -08:00
Weves
e473ad0412 Go back to localhost:3000 2024-01-08 15:35:52 -08:00
Weves
7efd3ba42f Add retries to Slack 2024-01-08 15:24:01 -08:00
mikewolfxyou
879e873310 Add deployment/data/nginx/app.conf to .gitignore (#912)
In order to prevent local development changes from appearing in git diff and being committed

Co-authored-by: Mike Zhiguo Zhang <zhiguo.zhang@real-digital.de>
2024-01-07 10:32:27 -08:00
Yuhong Sun
adc747e66c Explain Chat History Structure (#913) 2024-01-07 10:30:01 -08:00
Mike P. Sinn
a29c1ff05c Change WEB_DOMAIN default to 127.0.0.1 instead of localhost (#901)
Got it, thanks for the contribution! Neither of us (maintainers) use Windows so we weren't aware of this. Thanks a bunch for pointing it out and fixing it!
2024-01-05 23:50:57 -08:00
Yuhong Sun
49415e4615 Don't replace citations in code blocks (#911) 2024-01-05 23:32:28 -08:00
Sam Jakos
885e698d5d Add Loopio Connector (#850)
Looks good! I couldn't verify that it end-to-end because Loopio still hasn't granted me API access but the code looks good. Thanks a bunch for the contribution!

Would you be open to also writing the docs page for the setup? It's just adding an md file with some images or gifs:
https://github.com/danswer-ai/danswer-docs

I can provide a template branch if that would make it easier, just let me know 🙏
2024-01-05 23:32:10 -08:00
Weves
30983657ec Fix indexing of whitespace only 2024-01-05 19:35:38 -08:00
Yuhong Sun
6b6b3daab7 Reenable option to run Danswer without Gen AI (#906) 2024-01-03 18:31:16 -08:00
Chris Weaver
20441df4a4 Add Tag Filter UI + other UI cleanup (#905) 2024-01-02 11:30:36 -08:00
Yuhong Sun
d7141df5fc Metadata and Title Search (#903) 2024-01-02 11:25:50 -08:00
Yuhong Sun
615bb7b095 Update CONTRIBUTING.md 2024-01-01 18:07:50 -08:00
Yuhong Sun
e759718c3e Update CONTRIBUTING.md 2024-01-01 18:06:56 -08:00
Yuhong Sun
06d8d0e53c Update CONTRIBUTING.md 2024-01-01 18:06:17 -08:00
Weves
ae9b556876 Revamp new chat screen for chat UI 2023-12-30 18:13:24 -08:00
Chris Weaver
f883611e94 Add query editing in Chat UI (#899) 2023-12-30 12:46:48 -08:00
Yuhong Sun
13c536c033 Final Backend CVEs (#900) 2023-12-30 11:57:49 -08:00
Yuhong Sun
2e6be57880 Model Server CVEs (#898) 2023-12-29 21:14:08 -08:00
Weves
b352d83b8c Increase max upload size 2023-12-29 21:11:57 -08:00
Yuhong Sun
aa67768c79 CVEs continued (#889) 2023-12-29 20:42:16 -08:00
Weves
6004e540f3 Improve Vespa invalid char cleanup 2023-12-29 20:36:03 -08:00
eukub
64d2cea396 reduced redundancy and changed concatenation of strings to f-strings 2023-12-29 00:35:04 -08:00
Weves
b5947a1c74 Add illegal char stripping to title field 2023-12-29 00:17:40 -08:00
Weves
cdf260b277 Fix chat refresh + add stop button 2023-12-28 23:33:41 -08:00
Weves
73483b5e09 Fix more auth disabled flakiness 2023-12-27 01:23:29 -08:00
Yuhong Sun
a6a444f365 Bump Python Version for security (#887) 2023-12-26 16:15:14 -08:00
Yuhong Sun
449a403c73 Automatic Security Scan (#886) 2023-12-26 14:41:23 -08:00
Yuhong Sun
4aebf824d2 Fix broken build SHA issue (#885) 2023-12-26 14:36:40 -08:00
Weves
26946198de Fix disabled auth 2023-12-26 12:51:58 -08:00
Yuhong Sun
e5035b8992 Move some util functions around (#883) 2023-12-26 00:38:29 -08:00
Weves
2e9af3086a Remove old comment 2023-12-25 21:36:54 -08:00
Weves
dab3ba8a41 Add support for basic auth on FE 2023-12-25 21:19:59 -08:00
Yuhong Sun
1e84b0daa4 Fix escape character handling in DanswerBot (#880) 2023-12-25 12:28:35 -08:00
Yuhong Sun
f4c8abdf21 Remove Extraneous Persona Config (#878) 2023-12-24 22:48:48 -08:00
sweep-ai[bot]
ccc5bb1e67 Configure Sweep (#875)
* Create sweep.yaml

* Create sweep template

* Update sweep.yaml

---------

Co-authored-by: sweep-ai[bot] <128439645+sweep-ai[bot]@users.noreply.github.com>
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2023-12-24 19:04:52 -08:00
Yuhong Sun
c3cf9134bb Telemetry Revision (#868) 2023-12-24 17:39:37 -08:00
Weves
0370b9b38d Stop copying local node_modules / .next dir into web docker image 2023-12-24 15:27:11 -08:00
Weves
95bf1c13ad Add http2 dependency 2023-12-24 14:49:31 -08:00
Yuhong Sun
00c1f93b12 Zendesk Tiny Cleanup (#867) 2023-12-23 16:39:15 -08:00
Yuhong Sun
a122510cee Zendesk Connector Metadata and small batch fix (#866) 2023-12-23 16:34:48 -08:00
Weves
dca4f7a72b Adding http2 support to Vespa 2023-12-23 16:23:24 -08:00
Weves
535dc265c5 Fix boost resetting on document update + fix refresh on re-index 2023-12-23 15:23:21 -08:00
Weves
56882367ba Fix migrations 2023-12-23 12:58:00 -08:00
Weves
d9fbd7ffe2 Add hiding + re-ordering to personas 2023-12-22 23:04:43 -08:00
Yuhong Sun
8b7d01fb3b Allow Duplicate Naming for CC-Pair (#862) 2023-12-22 23:03:44 -08:00
voarsh2
016a087b10 Refactor environment variable handling using ConfigMap for Kubernetes deployment (#515)
---------

Co-authored-by: Reese Jenner <reesevader@hotmail.co.uk>
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2023-12-22 21:33:36 -08:00
Sam Jakos
241b886976 fix: parse INDEX_BATCH_SIZE to an int (#858) 2023-12-22 13:03:21 -08:00
Yuhong Sun
ff014e4f5a Bump Transformer Version (#857) 2023-12-22 01:47:18 -08:00
Aliaksandr_С
0318507911 Indexing settings and logging improve (#821)
---------

Co-authored-by: Aliaksandr Chernak <aliaksandr_chernak@epam.com>
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2023-12-22 01:13:24 -08:00
Yuhong Sun
6650f01dc6 Multilingual Docs Updates (#856) 2023-12-22 00:26:00 -08:00
Yuhong Sun
962e3f726a Slack Feedback Message Tweaks (#855) 2023-12-21 20:52:11 -08:00
mattboret
25a73b9921 Slack bot improve source feedback (#827)
---------

Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2023-12-21 20:33:20 -08:00
Yuhong Sun
dc0b3672ac git push --set-upstream origin danswerbot-format (#854) 2023-12-21 18:46:30 -08:00
Yuhong Sun
c4ad03a65d Handle DanswerBot case where no updated at (#853) 2023-12-21 18:33:42 -08:00
mattboret
c6f354fd03 Add the latest document update to the Slack bot answer (#817)
* Add the latest source update to the Slack bot answer

* fix mypy errors

---------

Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2023-12-21 18:16:05 -08:00
Yuhong Sun
2f001c23b7 Confluence add tag to replaced names (#852) 2023-12-21 18:03:56 -08:00
mattboret
4d950aa60d Replace user id by the user display name in the exported Confluence page (#815)
Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2023-12-21 17:52:28 -08:00
Yuhong Sun
56406a0b53 Bump Vespa to 8.277.17 (#851) 2023-12-21 17:23:27 -08:00
sam lockart
eb31c08461 Update Vespa to 8.267.29 (#812) 2023-12-21 17:18:16 -08:00
Weves
26f94c9890 Improve re-sizing 2023-12-21 10:03:03 -08:00
Weves
a9570e01e2 Make document sidebar scrollbar darker 2023-12-21 10:03:03 -08:00
Weves
402d83e167 Make it so docs without links aren't clickable in chat citations 2023-12-21 10:03:03 -08:00
Ikko Eltociear Ashimine
10dcd49fc8 Update CONTRIBUTING.md
Nagivate -> Navigate
2023-12-21 09:10:52 -08:00
Yuhong Sun
0fdad0e777 Update Demo Video 2023-12-20 19:05:23 -08:00
Weves
fab767d794 Fix persona document sets 2023-12-20 15:24:32 -08:00
Weves
7dd70ca4c0 Change danswer header link in chat page 2023-12-20 11:38:33 -08:00
Weves
370760eeee Fix editing deleted personas, editing personas with no prompts, and model selection 2023-12-19 14:42:13 -08:00
Weves
24a62cb33d Fix persona + prompt apis 2023-12-19 10:23:06 -08:00
Weves
9e4a4ddf39 Update search helper styling 2023-12-19 07:08:11 -08:00
Yuhong Sun
c281859509 Google Drive handle invalid PDFs (#838) 2023-12-18 23:39:45 -08:00
Yuhong Sun
2180a40bd3 Disable Chain of Thought for now (#837) 2023-12-18 21:44:47 -08:00
Weves
997f9c3191 Fix ccPair pages crashing 2023-12-17 23:28:26 -08:00
Weves
677c32ea79 Fix issue where a message that errors out creates a bad state 2023-12-17 23:28:26 -08:00
Yuhong Sun
edfc849652 Search more frequently (#834) 2023-12-17 22:45:46 -08:00
Yuhong Sun
9d296b623b Shield Update (#833) 2023-12-17 22:17:44 -08:00
Yuhong Sun
5957b888a5 DanswerBot Chat (#831) 2023-12-17 18:18:48 -08:00
Chris Weaver
c7a91b1819 Allow re-sizing of document sidebar + make central chat smaller on small screens (#832) 2023-12-17 18:17:43 -08:00
Weves
a099f8e296 Rework header a bit + remove assumption of all personas having a prompt 2023-12-14 23:06:39 -08:00
Weves
16c8969028 Chat UI 2023-12-14 22:18:42 -08:00
Yuhong Sun
65fde8f1b3 Chat Backend (#801) 2023-12-14 22:14:37 -08:00
Yuhong Sun
229db47e5d Update LLM Key Check Logic (#825) 2023-12-09 13:41:31 -08:00
Weves
2e3397feb0 Check for slack bot token changes every 60 seconds 2023-12-08 14:14:22 -08:00
Weves
d5658ce477 Persona enhancements 2023-12-07 14:29:37 -08:00
Weves
ddf3f99da4 Add support for global API prefix env variable 2023-12-07 12:42:17 -08:00
Weves
56785e6065 Add model choice to Persona 2023-12-07 00:20:42 -08:00
Weves
26e808d2a1 Fix welcome modal 2023-12-06 21:07:34 -08:00
Yuhong Sun
e3ac373f05 Make Default Fast LLM not identical to main LLM (#818) 2023-12-06 16:14:04 -08:00
Yuhong Sun
9e9a578921 Option to speed up DanswerBot by turning off chain of thought (#816) 2023-12-05 00:43:45 -08:00
Weves
f7172612e1 Allow persona usage for Slack bots 2023-12-04 19:20:03 -08:00
Yuhong Sun
5aa2de7a40 Fix Weak Models Concurrency Issue (#811) 2023-12-04 15:40:10 -08:00
Yuhong Sun
e0b87d9d4e Fix Weak Model Prompt (#810) 2023-12-04 15:02:08 -08:00
Weves
5607fdcddd Make Slack Bot setup UI more similar to Persona setup 2023-12-03 23:36:54 -08:00
Yuhong Sun
651de071f7 Improve English rephrasing for multilingual use case (#808) 2023-12-03 14:34:12 -08:00
John Bergvall
5629ca7d96 Copy SearchQuery model with updated attribute due to Config.frozen=True (#806)
Fixes the following TypeError:

api_server_1     |   File "/usr/local/lib/python3.11/site-packages/anyio/to_thread.py", line 33, in run_sync
api_server_1     |     return await get_asynclib().run_sync_in_worker_thread(
api_server_1     |            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
api_server_1     |   File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
api_server_1     |     return await future
api_server_1     |            ^^^^^^^^^^^^
api_server_1     |   File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 807, in run
api_server_1     |     result = context.run(func, *args)
api_server_1     |              ^^^^^^^^^^^^^^^^^^^^^^^^
api_server_1     |   File "/usr/local/lib/python3.11/site-packages/starlette/concurrency.py", line 53, in _next
api_server_1     |     return next(iterator)
api_server_1     |            ^^^^^^^^^^^^^^
api_server_1     |   File "/app/danswer/utils/timing.py", line 47, in wrapped_func
api_server_1     |     value = next(gen)
api_server_1     |             ^^^^^^^^^
api_server_1     |   File "/app/danswer/direct_qa/answer_question.py", line 243, in answer_qa_query_stream
api_server_1     |     top_chunks = cast(list[InferenceChunk], next(search_generator))
api_server_1     |                                             ^^^^^^^^^^^^^^^^^^^^^^
api_server_1     |   File "/app/danswer/search/search_runner.py", line 469, in full_chunk_search_generator
api_server_1     |     retrieved_chunks = retrieve_chunks(
api_server_1     |                        ^^^^^^^^^^^^^^^^
api_server_1     |   File "/app/danswer/search/search_runner.py", line 353, in retrieve_chunks
api_server_1     |     q_copy.query = rephrase
api_server_1     |     ^^^^^^^^^^^^
api_server_1     |   File "pydantic/main.py", line 359, in pydantic.main.BaseModel.__setattr__
api_server_1     | TypeError: "SearchQuery" is immutable and does not support item assignment
2023-12-03 13:47:11 -08:00
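The failure and fix in miniature, using pydantic v1 semantics (the repo pins pydantic 1.10.x): a frozen model rejects attribute assignment, but .copy(update=...) returns a new instance with the changed field.

from pydantic import BaseModel

class SearchQuery(BaseModel):
    query: str

    class Config:
        frozen = True  # implies immutability (and hashability) in pydantic v1

q = SearchQuery(query="original phrasing")
# q.query = "rephrase"  # would raise: "SearchQuery" is immutable ...
q_copy = q.copy(update={"query": "rephrase"})
print(q_copy.query)  # "rephrase"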
Yuhong Sun
bc403d97f2 Organize Prompts for Chat implementation (#807) 2023-12-03 13:27:11 -08:00
Weves
292c78b193 Always pull latest data when visiting main search page 2023-12-03 03:25:13 -08:00
Weves
ac35719038 FE improvements to make initial setup more intuitive 2023-12-02 16:40:44 -08:00
Yuhong Sun
02095e9281 Restructure APIs (#803) 2023-12-02 14:48:08 -08:00
Yuhong Sun
8954a04602 Reorder Tables for cleaner extending (#800) 2023-12-01 17:46:13 -08:00
Yuhong Sun
8020db9e9a Update connector interface with optional Owners information (#798) 2023-11-30 23:08:16 -08:00
Yuhong Sun
17c2f06338 Add more metadata options for File connector (#797) 2023-11-30 13:24:22 -08:00
Weves
9cff294a71 Increase retries for google drive connector 2023-11-30 03:03:26 -08:00
Weves
e983aaeca7 Add more logging on existing jobs 2023-11-30 02:58:37 -08:00
Weves
7ea774f35b Change in-progress status color 2023-11-29 20:57:45 -08:00
Weves
d1846823ba Associate a user with web/file connectors 2023-11-29 18:18:56 -08:00
Yuhong Sun
fda89ac810 Expert Recommendation Heuristic Only (#791) 2023-11-29 15:53:57 -08:00
Yuhong Sun
006fd4c438 Ingestion API now always updates regardless of document updated_at (#786) 2023-11-29 02:08:50 -08:00
Weves
9b7069a043 Disallow re-indexing for File connector 2023-11-29 02:01:11 -08:00
Weves
c64c25b2e1 Fix temp file deletion 2023-11-29 02:00:20 -08:00
Yuhong Sun
c2727a3f19 Custom OpenAI Model Server (#782) 2023-11-29 01:41:56 -08:00
Chris Weaver
37daf4f3e4 Remove AI Thoughts by default (#783)
- Removes AI Thoughts by default - only shows when validation fails
- Removes punctuation "words" from queries in addition to stopwords (Vespa ignores punctuation anyways)
- Fixes Vespa deletion script for larger doc counts
2023-11-29 01:00:53 -08:00
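For the punctuation-stripping piece of this commit, a small sketch of the idea (helper name illustrative):

import string

def remove_punctuation_words(words: list[str]) -> list[str]:
    # Drop tokens that consist only of punctuation; Vespa ignores
    # punctuation anyway, so they just add noise to the query.
    return [w for w in words if not all(ch in string.punctuation for ch in w)]

print(remove_punctuation_words(["how", "-", "does", "it", "work", "?!"]))
# -> ['how', 'does', 'it', 'work']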
Yuhong Sun
fcb7f6fcc0 Accept files with character issues (#781) 2023-11-28 22:43:58 -08:00
Weves
429016d4a2 Fix zulip page 2023-11-28 16:28:51 -08:00
Chris Weaver
c83a450ec4 Remove personal connectors page (#779) 2023-11-28 16:11:42 -08:00
Yuhong Sun
187b94a7d8 Blurb Key Error (#778) 2023-11-28 16:09:33 -08:00
Weves
30225fd4c5 Fix filter hiding 2023-11-28 04:13:11 -08:00
Weves
a4f053fa5b Fix persona refresh 2023-11-28 02:53:18 -08:00
Weves
eab4fe83a0 Remove Slack bot personas from web UI 2023-11-28 02:53:18 -08:00
Chris Weaver
78d1ae0379 Customizable personas (#772)
Also includes a small fix to LLM filtering when combined with reranking
2023-11-28 00:57:48 -08:00
Yuhong Sun
87beb1f4d1 Log LLM details on server start (#773) 2023-11-27 21:32:48 -08:00
Yuhong Sun
05c2b7d34e Update LLM related Libs (#771) 2023-11-26 19:54:16 -08:00
Yuhong Sun
39d09a162a Danswer APIs Document Ingestion Endpoint (#716) 2023-11-26 19:09:22 -08:00
Yuhong Sun
d291fea020 Turn off Reranking for Streaming Flows (#770) 2023-11-26 16:45:23 -08:00
Yuhong Sun
2665bff78e Option to turn off LLM for eval script (#769) 2023-11-26 15:31:03 -08:00
Yuhong Sun
65d38ac8c3 Slack to respect LLM chunk filter settings (#768) 2023-11-26 01:06:12 -08:00
Yuhong Sun
8391d89bea Fix Indexing Concurrency (#767) 2023-11-25 21:40:36 -08:00
Yuhong Sun
ac2ed31726 Indexing Jobs to have shorter lived DB sessions (#766) 2023-11-24 21:38:16 -08:00
Chris Weaver
47f947b045 Use torch.multiprocessing + enable SimpleJobClient by default (#765) 2023-11-24 18:29:28 -08:00
dependabot[bot]
63b051b342 Bump sharp from 0.32.5 to 0.32.6 in /web
Bumps [sharp](https://github.com/lovell/sharp) from 0.32.5 to 0.32.6.
- [Release notes](https://github.com/lovell/sharp/releases)
- [Changelog](https://github.com/lovell/sharp/blob/main/docs/changelog.md)
- [Commits](https://github.com/lovell/sharp/compare/v0.32.5...v0.32.6)

---
updated-dependencies:
- dependency-name: sharp
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-11-24 18:14:45 -08:00
Weves
a5729e2fa6 Add new model server env vars to the compose file 2023-11-24 00:12:04 -08:00
Weves
3cec854c5c Allow different model servers for different models / indexing jobs 2023-11-23 23:39:03 -08:00
Weves
26c6651a03 Improve LLM answer parsing 2023-11-23 15:03:35 -08:00
Yuhong Sun
13001ede98 Search Regression Test and Save/Load State updates (#761) 2023-11-23 00:00:30 -08:00
Yuhong Sun
fda377a2fa Regression Script for Search quality (#760) 2023-11-22 19:33:28 -08:00
Yuhong Sun
bdfb894507 Slack Role Override (#755) 2023-11-22 17:47:18 -08:00
Weves
35c3511daa Increase Vespa timeout 2023-11-22 01:42:59 -08:00
Chris Weaver
c1e19d0d93 Add selected docs in UI + rework the backend flow a bit (#754)
Changes the flow so that the selected docs are sent over in a separate packet rather than as part of the initial packet for the streaming QA endpoint.
2023-11-21 19:46:12 -08:00
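A sketch of the packet split described here, with illustrative packet keys: selected documents are emitted as their own packet up front rather than bundled into the first packet of the stream.

import json
from collections.abc import Iterator

def qa_response_packets(
    selected_docs: list[dict],
    answer_pieces: Iterator[str],
) -> Iterator[str]:
    # Selected documents go out as a dedicated packet...
    yield json.dumps({"selected_documents": selected_docs}) + "\n"
    # ...followed by the usual streamed answer pieces.
    for piece in answer_pieces:
        yield json.dumps({"answer_piece": piece}) + "\n"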
mattboret
e78aefb408 Add script to analyse the sources selection (#721)
---------

Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2023-11-21 18:35:26 -08:00
Bryan Peterson
aa2e859b46 add missing dependencies in model_server dockerfile (#752)
Thanks for catching this! Super helpful!
2023-11-21 17:59:28 -08:00
Yuhong Sun
c0c8ae6c08 Minor Tuning for Filters (#753) 2023-11-21 15:47:58 -08:00
Weves
1225c663eb Add new env variable to compose file 2023-11-20 21:40:54 -08:00
Weves
e052d607d5 Add option to log Vespa timing info 2023-11-20 21:37:22 -08:00
Yuhong Sun
8e5e11a554 Add md files to File Connector (#749) 2023-11-20 19:56:06 -08:00
Yuhong Sun
57f0323f52 NLP Model Warmup Reworked (#748) 2023-11-20 17:28:23 -08:00
Weves
6e9f31d1e9 Fix ResourceLogger blocking main thread 2023-11-20 16:46:18 -08:00
Weves
eeb844e35e Fix bug with Google Drive shortcut error case 2023-11-20 16:34:07 -08:00
Sid Ravinutala
d6a84ab413 fix for url parsing google site 2023-11-20 16:08:43 -08:00
Weves
68160d49dd Small mods to enable deployment on AWS EKS 2023-11-20 01:42:48 -08:00
Yuhong Sun
0cc3d65839 Add option to run a faster/cheaper LLM for secondary flows (#742) 2023-11-19 17:48:42 -08:00
Weves
df37387146 Fix a couple bugs with google sites link finding 2023-11-19 15:35:54 -08:00
Yuhong Sun
f72825cd46 Provide Metadata to the LLM (#740) 2023-11-19 12:28:45 -08:00
Yuhong Sun
6fb07d20cc Multilingual Query Expansion (#737) 2023-11-19 10:55:55 -08:00
Chris Weaver
b258ec1bed Adjust checks for removal from existing_jobs dict + add more logging + only one scheduled job for a connector at a time (#739) 2023-11-19 02:03:17 -08:00
Yuhong Sun
4fd55b8928 Fix GPT4All (#738) 2023-11-18 21:21:02 -08:00
Yuhong Sun
b3ea53fa46 Fix Build Version (#736) 2023-11-18 17:16:25 -08:00
Yuhong Sun
fa0d19cc8c LLM Chunk Filtering (#735) 2023-11-18 17:12:24 -08:00
Weves
d5916e420c Fix duplicated query event for 'answer_qa_query_stream' and missing llm_answer in 'answer_qa_query' 2023-11-17 21:10:23 -08:00
Weves
39b912befd Enable show GPT answer option immediately 2023-11-17 17:08:38 -08:00
Weves
37c5f24d91 Fix logout redirect 2023-11-17 16:43:24 -08:00
Weves
ae72cd56f8 Add a bit more logging in indexing pipeline 2023-11-16 12:00:19 -08:00
Yuhong Sun
be5ef77896 Optional Anonymous Telemetry (#727) 2023-11-16 09:22:36 -08:00
Weves
0ed8f14015 Improve Vespa filtering performance 2023-11-15 14:30:12 -08:00
Weves
a03e443541 Add root_page_id option for Notion connector 2023-11-15 12:46:41 -08:00
Weves
4935459798 Fix hover being transparent 2023-11-15 11:52:40 -08:00
Weves
efb52873dd Prettier fix 2023-11-14 22:22:42 -08:00
Bradley
442f7595cc Added connector configuration link and external link icon to web connector page. 2023-11-14 22:19:00 -08:00
Weves
81cbcbb403 Fix connector deletion bug 2023-11-14 09:07:59 -08:00
Weves
0a0e672b35 Fix no letsencrypt 2023-11-13 14:32:51 -08:00
Yuhong Sun
69644b266e Hybrid Search Alpha Parameter (#714) 2023-11-09 17:11:10 -08:00
Yuhong Sun
5a4820c55f Skip Index on Docs with no newer updated at (#713) 2023-11-09 16:27:32 -08:00
Weves
a5d69bb392 Add back end time to Gong 2023-11-09 14:03:46 -08:00
Weves
23ee45c033 Enhance document explorer 2023-11-09 00:58:51 -08:00
Yuhong Sun
31bfd015ae Request Tracker Connector (#709)
Contributed by Evan! Thanks for the contribution!

- Minor linting and rebasing done by Yuhong, everything else from Evan

---------

Co-authored-by: Evan Sarmiento <e.sarmiento@soax.com>
Co-authored-by: Evan <esarmien@fas.harvard.edu>
2023-11-07 16:55:10 -08:00
Yuhong Sun
0125d8a0f6 Source Filter Extraction (#708) 2023-11-07 14:21:04 -08:00
Yuhong Sun
4f64444f0f Fix Version from Tag not picked up (#705) 2023-11-06 20:01:20 -08:00
Weves
abf9cc3248 Add timeout to all Notion calls 2023-11-06 19:29:42 -08:00
Chris Weaver
f5bf2e6374 Fix experimental checkpointing + move check for disabled connector to the start of the batch (#703) 2023-11-06 17:14:31 -08:00
Yuhong Sun
24b3b1fa9e Fix GitHub Actions Naming (#702) 2023-11-06 16:40:49 -08:00
Yuhong Sun
7433dddac3 Model Server (#695)
Provides the ability to pull out the NLP models into a separate model server which can then be hosted on a GPU instance if desired.
2023-11-06 16:36:09 -08:00
Weves
fe938b6fc6 Add experimental checkpointing 2023-11-04 14:51:28 -07:00
dependabot[bot]
2db029672b Bump pypdf from 3.16.4 to 3.17.0 in /backend/requirements (#667)
Bumps [pypdf](https://github.com/py-pdf/pypdf) from 3.16.4 to 3.17.0.
- [Release notes](https://github.com/py-pdf/pypdf/releases)
- [Changelog](https://github.com/py-pdf/pypdf/blob/main/CHANGELOG.md)
- [Commits](https://github.com/py-pdf/pypdf/compare/3.16.4...3.17.0)

---
updated-dependencies:
- dependency-name: pypdf
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-03 18:54:29 -07:00
Yuhong Sun
602f9c4a0a Default Version to 0.2-dev (#690) 2023-11-03 18:37:01 -07:00
Bradley
551705ad62 Implemented Danswer versioning system. (#649)
* Web & API server versioning system. Displayed on UI.

* Remove some debugging code.

* Integrated backend version into GitHub Action & Docker build workflow using env variables.

* Fixed web container environment variable name.

* Revise Dockerfiles for GitHub Actions workflow.

* Added system information page to admin panel with version info. Updated github workflows to include tagged version, and corresponding changes in the dockerfiles and codebases for web&backend to use env variables if present. Changed to 'dev' naming scheme if no env var is present to indicate local setup. Removed version from admin panel header.

* Added missing systeminfo dir to remote repo.
2023-11-03 18:02:39 -07:00
Weves
d9581ce0ae Fix Notion recursive search for non-shared database 2023-11-03 15:46:23 -07:00
Yuhong Sun
e27800d501 Formatting 2023-11-02 23:31:19 -07:00
Yuhong Sun
927dffecb5 Prompt Layer Rework (#688) 2023-11-02 23:26:47 -07:00
Weves
68b23b6339 Enable database reading in recursive notion crawl 2023-11-02 23:14:54 -07:00
Weves
174f54473e Fix notion recursive search for blocks with children 2023-11-02 22:21:55 -07:00
Weves
329824ab22 Address issue with links for Google Sites connector 2023-11-02 22:01:08 -07:00
Yuhong Sun
b0f76b97ef Guru and Productboard Time Updated (#683) 2023-11-02 14:27:06 -07:00
753 changed files with 57241 additions and 15059 deletions

View File

@@ -1,4 +1,4 @@
name: Build and Push Backend Images on Tagging
name: Build and Push Backend Image on Tag
on:
push:
@@ -32,3 +32,13 @@ jobs:
tags: |
danswer/danswer-backend:${{ github.ref_name }}
danswer/danswer-backend:latest
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
# To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend
image-ref: docker.io/danswer/danswer-backend:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'
trivyignores: ./backend/.trivyignore

View File

@@ -0,0 +1,42 @@
name: Build and Push Model Server Image on Tag
on:
push:
tags:
- '*'
jobs:
build-and-push:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Model Server Image Docker Build and Push
uses: docker/build-push-action@v2
with:
context: ./backend
file: ./backend/Dockerfile.model_server
platforms: linux/amd64,linux/arm64
push: true
tags: |
danswer/danswer-model-server:${{ github.ref_name }}
danswer/danswer-model-server:latest
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
image-ref: docker.io/danswer/danswer-model-server:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'

View File

@@ -1,4 +1,4 @@
name: Build and Push Web Images on Tagging
name: Build and Push Web Image on Tag
on:
push:
@@ -32,3 +32,11 @@ jobs:
tags: |
danswer/danswer-web-server:${{ github.ref_name }}
danswer/danswer-web-server:latest
build-args: |
DANSWER_VERSION=${{ github.ref_name }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
image-ref: docker.io/danswer/danswer-web-server:${{ github.ref_name }}
severity: 'CRITICAL,HIGH'

View File

@@ -20,10 +20,12 @@ jobs:
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
- run: |
python -m pip install --upgrade pip
pip install -r backend/requirements/default.txt
pip install -r backend/requirements/dev.txt
pip install -r backend/requirements/model_server.txt
- name: Run MyPy
run: |

.github/workflows/pr-python-tests.yml
View File

@@ -0,0 +1,35 @@
name: Python Unit Tests
on:
pull_request:
branches: [ main ]
jobs:
backend-check:
runs-on: ubuntu-latest
env:
PYTHONPATH: ./backend
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
cache: 'pip'
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install -r backend/requirements/default.txt
pip install -r backend/requirements/dev.txt
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/unit

.github/workflows/pr-quality-checks.yml
View File

@@ -0,0 +1,21 @@
name: Quality Checks PR
concurrency:
group: Quality-Checks-PR-${{ github.head_ref }}
cancel-in-progress: true
on:
pull_request: null
jobs:
quality-checks:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- uses: pre-commit/action@v3.0.0
with:
extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.event.pull_request.head.sha }}

.gitignore
View File

@@ -1,3 +1,7 @@
.env
.DS_store
.venv
.venv
.mypy_cache
.idea
/deployment/data/nginx/app.conf
.vscode/launch.json

View File

@@ -28,6 +28,13 @@ repos:
rev: v0.0.286
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
hooks:
- id: prettier
types_or: [html, css, javascript, ts, tsx]
additional_dependencies:
- prettier
# We would like to have a mypy pre-commit hook, but due to the fact that
# pre-commit runs in its own isolated environment, we would need to install

View File

@@ -11,62 +11,6 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "API Server",
"type": "python",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"env": {
"LOG_LEVEL": "DEBUG",
"DISABLE_AUTH": "True",
"TYPESENSE_API_KEY": "typesense_api_key",
"DYNAMIC_CONFIG_DIR_PATH": "./dynamic_config_storage"
},
"args": [
"danswer.main:app",
"--reload",
"--port",
"8080"
]
},
{
"name": "Indexer",
"type": "python",
"request": "launch",
"program": "danswer/background/update.py",
"cwd": "${workspaceFolder}/backend",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONPATH": ".",
"TYPESENSE_API_KEY": "typesense_api_key",
"DYNAMIC_CONFIG_DIR_PATH": "./dynamic_config_storage"
}
},
{
"name": "Temp File Deletion",
"type": "python",
"request": "launch",
"program": "danswer/background/file_deletion.py",
"cwd": "${workspaceFolder}/backend",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONPATH": "${workspaceFolder}/backend"
}
},
// For the listener to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in a .env file located in the root of the project
{
"name": "Slack Bot Listener",
"type": "python",
"request": "launch",
"program": "danswer/listeners/slack_listener.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"LOG_LEVEL": "DEBUG"
}
},
{
"name": "Web Server",
"type": "node",
@@ -77,6 +21,85 @@
"run", "dev"
],
"console": "integratedTerminal"
},
{
"name": "Model Server",
"type": "python",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": [
"model_server.main:app",
"--reload",
"--port",
"9000"
]
},
{
"name": "API Server",
"type": "python",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"env": {
"LOG_ALL_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": [
"danswer.main:app",
"--reload",
"--port",
"8080"
]
},
{
"name": "Indexing",
"type": "python",
"request": "launch",
"program": "danswer/background/update.py",
"cwd": "${workspaceFolder}/backend",
"env": {
"ENABLE_MINI_CHUNK": "false",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
}
},
// Celery and all async jobs; this would usually include indexing as well, but indexing is handled separately above for dev
{
"name": "Background Jobs",
"type": "python",
"request": "launch",
"program": "scripts/dev_run_background_jobs.py",
"cwd": "${workspaceFolder}/backend",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"args": [
"--no-indexing"
]
},
// For the listener to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in a .env file located in the root of the project
{
"name": "Slack Bot",
"type": "python",
"request": "launch",
"program": "danswer/danswerbot/slack/listener.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
}
}
]
}

View File

@@ -1,3 +1,5 @@
<!-- DANSWER_METADATA={"link": "https://github.com/danswer-ai/danswer/blob/main/CONTRIBUTING.md"} -->
# Contributing to Danswer
Hey there! We are so excited that you're interested in Danswer.
@@ -20,7 +22,7 @@ Your input is vital to making sure that Danswer moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
And always feel free to message us (Chris Weaver / Yuhong Sun) on
[Slack](https://join.slack.com/t/danswer/shared_invite/zt-1u3h3ke3b-VGh1idW19R8oiNRiKBYv2w) /
[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2afut44lv-Rw3kSWu6_OmdAXRpCv80DQ) /
[Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all.
@@ -38,7 +40,7 @@ Our goal is to make contributing as easy as possible. If you run into any issues
That way we can help future contributors and users can avoid the same issue.
We also have support channels and generally interesting discussions on our
[Slack](https://join.slack.com/t/danswer/shared_invite/zt-1u3h3ke3b-VGh1idW19R8oiNRiKBYv2w)
[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2afut44lv-Rw3kSWu6_OmdAXRpCv80DQ)
and
[Discord](https://discord.gg/TDJ59cGV2X).
@@ -56,9 +58,10 @@ development purposes but also feel free to just use the containers and update wi
### Local Set Up
It is recommended to use Python versions >= 3.11.
It is recommended to use Python version 3.11.
This guide skips setting up User Authentication for the purpose of simplicity
If using a lower version, modifications will have to be made to the code.
If using a higher version, the version of Tensorflow we use may not be available for your platform.
#### Installing Requirements
@@ -69,15 +72,20 @@ For convenience here's a command for it:
python -m venv .venv
source .venv/bin/activate
```
_For Windows activate via:_
_For Windows, activate the virtual environment using Command Prompt:_
```bash
.venv\Scripts\activate
```
If using PowerShell, the command slightly differs:
```powershell
.venv\Scripts\Activate.ps1
```
Install the required python dependencies:
```bash
pip install -r danswer/backend/requirements/default.txt
pip install -r danswer/backend/requirements/dev.txt
pip install -r danswer/backend/requirements/model_server.txt
```
Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend.
@@ -86,7 +94,12 @@ Once the above is done, navigate to `danswer/web` run:
npm i
```
Install Playwright (required by the Web Connector), with the python venv active, run:
Install Playwright (required by the Web Connector)
> Note: If you have just done the pip install, open a new terminal and source the python virtual-env again.
This will update the path to include Playwright.
Then install Playwright by running:
```bash
playwright install
```
@@ -100,26 +113,24 @@ docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational
(index refers to Vespa and relational_db refers to Postgres)
#### Running Danswer
Setup a folder to store config. Navigate to `danswer/backend` and run:
```bash
mkdir dynamic_config_storage
```
To start the frontend, navigate to `danswer/web` and run:
```bash
npm run dev
```
Package the Vespa schema. This will only need to be done when the Vespa schema is updated locally.
Navigate to `danswer/backend/danswer/document_index/vespa/app_config` and run:
Next, start the model server which runs the local NLP models.
Navigate to `danswer/backend` and run:
```bash
zip -r ../vespa-app.zip .
uvicorn model_server.main:app --reload --port 9000
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "
uvicorn model_server.main:app --reload --port 9000
"
```
- Note: If you don't have the `zip` utility, you will need to install it prior to running the above
The first time running Danswer, you will also need to run the DB migrations for Postgres.
The first time running Danswer, you will need to run the DB migrations for Postgres.
After the first time, this is no longer required unless the DB models change.
Navigate to `danswer/backend` and with the venv active, run:
@@ -137,17 +148,12 @@ python ./scripts/dev_run_background_jobs.py
To run the backend API server, navigate back to `danswer/backend` and run:
```bash
AUTH_TYPE=disabled \
DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage \
VESPA_DEPLOYMENT_ZIP=./danswer/document_index/vespa/vespa-app.zip \
uvicorn danswer.main:app --reload --port 8080
AUTH_TYPE=disabled uvicorn danswer.main:app --reload --port 8080
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "
$env:AUTH_TYPE='disabled'
$env:DYNAMIC_CONFIG_DIR_PATH='./dynamic_config_storage'
$env:VESPA_DEPLOYMENT_ZIP='./danswer/document_index/vespa/vespa-app.zip'
uvicorn danswer.main:app --reload --port 8080
"
```
@@ -166,20 +172,16 @@ pre-commit install
Additionally, we use `mypy` for static type checking.
Danswer is fully type-annotated, and we would like to keep it that way!
Right now there is no automated type checking (coming soon), but we ask you to manually run it before
creating a pull request with `python -m mypy .` from the `danswer/backend` directory.
To run the mypy checks manually, run `python -m mypy .` from the `danswer/backend` directory.
#### Web
We use `prettier` for formatting. The desired version (2.8.8) will be installed via an `npm i` from the `danswer/web` directory.
To run the formatter, use `npx prettier --write .` from the `danswer/web` directory.
Like `mypy`, we have no automated formatting yet (coming soon), but we request that, for now,
you run this manually before creating a pull request.
Please double check that prettier passes before creating a pull request.
### Release Process
Danswer follows the semver versioning standard.
A set of Docker containers will be pushed automatically to DockerHub with every tag.
You can see the containers [here](https://hub.docker.com/search?q=danswer%2F).
As pre-1.0 software, even patch releases may contain breaking or non-backwards-compatible changes.

View File

@@ -1,15 +1,17 @@
<!-- DANSWER_METADATA={"link": "https://github.com/danswer-ai/danswer/blob/main/README.md"} -->
<h2 align="center">
<a href="https://www.danswer.ai/"> <img width="50%" src="https://github.com/danswer-owners/danswer/blob/1fabd9372d66cd54238847197c33f091a724803b/DanswerWithName.png?raw=true)" /></a>
</h2>
<p align="center">
<p align="center">OpenSource Enterprise Question-Answering</p>
<p align="center">Open Source Gen-AI Chat + Unified Search.</p>
<p align="center">
<a href="https://docs.danswer.dev/" target="_blank">
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
</a>
<a href="https://join.slack.com/t/danswer/shared_invite/zt-1u5ycen3o-6SJbWfivLWP5LPyp_jftuw" target="_blank">
<a href="https://join.slack.com/t/danswer/shared_invite/zt-2afut44lv-Rw3kSWu6_OmdAXRpCv80DQ" target="_blank">
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
</a>
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
@@ -20,62 +22,88 @@
</a>
</p>
<strong>[Danswer](https://www.danswer.ai/)</strong> allows you to ask natural language questions against internal documents and get back reliable answers backed by quotes and references from the source material so that you can always trust what you get back. You can connect to a number of common tools such as Slack, GitHub, Confluence, amongst others.
<strong>[Danswer](https://www.danswer.ai/)</strong> is the AI Assistant connected to your company's docs, apps, and people.
Danswer provides a Chat interface and plugs into any LLM of your choice. Danswer can be deployed anywhere and for any
scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your
own control. Danswer is MIT licensed and designed to be modular and easily extensible. The system also comes fully ready
for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for
configuring Personas (AI Assistants) and their Prompts.
Danswer also serves as a Unified Search across all common workplace tools such as Slack, Google Drive, Confluence, etc.
By combining LLMs and team specific knowledge, Danswer becomes a subject matter expert for the team. Imagine ChatGPT if
it had access to your team's unique knowledge! It enables questions such as "A customer wants feature X, is this already
supported?" or "Where's the pull request for feature Y?"
<h3>Usage</h3>
Danswer provides a fully-featured web UI:
Danswer Web App:
https://github.com/danswer-ai/danswer/assets/32520769/563be14c-9304-47b5-bf0a-9049c2b6f410
https://github.com/danswer-ai/danswer/assets/25087905/619607a1-4ad2-41a0-9728-351752acc26e
Or, if you prefer, you can plug Danswer into your existing Slack workflows (more integrations to come 😁):
Or, plug Danswer into your existing Slack workflows (more integrations to come 😁):
https://github.com/danswer-ai/danswer/assets/25087905/3e19739b-d178-4371-9a38-011430bdec1b
For more details on the admin controls, check out our <strong><a href="https://www.youtube.com/watch?v=geNzY1nbCnU">Full Video Demo</a></strong>!
For more details on the Admin UI to manage connectors and users, check out our
<strong><a href="https://www.youtube.com/watch?v=geNzY1nbCnU">Full Video Demo</a></strong>!
<h3>Deployment</h3>
## Deployment
Danswer can easily be tested locally or deployed on a virtual machine with a single `docker compose` command. Check out our [docs](https://docs.danswer.dev/quickstart) to learn more.
Danswer can easily be run locally (even on a laptop) or deployed on a virtual machine with a single
`docker compose` command. Check out our [docs](https://docs.danswer.dev/quickstart) to learn more.
We also have built-in support for deployment on Kubernetes. Files for that can be found [here](https://github.com/danswer-ai/danswer/tree/main/deployment/kubernetes).
## 💃 Features
* Direct QA powered by Generative AI models with answers backed by quotes and source links.
* Intelligent Document Retrieval (Semantic Search/Reranking) using the latest LLMs.
* An AI Helper backed by a custom Deep Learning model to interpret user intent.
* User authentication and document level access management.
* Support for an LLM of your choice (GPT-4, Llama2, Orca, etc.)
* Management Dashboard to manage connectors and set up features such as live update fetching.
* One line Docker Compose (or Kubernetes) deployment to host Danswer anywhere.
## 🔌 Connectors
## 💃 Main Features
* Chat UI with the ability to select documents to chat with.
* Create custom AI Assistants with different prompts and backing knowledge sets.
* Connect Danswer with an LLM of your choice (self-host for a fully airgapped solution).
* Document Search + AI Answers for natural language queries.
* Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc.
* Slack integration to get answers and search results directly in Slack.
Danswer currently syncs documents (every 10 minutes) from:
## 🚧 Roadmap
* Chat/Prompt sharing with specific teammates and user groups.
* Multi-modal model support, chat with images, video, etc.
* Choosing between LLMs and parameters during a chat session.
* Tool calling and agent configuration options.
* Organizational understanding and ability to locate and suggest experts from your team.
## Other Notable Benefits of Danswer
* User Authentication with document-level access management.
* Best-in-class Hybrid Search across all sources (BM-25 + prefix-aware embedding models).
* Admin Dashboard to configure connectors, document-sets, access, etc.
* Custom deep learning models + learning from user feedback.
* Easy deployment and the ability to host Danswer anywhere of your choosing.
## 🔌 Connectors
Efficiently pulls the latest changes from:
* Slack
* GitHub
* Google Drive
* Confluence
* Jira
* Zendesk
* Gmail
* Notion
* Gong
* Slab
* Linear
* Productboard
* Guru
* Zulip
* Bookstack
* Document360
* Sharepoint
* Hubspot
* Local Files
* Websites
* With more to come...
## 🚧 Roadmap
* Chat/Conversation support.
* Organizational understanding.
* Ability to locate and suggest experts.
* And more ...
## 💡 Contributing
Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.

backend/.dockerignore
View File

@@ -0,0 +1,17 @@
**/__pycache__
venv/
env/
*.egg-info
.cache
.git/
.svn/
.vscode/
.idea/
*.log
log/
.env
secrets.yaml
build/
dist/
.coverage
htmlcov/

backend/.gitignore
View File

@@ -1,4 +1,5 @@
__pycache__/
.mypy_cache
.idea/
site_crawls/
.ipynb_checkpoints/
@@ -7,3 +8,4 @@ api_keys.py
.env
vespa-app.zip
dynamic_config_storage/
celerybeat-schedule*

backend/.trivyignore
View File

@@ -0,0 +1,46 @@
# https://github.com/madler/zlib/issues/868
# Pulled in with base Debian image, it's part of the contrib folder but unused
# zlib1g is fine
# Will be gone with Debian image upgrade
# No impact in our settings
CVE-2023-45853
# krb5 related, worst case is denial of service by resource exhaustion
# Accept the risk
CVE-2024-26458
CVE-2024-26461
CVE-2024-26462
CVE-2024-26458
CVE-2024-26461
CVE-2024-26462
CVE-2024-26458
CVE-2024-26461
CVE-2024-26462
CVE-2024-26458
CVE-2024-26461
CVE-2024-26462
# Specific to Firefox which we do not use
# No impact in our settings
CVE-2024-0743
# bind9 related, worst case is denial of service by CPU resource exhaustion
# Accept the risk
CVE-2023-50387
CVE-2023-50868
CVE-2023-50387
CVE-2023-50868
# libexpat1, XML parsing resource exhaustion
# We don't parse any user provided XMLs
# No impact in our settings
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193

View File

@@ -1,10 +1,25 @@
FROM python:3.11.4-slim-bookworm
FROM python:3.11.7-slim-bookworm
LABEL com.danswer.maintainer="founders@danswer.ai"
LABEL com.danswer.description="This image is for the backend of Danswer. It is MIT Licensed and \
free for all to use. You can find it at https://hub.docker.com/r/danswer/danswer-backend. For \
more details, visit https://github.com/danswer-ai/danswer."
# Default DANSWER_VERSION, typically overridden during builds by GitHub Actions.
ARG DANSWER_VERSION=0.3-dev
ENV DANSWER_VERSION=${DANSWER_VERSION}
RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
# Install system dependencies
# cmake needed for psycopg (postgres)
# libpq-dev needed for psycopg (postgres)
# curl included just for users' convenience
# zip for the Vespa step further down
# ca-certificates for HTTPS
RUN apt-get update && \
apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \
supervisor zip ca-certificates gnupg && \
apt-get install -y cmake curl zip ca-certificates libgnutls30=3.7.9-2+deb12u2 \
libblkid1=2.38.1-5+deb12u1 libmount1=2.38.1-5+deb12u1 libsmartcols1=2.38.1-5+deb12u1 \
libuuid1=2.38.1-5+deb12u1 && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -13,45 +28,36 @@ RUN apt-get update && \
COPY ./requirements/default.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt && \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium
# install nodejs and replace nodejs packaged with playwright (18.17.0) with the one installed below
# based on the instructions found here:
# https://nodejs.org/en/download/package-manager#debian-and-ubuntu-based-linux-distributions
# this is temporarily needed until playwright updates their packaged node version to
# 20.5.1+
RUN mkdir -p /etc/apt/keyrings && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
apt-get update && \
apt-get install -y nodejs && \
cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \
apt-get remove -y nodejs
playwright install chromium && playwright install-deps chromium && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord
# Cleanup for CVEs and size reduction
# Remove tornado test key to placate vulnerability scanners
# More details can be found here:
# https://github.com/tornadoweb/tornado/issues/3107
RUN apt-get remove -y linux-libc-dev && \
# xserver-common and xvfb included by playwright installation but not needed after
# perl-base is part of the base Python Debian image but not needed for Danswer functionality
# perl-base could only be removed with --allow-remove-essential
RUN apt-get remove -y --allow-remove-essential perl-base xserver-common xvfb cmake \
libldap-2.5-0 libldap-2.5-0 && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* && \
rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Pre-downloading models for setups with limited egress
RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-base-v2')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('wordnet', quiet=True); \
nltk.download('punkt', quiet=True);"
# Set up application files
WORKDIR /app
COPY ./danswer /app/danswer
COPY ./shared_configs /app/shared_configs
COPY ./alembic /app/alembic
COPY ./alembic.ini /app/alembic.ini
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Create Vespa app zip
WORKDIR /app/danswer/document_index/vespa/app_config
RUN zip -r /app/danswer/vespa-app.zip .
WORKDIR /app
# TODO: remove this once all users have migrated
COPY ./scripts/migrate_vespa_to_acl.py /app/migrate_vespa_to_acl.py
COPY supervisord.conf /usr/etc/supervisord.conf
ENV PYTHONPATH /app

View File

@@ -0,0 +1,46 @@
FROM python:3.11.7-slim-bookworm
LABEL com.danswer.maintainer="founders@danswer.ai"
LABEL com.danswer.description="This image is for the Danswer model server which runs all of the \
AI models for Danswer. This container and all the code is MIT Licensed and free for all to use. \
You can find it at https://hub.docker.com/r/danswer/danswer-model-server. For more details, \
visit https://github.com/danswer-ai/danswer."
# Default DANSWER_VERSION, typically overridden during builds by GitHub Actions.
ARG DANSWER_VERSION=0.3-dev
ENV DANSWER_VERSION=${DANSWER_VERSION}
RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
RUN apt-get remove -y --allow-remove-essential perl-base && \
apt-get autoremove -y
# Pre-downloading models for setups with limited egress
RUN python -c "from transformers import AutoModel, AutoTokenizer, TFDistilBertForSequenceClassification; \
from huggingface_hub import snapshot_download; \
AutoTokenizer.from_pretrained('danswer/intent-model'); \
AutoTokenizer.from_pretrained('intfloat/e5-base-v2'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
snapshot_download('danswer/intent-model'); \
snapshot_download('intfloat/e5-base-v2'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1')"
WORKDIR /app
# Utils used by model server
COPY ./danswer/utils/logger.py /app/danswer/utils/logger.py
# Place to fetch version information
COPY ./danswer/__init__.py /app/danswer/__init__.py
# Shared between Danswer Backend and Model Server
COPY ./shared_configs /app/shared_configs
# Model Server main code
COPY ./model_server /app/model_server
ENV PYTHONPATH /app
CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]

View File

@@ -1,4 +1,8 @@
Generic single-database configuration with an async dbapi.
<!-- DANSWER_METADATA={"link": "https://github.com/danswer-ai/danswer/blob/main/backend/alembic/README.md"} -->
# Alembic DB Migrations
These files are for creating/updating the tables in the Relational DB (Postgres).
Danswer migrations use a generic single-database configuration with an async dbapi.
## To generate new migrations:
run from danswer/backend:
@@ -7,7 +11,6 @@ run from danswer/backend:
More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html
## Running migrations
To run all un-applied migrations:
`alembic upgrade head`
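For contributors who prefer to drive migrations from code (for example in test fixtures), the same upgrade can also be run programmatically through Alembic's Python API. A minimal sketch, assuming it is executed from `danswer/backend` where the standard `alembic.ini` lives:

```python
from alembic import command
from alembic.config import Config

# Programmatic equivalent of `alembic upgrade head`; assumes the working
# directory is danswer/backend so that alembic.ini and the migration
# scripts resolve correctly.
cfg = Config("alembic.ini")
command.upgrade(cfg, "head")
```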

View File

@@ -0,0 +1,31 @@
"""Add starter prompts
Revision ID: 0a2b51deb0b8
Revises: 5f4b8568a221
Create Date: 2024-03-02 23:23:49.960309
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "0a2b51deb0b8"
down_revision = "5f4b8568a221"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"persona",
sa.Column(
"starter_messages",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("persona", "starter_messages")

View File

@@ -0,0 +1,113 @@
"""Enable Encrypted Fields
Revision ID: 0a98909f2757
Revises: 570282d33c49
Create Date: 2024-05-05 19:30:34.317972
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table
from sqlalchemy.dialects import postgresql
import json
from danswer.utils.encryption import encrypt_string_to_bytes
# revision identifiers, used by Alembic.
revision = "0a98909f2757"
down_revision = "570282d33c49"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
connection = op.get_bind()
op.alter_column("key_value_store", "value", nullable=True)
op.add_column(
"key_value_store",
sa.Column(
"encrypted_value",
sa.LargeBinary,
nullable=True,
),
)
# Need a temporary column to translate the JSONB to binary
op.add_column("credential", sa.Column("temp_column", sa.LargeBinary()))
creds_table = table(
"credential",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"credential_json",
postgresql.JSONB(astext_type=sa.Text()),
nullable=False,
),
sa.Column(
"temp_column",
sa.LargeBinary(),
nullable=False,
),
)
results = connection.execute(sa.select(creds_table))
# This uses the MIT build's encrypt, which does not actually encrypt the credentials
# In other words, this upgrade does not apply the encryption. Porting existing sensitive data
# and key rotation are currently not supported and will come in a future release
for row_id, creds, _ in results:
creds_binary = encrypt_string_to_bytes(json.dumps(creds))
connection.execute(
creds_table.update()
.where(creds_table.c.id == row_id)
.values(temp_column=creds_binary)
)
op.drop_column("credential", "credential_json")
op.alter_column("credential", "temp_column", new_column_name="credential_json")
op.add_column("llm_provider", sa.Column("temp_column", sa.LargeBinary()))
llm_table = table(
"llm_provider",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"api_key",
sa.String(),
nullable=False,
),
sa.Column(
"temp_column",
sa.LargeBinary(),
nullable=False,
),
)
results = connection.execute(sa.select(llm_table))
for row_id, api_key, _ in results:
llm_key = encrypt_string_to_bytes(api_key)
connection.execute(
llm_table.update()
.where(llm_table.c.id == row_id)
.values(temp_column=llm_key)
)
op.drop_column("llm_provider", "api_key")
op.alter_column("llm_provider", "temp_column", new_column_name="api_key")
def downgrade() -> None:
# Some information loss but this is ok. Should not allow decryption via downgrade.
op.drop_column("credential", "credential_json")
op.drop_column("llm_provider", "api_key")
op.add_column("llm_provider", sa.Column("api_key", sa.String()))
op.add_column(
"credential",
sa.Column("credential_json", postgresql.JSONB(astext_type=sa.Text())),
)
op.execute("DELETE FROM key_value_store WHERE value IS NULL")
op.alter_column("key_value_store", "value", nullable=False)
op.drop_column("key_value_store", "encrypted_value")

View File

@@ -0,0 +1,37 @@
"""Introduce Danswer APIs
Revision ID: 15326fcec57e
Revises: 77d07dffae64
Create Date: 2023-11-11 20:51:24.228999
"""
from alembic import op
import sqlalchemy as sa
from danswer.configs.constants import DocumentSource
# revision identifiers, used by Alembic.
revision = "15326fcec57e"
down_revision = "77d07dffae64"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.alter_column("credential", "is_admin", new_column_name="admin_public")
op.add_column(
"document",
sa.Column("from_ingestion_api", sa.Boolean(), nullable=True),
)
op.alter_column(
"connector",
"source",
type_=sa.String(length=50),
existing_type=sa.Enum(DocumentSource, native_enum=False),
existing_nullable=False,
)
def downgrade() -> None:
op.drop_column("document", "from_ingestion_api")
op.alter_column("credential", "admin_public", new_column_name="is_admin")

View File

@@ -0,0 +1,29 @@
"""Port Config Store
Revision ID: 173cae5bba26
Revises: e50154680a5c
Create Date: 2024-03-19 15:30:44.425436
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "173cae5bba26"
down_revision = "e50154680a5c"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"key_value_store",
sa.Column("key", sa.String(), nullable=False),
sa.Column("value", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
sa.PrimaryKeyConstraint("key"),
)
def downgrade() -> None:
op.drop_table("key_value_store")

View File

@@ -13,8 +13,8 @@ from alembic import op
# revision identifiers, used by Alembic.
revision = "2666d766cb9b"
down_revision = "6d387b3196c2"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -13,8 +13,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "27c6ecc08586"
down_revision = "2666d766cb9b"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -11,8 +11,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "30c1d5744104"
down_revision = "7f99be1cb9f5"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,45 @@
"""Add tool table
Revision ID: 3879338f8ba1
Revises: f1c6478c3fd8
Create Date: 2024-05-11 16:11:23.718084
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "3879338f8ba1"
down_revision = "f1c6478c3fd8"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"tool",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column("description", sa.Text(), nullable=True),
sa.Column("in_code_tool_id", sa.String(), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"persona__tool",
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.Column("tool_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.ForeignKeyConstraint(
["tool_id"],
["tool.id"],
),
sa.PrimaryKeyConstraint("persona_id", "tool_id"),
)
def downgrade() -> None:
op.drop_table("persona__tool")
op.drop_table("tool")

View File

@@ -0,0 +1,41 @@
"""Add chat session sharing
Revision ID: 38eda64af7fe
Revises: 776b3bbe9092
Create Date: 2024-03-27 19:41:29.073594
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "38eda64af7fe"
down_revision = "776b3bbe9092"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"chat_session",
sa.Column(
"shared_status",
sa.Enum(
"PUBLIC",
"PRIVATE",
name="chatsessionsharedstatus",
native_enum=False,
),
nullable=True,
),
)
op.execute("UPDATE chat_session SET shared_status='PRIVATE'")
op.alter_column(
"chat_session",
"shared_status",
nullable=False,
)
def downgrade() -> None:
op.drop_column("chat_session", "shared_status")

View File

@@ -11,8 +11,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "3b25685ff73c"
down_revision = "e0a68a81d434"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ from alembic import op
# revision identifiers, used by Alembic.
revision = "3c5e35aa9af0"
down_revision = "27c6ecc08586"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,49 @@
"""Add tables for UI-based LLM configuration
Revision ID: 401c1ac29467
Revises: 703313b75876
Create Date: 2024-04-13 18:07:29.153817
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "401c1ac29467"
down_revision = "703313b75876"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"llm_provider",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column("api_key", sa.String(), nullable=True),
sa.Column("api_base", sa.String(), nullable=True),
sa.Column("api_version", sa.String(), nullable=True),
sa.Column(
"custom_config",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
sa.Column("default_model_name", sa.String(), nullable=False),
sa.Column("fast_default_model_name", sa.String(), nullable=True),
sa.Column("is_default_provider", sa.Boolean(), unique=True, nullable=True),
sa.Column("model_names", postgresql.ARRAY(sa.String()), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("name"),
)
op.add_column(
"persona",
sa.Column("llm_model_provider_override", sa.String(), nullable=True),
)
def downgrade() -> None:
op.drop_column("persona", "llm_model_provider_override")
op.drop_table("llm_provider")

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "465f78d9b7f9"
down_revision = "3c5e35aa9af0"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -11,8 +11,8 @@ from sqlalchemy import String
# revision identifiers, used by Alembic.
revision = "46625e4745d4"
down_revision = "9d97fecfab7f"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,28 @@
"""PG File Store
Revision ID: 4738e4b3bae1
Revises: e91df4e935ef
Create Date: 2024-03-20 18:53:32.461518
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "4738e4b3bae1"
down_revision = "e91df4e935ef"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"file_store",
sa.Column("file_name", sa.String(), nullable=False),
sa.Column("lobj_oid", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("file_name"),
)
def downgrade() -> None:
op.drop_table("file_store")

View File

@@ -11,9 +11,9 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "47433d30de82"
down_revision = None
branch_labels = None
depends_on = None
down_revision: None = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,23 @@
"""Add name to api_key
Revision ID: 475fcefe8826
Revises: ecab2b3f1a3b
Create Date: 2024-04-11 11:05:18.414438
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "475fcefe8826"
down_revision = "ecab2b3f1a3b"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column("api_key", sa.Column("name", sa.String(), nullable=True))
def downgrade() -> None:
op.drop_column("api_key", "name")

View File

@@ -0,0 +1,28 @@
"""Add additional retrieval controls to Persona
Revision ID: 50b683a8295c
Revises: 7da0ae5ad583
Create Date: 2023-11-27 17:23:29.668422
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "50b683a8295c"
down_revision = "7da0ae5ad583"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column("persona", sa.Column("num_chunks", sa.Integer(), nullable=True))
op.add_column(
"persona",
sa.Column("apply_llm_relevance_filter", sa.Boolean(), nullable=True),
)
def downgrade() -> None:
op.drop_column("persona", "apply_llm_relevance_filter")
op.drop_column("persona", "num_chunks")

View File

@@ -0,0 +1,27 @@
"""Track Danswerbot Explicitly
Revision ID: 570282d33c49
Revises: 7547d982db8f
Create Date: 2024-05-04 17:49:28.568109
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "570282d33c49"
down_revision = "7547d982db8f"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"chat_session", sa.Column("danswerbot_flow", sa.Boolean(), nullable=True)
)
op.execute("UPDATE chat_session SET danswerbot_flow = one_shot")
op.alter_column("chat_session", "danswerbot_flow", nullable=False)
def downgrade() -> None:
op.drop_column("chat_session", "danswerbot_flow")

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "57b53544726e"
down_revision = "800f48024ae9"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -13,8 +13,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5809c0787398"
down_revision = "d929f0c1c6af"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5e84129c8be3"
down_revision = "e6a4bbc13fe4"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,27 @@
"""add removed documents to index_attempt
Revision ID: 5f4b8568a221
Revises: dbaa756c2ccf
Create Date: 2024-02-16 15:02:03.319907
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5f4b8568a221"
down_revision = "8987770549c0"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"index_attempt",
sa.Column("docs_removed_from_index", sa.Integer()),
)
op.execute("UPDATE index_attempt SET docs_removed_from_index = 0")
def downgrade() -> None:
op.drop_column("index_attempt", "docs_removed_from_index")

View File

@@ -0,0 +1,45 @@
"""Add user-configured names to LLMProvider
Revision ID: 643a84a42a33
Revises: 0a98909f2757
Create Date: 2024-05-07 14:54:55.493100
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "643a84a42a33"
down_revision = "0a98909f2757"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column("llm_provider", sa.Column("provider", sa.String(), nullable=True))
# move "name" -> "provider" to match the new schema
op.execute("UPDATE llm_provider SET provider = name")
# pretty up display name
op.execute("UPDATE llm_provider SET name = 'OpenAI' WHERE name = 'openai'")
op.execute("UPDATE llm_provider SET name = 'Anthropic' WHERE name = 'anthropic'")
op.execute("UPDATE llm_provider SET name = 'Azure OpenAI' WHERE name = 'azure'")
op.execute("UPDATE llm_provider SET name = 'AWS Bedrock' WHERE name = 'bedrock'")
# update personas to use the new provider names
op.execute(
"UPDATE persona SET llm_model_provider_override = 'OpenAI' WHERE llm_model_provider_override = 'openai'"
)
op.execute(
"UPDATE persona SET llm_model_provider_override = 'Anthropic' WHERE llm_model_provider_override = 'anthropic'"
)
op.execute(
"UPDATE persona SET llm_model_provider_override = 'Azure OpenAI' WHERE llm_model_provider_override = 'azure'"
)
op.execute(
"UPDATE persona SET llm_model_provider_override = 'AWS Bedrock' WHERE llm_model_provider_override = 'bedrock'"
)
def downgrade() -> None:
op.execute("UPDATE llm_provider SET name = provider")
op.drop_column("llm_provider", "provider")

View File

@@ -13,8 +13,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "6d387b3196c2"
down_revision = "47433d30de82"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,83 @@
"""Add TokenRateLimit Tables
Revision ID: 703313b75876
Revises: fad14119fb92
Create Date: 2024-04-15 01:36:02.952809
"""
import json
from typing import cast
from alembic import op
import sqlalchemy as sa
from danswer.dynamic_configs.factory import get_dynamic_config_store
# revision identifiers, used by Alembic.
revision = "703313b75876"
down_revision = "fad14119fb92"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"token_rate_limit",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("enabled", sa.Boolean(), nullable=False),
sa.Column("token_budget", sa.Integer(), nullable=False),
sa.Column("period_hours", sa.Integer(), nullable=False),
sa.Column(
"scope",
sa.String(length=10),
nullable=False,
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"token_rate_limit__user_group",
sa.Column("rate_limit_id", sa.Integer(), nullable=False),
sa.Column("user_group_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["rate_limit_id"],
["token_rate_limit.id"],
),
sa.ForeignKeyConstraint(
["user_group_id"],
["user_group.id"],
),
sa.PrimaryKeyConstraint("rate_limit_id", "user_group_id"),
)
try:
settings_json = cast(
str, get_dynamic_config_store().load("token_budget_settings")
)
settings = json.loads(settings_json)
is_enabled = settings.get("enable_token_budget", False)
token_budget = settings.get("token_budget", -1)
period_hours = settings.get("period_hours", -1)
if is_enabled and token_budget > 0 and period_hours > 0:
op.execute(
f"INSERT INTO token_rate_limit \
(enabled, token_budget, period_hours, scope) VALUES \
({is_enabled}, {token_budget}, {period_hours}, 'GLOBAL')"
)
# Delete the dynamic config
get_dynamic_config_store().delete("token_budget_settings")
except Exception:
# Ignore if the dynamic config is not found
pass
def downgrade() -> None:
op.drop_table("token_rate_limit__user_group")
op.drop_table("token_rate_limit")

View File

@@ -0,0 +1,81 @@
"""Permission Auto Sync Framework
Revision ID: 72bdc9929a46
Revises: 475fcefe8826
Create Date: 2024-04-14 21:15:28.659634
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "72bdc9929a46"
down_revision = "475fcefe8826"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"email_to_external_user_cache",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("external_user_id", sa.String(), nullable=False),
sa.Column("user_id", sa.UUID(), nullable=True),
sa.Column("user_email", sa.String(), nullable=False),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"external_permission",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("user_id", sa.UUID(), nullable=True),
sa.Column("user_email", sa.String(), nullable=False),
sa.Column(
"source_type",
sa.String(),
nullable=False,
),
sa.Column("external_permission_group", sa.String(), nullable=False),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"permission_sync_run",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"source_type",
sa.String(),
nullable=False,
),
sa.Column("update_type", sa.String(), nullable=False),
sa.Column("cc_pair_id", sa.Integer(), nullable=True),
sa.Column(
"status",
sa.String(),
nullable=False,
),
sa.Column("error_msg", sa.Text(), nullable=True),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.ForeignKeyConstraint(
["cc_pair_id"],
["connector_credential_pair.id"],
),
sa.PrimaryKeyConstraint("id"),
)
def downgrade() -> None:
op.drop_table("permission_sync_run")
op.drop_table("external_permission")
op.drop_table("email_to_external_user_cache")

View File

@@ -0,0 +1,51 @@
"""Chat Folders
Revision ID: 7547d982db8f
Revises: ef7da92f7213
Create Date: 2024-05-02 15:18:56.573347
"""
from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy
# revision identifiers, used by Alembic.
revision = "7547d982db8f"
down_revision = "ef7da92f7213"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"chat_folder",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=True,
),
sa.Column("name", sa.String(), nullable=True),
sa.Column("display_priority", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
),
sa.PrimaryKeyConstraint("id"),
)
op.add_column("chat_session", sa.Column("folder_id", sa.Integer(), nullable=True))
op.create_foreign_key(
"chat_session_chat_folder_fk",
"chat_session",
"chat_folder",
["folder_id"],
["id"],
)
def downgrade() -> None:
op.drop_constraint(
"chat_session_chat_folder_fk", "chat_session", type_="foreignkey"
)
op.drop_column("chat_session", "folder_id")
op.drop_table("chat_folder")

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "767f1c2a00eb"
down_revision = "dba7f71618f5"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,32 @@
"""CC-Pair Name not Unique
Revision ID: 76b60d407dfb
Revises: b156fa702355
Create Date: 2023-12-22 21:42:10.018804
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "76b60d407dfb"
down_revision = "b156fa702355"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.execute("DELETE FROM connector_credential_pair WHERE name IS NULL")
op.drop_constraint(
"connector_credential_pair__name__key",
"connector_credential_pair",
type_="unique",
)
op.alter_column(
"connector_credential_pair", "name", existing_type=sa.String(), nullable=False
)
def downgrade() -> None:
# This wasn't really required by the code either; there's no good reason to make it unique again
pass

View File

@@ -0,0 +1,71 @@
"""Remove Remaining Enums
Revision ID: 776b3bbe9092
Revises: 4738e4b3bae1
Create Date: 2024-03-22 21:34:27.629444
"""
from alembic import op
import sqlalchemy as sa
from danswer.db.models import IndexModelStatus
from danswer.search.enums import RecencyBiasSetting
from danswer.search.models import SearchType
# revision identifiers, used by Alembic.
revision = "776b3bbe9092"
down_revision = "4738e4b3bae1"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.alter_column(
"persona",
"search_type",
type_=sa.String,
existing_type=sa.Enum(SearchType, native_enum=False),
existing_nullable=False,
)
op.alter_column(
"persona",
"recency_bias",
type_=sa.String,
existing_type=sa.Enum(RecencyBiasSetting, native_enum=False),
existing_nullable=False,
)
# Because the indexmodelstatus enum does not have a mapping to a string type
# we need this workaround instead of directly changing the type
op.add_column("embedding_model", sa.Column("temp_status", sa.String))
op.execute("UPDATE embedding_model SET temp_status = status::text")
op.drop_column("embedding_model", "status")
op.alter_column("embedding_model", "temp_status", new_column_name="status")
op.execute("DROP TYPE IF EXISTS searchtype")
op.execute("DROP TYPE IF EXISTS recencybiassetting")
op.execute("DROP TYPE IF EXISTS indexmodelstatus")
def downgrade() -> None:
op.alter_column(
"persona",
"search_type",
type_=sa.Enum(SearchType, native_enum=False),
existing_type=sa.String(length=50),
existing_nullable=False,
)
op.alter_column(
"persona",
"recency_bias",
type_=sa.Enum(RecencyBiasSetting, native_enum=False),
existing_type=sa.String(length=50),
existing_nullable=False,
)
op.alter_column(
"embedding_model",
"status",
type_=sa.Enum(IndexModelStatus, native_enum=False),
existing_type=sa.String(length=50),
existing_nullable=False,
)

View File

@@ -12,8 +12,8 @@ from sqlalchemy import String
# revision identifiers, used by Alembic.
revision = "77d07dffae64"
down_revision = "d61e513bef0a"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -11,8 +11,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "78dbe7e38469"
down_revision = "7ccea01261f6"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,48 @@
"""Add api_key table
Revision ID: 79acd316403a
Revises: 904e5138fffb
Create Date: 2024-01-11 17:56:37.934381
"""
from alembic import op
import fastapi_users_db_sqlalchemy
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "79acd316403a"
down_revision = "904e5138fffb"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"api_key",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("hashed_api_key", sa.String(), nullable=False),
sa.Column("api_key_display", sa.String(), nullable=False),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=False,
),
sa.Column(
"owner_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=True,
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("api_key_display"),
sa.UniqueConstraint("hashed_api_key"),
)
def downgrade() -> None:
op.drop_table("api_key")

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "7ccea01261f6"
down_revision = "a570b80a5f20"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,23 @@
"""Add description to persona
Revision ID: 7da0ae5ad583
Revises: e86866a9c78a
Create Date: 2023-11-27 00:16:19.959414
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7da0ae5ad583"
down_revision = "e86866a9c78a"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column("persona", sa.Column("description", sa.String(), nullable=True))
def downgrade() -> None:
op.drop_column("persona", "description")

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "7da543f5672f"
down_revision = "febe9eaa0644"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,26 @@
"""Slack Followup
Revision ID: 7f726bad5367
Revises: 79acd316403a
Create Date: 2024-01-15 00:19:55.991224
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7f726bad5367"
down_revision = "79acd316403a"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"chat_feedback",
sa.Column("required_followup", sa.Boolean(), nullable=True),
)
def downgrade() -> None:
op.drop_column("chat_feedback", "required_followup")

View File

@@ -11,8 +11,8 @@ from alembic import op
# revision identifiers, used by Alembic.
revision = "7f99be1cb9f5"
down_revision = "78dbe7e38469"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ from sqlalchemy.schema import Sequence, CreateSequence
# revision identifiers, used by Alembic.
revision = "800f48024ae9"
down_revision = "767f1c2a00eb"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,36 @@
"""Add chat session to query_event
Revision ID: 80696cf850ae
Revises: 15326fcec57e
Create Date: 2023-11-26 02:38:35.008070
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "80696cf850ae"
down_revision = "15326fcec57e"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"query_event",
sa.Column("chat_session_id", sa.Integer(), nullable=True),
)
op.create_foreign_key(
"fk_query_event_chat_session_id",
"query_event",
"chat_session",
["chat_session_id"],
["id"],
)
def downgrade() -> None:
op.drop_constraint(
"fk_query_event_chat_session_id", "query_event", type_="foreignkey"
)
op.drop_column("query_event", "chat_session_id")

View File

@@ -0,0 +1,34 @@
"""Add is_visible to Persona
Revision ID: 891cd83c87a8
Revises: 76b60d407dfb
Create Date: 2023-12-21 11:55:54.132279
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "891cd83c87a8"
down_revision = "76b60d407dfb"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"persona",
sa.Column("is_visible", sa.Boolean(), nullable=True),
)
op.execute("UPDATE persona SET is_visible = true")
op.alter_column("persona", "is_visible", nullable=False)
op.add_column(
"persona",
sa.Column("display_priority", sa.Integer(), nullable=True),
)
def downgrade() -> None:
op.drop_column("persona", "is_visible")
op.drop_column("persona", "display_priority")

View File

@@ -0,0 +1,25 @@
"""Add full exception stack trace
Revision ID: 8987770549c0
Revises: ec3ec2eabf7b
Create Date: 2024-02-10 19:31:28.339135
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "8987770549c0"
down_revision = "ec3ec2eabf7b"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"index_attempt", sa.Column("full_exception_trace", sa.Text(), nullable=True)
)
def downgrade() -> None:
op.drop_column("index_attempt", "full_exception_trace")

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "8aabb57f3b49"
down_revision = "5e84129c8be3"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "8e26726b7683"
down_revision = "5809c0787398"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "904451035c9b"
down_revision = "3b25685ff73c"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,61 @@
"""Tags
Revision ID: 904e5138fffb
Revises: 891cd83c87a8
Create Date: 2024-01-01 10:44:43.733974
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "904e5138fffb"
down_revision = "891cd83c87a8"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"tag",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("tag_key", sa.String(), nullable=False),
sa.Column("tag_value", sa.String(), nullable=False),
sa.Column("source", sa.String(), nullable=False),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint(
"tag_key", "tag_value", "source", name="_tag_key_value_source_uc"
),
)
op.create_table(
"document__tag",
sa.Column("document_id", sa.String(), nullable=False),
sa.Column("tag_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["document_id"],
["document.id"],
),
sa.ForeignKeyConstraint(
["tag_id"],
["tag.id"],
),
sa.PrimaryKeyConstraint("document_id", "tag_id"),
)
op.add_column(
"search_doc",
sa.Column(
"doc_metadata",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)
op.execute("UPDATE search_doc SET doc_metadata = '{}' WHERE doc_metadata IS NULL")
op.alter_column("search_doc", "doc_metadata", nullable=False)
def downgrade() -> None:
op.drop_table("document__tag")
op.drop_table("tag")
op.drop_column("search_doc", "doc_metadata")

View File

@@ -0,0 +1,36 @@
"""Remove DocumentSource from Tag
Revision ID: 91fd3b470d1a
Revises: 173cae5bba26
Create Date: 2024-03-21 12:05:23.956734
"""
from alembic import op
import sqlalchemy as sa
from danswer.configs.constants import DocumentSource
# revision identifiers, used by Alembic.
revision = "91fd3b470d1a"
down_revision = "173cae5bba26"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.alter_column(
"tag",
"source",
type_=sa.String(length=50),
existing_type=sa.Enum(DocumentSource, native_enum=False),
existing_nullable=False,
)
def downgrade() -> None:
op.alter_column(
"tag",
"source",
type_=sa.Enum(DocumentSource, native_enum=False),
existing_type=sa.String(length=50),
existing_nullable=False,
)

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "9d97fecfab7f"
down_revision = "ffc707a226b4"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a570b80a5f20"
down_revision = "904451035c9b"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "ae62505e3acc"
down_revision = "7da543f5672f"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "b082fec533f0"
down_revision = "df0c7ad8a076"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,520 @@
"""Chat Reworked
Revision ID: b156fa702355
Revises: baf71f781b9e
Create Date: 2023-12-12 00:57:41.823371
"""
import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import ENUM
from danswer.configs.constants import DocumentSource
# revision identifiers, used by Alembic.
revision = "b156fa702355"
down_revision = "baf71f781b9e"
branch_labels: None = None
depends_on: None = None
searchtype_enum = ENUM(
"KEYWORD", "SEMANTIC", "HYBRID", name="searchtype", create_type=True
)
recencybiassetting_enum = ENUM(
"FAVOR_RECENT",
"BASE_DECAY",
"NO_DECAY",
"AUTO",
name="recencybiassetting",
create_type=True,
)
def upgrade() -> None:
bind = op.get_bind()
searchtype_enum.create(bind)
recencybiassetting_enum.create(bind)
# This feedback cannot be mapped onto the new schema, so it is dropped
op.execute("DELETE FROM chat_feedback")
op.execute("DELETE FROM document_retrieval_feedback")
op.create_table(
"search_doc",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("document_id", sa.String(), nullable=False),
sa.Column("chunk_ind", sa.Integer(), nullable=False),
sa.Column("semantic_id", sa.String(), nullable=False),
sa.Column("link", sa.String(), nullable=True),
sa.Column("blurb", sa.String(), nullable=False),
sa.Column("boost", sa.Integer(), nullable=False),
sa.Column(
"source_type",
sa.Enum(DocumentSource, native=False),
nullable=False,
),
sa.Column("hidden", sa.Boolean(), nullable=False),
sa.Column("score", sa.Float(), nullable=False),
sa.Column("match_highlights", postgresql.ARRAY(sa.String()), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("primary_owners", postgresql.ARRAY(sa.String()), nullable=True),
sa.Column("secondary_owners", postgresql.ARRAY(sa.String()), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"prompt",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=True,
),
sa.Column("name", sa.String(), nullable=False),
sa.Column("description", sa.String(), nullable=False),
sa.Column("system_prompt", sa.Text(), nullable=False),
sa.Column("task_prompt", sa.Text(), nullable=False),
sa.Column("include_citations", sa.Boolean(), nullable=False),
sa.Column("datetime_aware", sa.Boolean(), nullable=False),
sa.Column("default_prompt", sa.Boolean(), nullable=False),
sa.Column("deleted", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"persona__prompt",
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.Column("prompt_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.ForeignKeyConstraint(
["prompt_id"],
["prompt.id"],
),
sa.PrimaryKeyConstraint("persona_id", "prompt_id"),
)
# Change persona first so chat_sessions can reference the right persona
# The empty persona will be overwritten on server startup
op.add_column(
"persona",
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=True,
),
)
op.add_column(
"persona",
sa.Column(
"search_type",
searchtype_enum,
nullable=True,
),
)
op.execute("UPDATE persona SET search_type = 'HYBRID'")
op.alter_column("persona", "search_type", nullable=False)
op.add_column(
"persona",
sa.Column("llm_relevance_filter", sa.Boolean(), nullable=True),
)
op.execute("UPDATE persona SET llm_relevance_filter = TRUE")
op.alter_column("persona", "llm_relevance_filter", nullable=False)
op.add_column(
"persona",
sa.Column("llm_filter_extraction", sa.Boolean(), nullable=True),
)
op.execute("UPDATE persona SET llm_filter_extraction = TRUE")
op.alter_column("persona", "llm_filter_extraction", nullable=False)
op.add_column(
"persona",
sa.Column(
"recency_bias",
recencybiassetting_enum,
nullable=True,
),
)
op.execute("UPDATE persona SET recency_bias = 'BASE_DECAY'")
op.alter_column("persona", "recency_bias", nullable=False)
op.alter_column("persona", "description", existing_type=sa.VARCHAR(), nullable=True)
op.execute("UPDATE persona SET description = ''")
op.alter_column("persona", "description", nullable=False)
op.create_foreign_key("persona__user_fk", "persona", "user", ["user_id"], ["id"])
op.drop_column("persona", "datetime_aware")
op.drop_column("persona", "tools")
op.drop_column("persona", "hint_text")
op.drop_column("persona", "apply_llm_relevance_filter")
op.drop_column("persona", "retrieval_enabled")
op.drop_column("persona", "system_text")
# Need to create a persona row so the foreign key can resolve
result = bind.execute(sa.text("SELECT 1 FROM persona WHERE id = 0"))
exists = result.fetchone()
if not exists:
op.execute(
sa.text(
"""
INSERT INTO persona (
id, user_id, name, description, search_type, num_chunks,
llm_relevance_filter, llm_filter_extraction, recency_bias,
llm_model_version_override, default_persona, deleted
) VALUES (
0, NULL, '', '', 'HYBRID', NULL,
TRUE, TRUE, 'BASE_DECAY', NULL, TRUE, FALSE
)
"""
)
)
delete_statement = sa.text(
"""
DELETE FROM persona
WHERE name = 'Danswer' AND default_persona = TRUE AND id != 0
"""
)
bind.execute(delete_statement)
op.add_column(
"chat_feedback",
sa.Column("chat_message_id", sa.Integer(), nullable=False),
)
op.drop_constraint(
"chat_feedback_chat_message_chat_session_id_chat_message_me_fkey",
"chat_feedback",
type_="foreignkey",
)
op.drop_column("chat_feedback", "chat_message_edit_number")
op.drop_column("chat_feedback", "chat_message_chat_session_id")
op.drop_column("chat_feedback", "chat_message_message_number")
op.add_column(
"chat_message",
sa.Column(
"id",
sa.Integer(),
primary_key=True,
autoincrement=True,
nullable=False,
unique=True,
),
)
op.add_column(
"chat_message",
sa.Column("parent_message", sa.Integer(), nullable=True),
)
op.add_column(
"chat_message",
sa.Column("latest_child_message", sa.Integer(), nullable=True),
)
op.add_column(
"chat_message", sa.Column("rephrased_query", sa.Text(), nullable=True)
)
op.add_column("chat_message", sa.Column("prompt_id", sa.Integer(), nullable=True))
op.add_column(
"chat_message",
sa.Column("citations", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
)
op.add_column("chat_message", sa.Column("error", sa.Text(), nullable=True))
op.drop_constraint("fk_chat_message_persona_id", "chat_message", type_="foreignkey")
op.create_foreign_key(
"chat_message__prompt_fk", "chat_message", "prompt", ["prompt_id"], ["id"]
)
op.drop_column("chat_message", "parent_edit_number")
op.drop_column("chat_message", "persona_id")
op.drop_column("chat_message", "reference_docs")
op.drop_column("chat_message", "edit_number")
op.drop_column("chat_message", "latest")
op.drop_column("chat_message", "message_number")
op.add_column("chat_session", sa.Column("one_shot", sa.Boolean(), nullable=True))
op.execute("UPDATE chat_session SET one_shot = TRUE")
op.alter_column("chat_session", "one_shot", nullable=False)
op.alter_column(
"chat_session",
"persona_id",
existing_type=sa.INTEGER(),
nullable=True,
)
op.execute("UPDATE chat_session SET persona_id = 0")
op.alter_column("chat_session", "persona_id", nullable=False)
op.add_column(
"document_retrieval_feedback",
sa.Column("chat_message_id", sa.Integer(), nullable=False),
)
op.drop_constraint(
"document_retrieval_feedback_qa_event_id_fkey",
"document_retrieval_feedback",
type_="foreignkey",
)
op.create_foreign_key(
"document_retrieval_feedback__chat_message_fk",
"document_retrieval_feedback",
"chat_message",
["chat_message_id"],
["id"],
)
op.drop_column("document_retrieval_feedback", "qa_event_id")
# The relation table must be created after the other tables are in their final state
op.create_table(
"chat_message__search_doc",
sa.Column("chat_message_id", sa.Integer(), nullable=False),
sa.Column("search_doc_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["chat_message_id"],
["chat_message.id"],
),
sa.ForeignKeyConstraint(
["search_doc_id"],
["search_doc.id"],
),
sa.PrimaryKeyConstraint("chat_message_id", "search_doc_id"),
)
# Needs to be created after chat_message id field is added
op.create_foreign_key(
"chat_feedback__chat_message_fk",
"chat_feedback",
"chat_message",
["chat_message_id"],
["id"],
)
op.drop_table("query_event")
def downgrade() -> None:
op.drop_constraint(
"chat_feedback__chat_message_fk", "chat_feedback", type_="foreignkey"
)
op.drop_constraint(
"document_retrieval_feedback__chat_message_fk",
"document_retrieval_feedback",
type_="foreignkey",
)
op.drop_constraint("persona__user_fk", "persona", type_="foreignkey")
op.drop_constraint("chat_message__prompt_fk", "chat_message", type_="foreignkey")
op.drop_constraint(
"chat_message__search_doc_chat_message_id_fkey",
"chat_message__search_doc",
type_="foreignkey",
)
op.add_column(
"persona",
sa.Column("system_text", sa.TEXT(), autoincrement=False, nullable=True),
)
op.add_column(
"persona",
sa.Column(
"retrieval_enabled",
sa.BOOLEAN(),
autoincrement=False,
nullable=True,
),
)
op.execute("UPDATE persona SET retrieval_enabled = TRUE")
op.alter_column("persona", "retrieval_enabled", nullable=False)
op.add_column(
"persona",
sa.Column(
"apply_llm_relevance_filter",
sa.BOOLEAN(),
autoincrement=False,
nullable=True,
),
)
op.add_column(
"persona",
sa.Column("hint_text", sa.TEXT(), autoincrement=False, nullable=True),
)
op.add_column(
"persona",
sa.Column(
"tools",
postgresql.JSONB(astext_type=sa.Text()),
autoincrement=False,
nullable=True,
),
)
op.add_column(
"persona",
sa.Column("datetime_aware", sa.BOOLEAN(), autoincrement=False, nullable=True),
)
op.execute("UPDATE persona SET datetime_aware = TRUE")
op.alter_column("persona", "datetime_aware", nullable=False)
op.alter_column("persona", "description", existing_type=sa.VARCHAR(), nullable=True)
op.drop_column("persona", "recency_bias")
op.drop_column("persona", "llm_filter_extraction")
op.drop_column("persona", "llm_relevance_filter")
op.drop_column("persona", "search_type")
op.drop_column("persona", "user_id")
op.add_column(
"document_retrieval_feedback",
sa.Column("qa_event_id", sa.INTEGER(), autoincrement=False, nullable=False),
)
op.drop_column("document_retrieval_feedback", "chat_message_id")
op.alter_column(
"chat_session", "persona_id", existing_type=sa.INTEGER(), nullable=True
)
op.drop_column("chat_session", "one_shot")
op.add_column(
"chat_message",
sa.Column(
"message_number",
sa.INTEGER(),
autoincrement=False,
nullable=False,
primary_key=True,
),
)
op.add_column(
"chat_message",
sa.Column("latest", sa.BOOLEAN(), autoincrement=False, nullable=False),
)
op.add_column(
"chat_message",
sa.Column(
"edit_number",
sa.INTEGER(),
autoincrement=False,
nullable=False,
primary_key=True,
),
)
op.add_column(
"chat_message",
sa.Column(
"reference_docs",
postgresql.JSONB(astext_type=sa.Text()),
autoincrement=False,
nullable=True,
),
)
op.add_column(
"chat_message",
sa.Column("persona_id", sa.INTEGER(), autoincrement=False, nullable=True),
)
op.add_column(
"chat_message",
sa.Column(
"parent_edit_number",
sa.INTEGER(),
autoincrement=False,
nullable=True,
),
)
op.create_foreign_key(
"fk_chat_message_persona_id",
"chat_message",
"persona",
["persona_id"],
["id"],
)
op.drop_column("chat_message", "error")
op.drop_column("chat_message", "citations")
op.drop_column("chat_message", "prompt_id")
op.drop_column("chat_message", "rephrased_query")
op.drop_column("chat_message", "latest_child_message")
op.drop_column("chat_message", "parent_message")
op.drop_column("chat_message", "id")
op.add_column(
"chat_feedback",
sa.Column(
"chat_message_message_number",
sa.INTEGER(),
autoincrement=False,
nullable=False,
),
)
op.add_column(
"chat_feedback",
sa.Column(
"chat_message_chat_session_id",
sa.INTEGER(),
autoincrement=False,
nullable=False,
primary_key=True,
),
)
op.add_column(
"chat_feedback",
sa.Column(
"chat_message_edit_number",
sa.INTEGER(),
autoincrement=False,
nullable=False,
),
)
op.drop_column("chat_feedback", "chat_message_id")
op.create_table(
"query_event",
sa.Column("id", sa.INTEGER(), autoincrement=True, nullable=False),
sa.Column("query", sa.VARCHAR(), autoincrement=False, nullable=False),
sa.Column(
"selected_search_flow",
sa.VARCHAR(),
autoincrement=False,
nullable=True,
),
sa.Column("llm_answer", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("feedback", sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column("user_id", sa.UUID(), autoincrement=False, nullable=True),
sa.Column(
"time_created",
postgresql.TIMESTAMP(timezone=True),
server_default=sa.text("now()"),
autoincrement=False,
nullable=False,
),
sa.Column(
"retrieved_document_ids",
postgresql.ARRAY(sa.VARCHAR()),
autoincrement=False,
nullable=True,
),
sa.Column("chat_session_id", sa.INTEGER(), autoincrement=False, nullable=True),
sa.ForeignKeyConstraint(
["chat_session_id"],
["chat_session.id"],
name="fk_query_event_chat_session_id",
),
sa.ForeignKeyConstraint(
["user_id"], ["user.id"], name="query_event_user_id_fkey"
),
sa.PrimaryKeyConstraint("id", name="query_event_pkey"),
)
op.drop_table("chat_message__search_doc")
op.drop_table("persona__prompt")
op.drop_table("prompt")
op.drop_table("search_doc")
op.create_unique_constraint(
"uq_chat_message_combination",
"chat_message",
["chat_session_id", "message_number", "edit_number"],
)
op.create_foreign_key(
"chat_feedback_chat_message_chat_session_id_chat_message_me_fkey",
"chat_feedback",
"chat_message",
[
"chat_message_chat_session_id",
"chat_message_message_number",
"chat_message_edit_number",
],
["chat_session_id", "message_number", "edit_number"],
)
op.create_foreign_key(
"document_retrieval_feedback_qa_event_id_fkey",
"document_retrieval_feedback",
"query_event",
["qa_event_id"],
["id"],
)
op.execute("DROP TYPE IF EXISTS searchtype")
op.execute("DROP TYPE IF EXISTS recencybiassetting")
op.execute("DROP TYPE IF EXISTS documentsource")

View File

@@ -0,0 +1,26 @@
"""Add llm_model_version_override to Persona
Revision ID: baf71f781b9e
Revises: 50b683a8295c
Create Date: 2023-12-06 21:56:50.286158
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "baf71f781b9e"
down_revision = "50b683a8295c"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"persona",
sa.Column("llm_model_version_override", sa.String(), nullable=True),
)
def downgrade() -> None:
op.drop_column("persona", "llm_model_version_override")

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "d5645c915d0e"
down_revision = "8e26726b7683"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -11,8 +11,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d61e513bef0a"
down_revision = "46625e4745d4"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "d7111c1238cd"
down_revision = "465f78d9b7f9"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -13,8 +13,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d929f0c1c6af"
down_revision = "8aabb57f3b49"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "dba7f71618f5"
down_revision = "d5645c915d0e"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,139 @@
"""Embedding Models
Revision ID: dbaa756c2ccf
Revises: 7f726bad5367
Create Date: 2024-01-25 17:12:31.813160
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import table, column, String, Integer, Boolean
from danswer.db.embedding_model import (
get_new_default_embedding_model,
get_old_default_embedding_model,
user_has_overridden_embedding_model,
)
from danswer.db.models import IndexModelStatus
# revision identifiers, used by Alembic.
revision = "dbaa756c2ccf"
down_revision = "7f726bad5367"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"embedding_model",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("model_name", sa.String(), nullable=False),
sa.Column("model_dim", sa.Integer(), nullable=False),
sa.Column("normalize", sa.Boolean(), nullable=False),
sa.Column("query_prefix", sa.String(), nullable=False),
sa.Column("passage_prefix", sa.String(), nullable=False),
sa.Column("index_name", sa.String(), nullable=False),
sa.Column(
"status",
sa.Enum(IndexModelStatus, native=False),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
# since all index attempts must be associated with an embedding model,
# a placeholder row is needed here to avoid nulls. On server startup,
# this value will be overridden
EmbeddingModel = table(
"embedding_model",
column("id", Integer),
column("model_name", String),
column("model_dim", Integer),
column("normalize", Boolean),
column("query_prefix", String),
column("passage_prefix", String),
column("index_name", String),
column(
"status", sa.Enum(IndexModelStatus, name="indexmodelstatus", native=False)
),
)
# insert an embedding model row that corresponds to the embedding model
# the user selected via env variables before this change. This is needed since
# all index_attempts must be associated with an embedding model, so without this
# we will run into violations of non-null constraints
old_embedding_model = get_old_default_embedding_model()
op.bulk_insert(
EmbeddingModel,
[
{
"model_name": old_embedding_model.model_name,
"model_dim": old_embedding_model.model_dim,
"normalize": old_embedding_model.normalize,
"query_prefix": old_embedding_model.query_prefix,
"passage_prefix": old_embedding_model.passage_prefix,
"index_name": old_embedding_model.index_name,
"status": old_embedding_model.status,
}
],
)
# if the user has not overridden the default embedding model via env variables,
# insert the new default model into the database to auto-upgrade them
if not user_has_overridden_embedding_model():
new_embedding_model = get_new_default_embedding_model(is_present=False)
op.bulk_insert(
EmbeddingModel,
[
{
"model_name": new_embedding_model.model_name,
"model_dim": new_embedding_model.model_dim,
"normalize": new_embedding_model.normalize,
"query_prefix": new_embedding_model.query_prefix,
"passage_prefix": new_embedding_model.passage_prefix,
"index_name": new_embedding_model.index_name,
"status": IndexModelStatus.FUTURE,
}
],
)
op.add_column(
"index_attempt",
sa.Column("embedding_model_id", sa.Integer(), nullable=True),
)
op.execute(
"UPDATE index_attempt SET embedding_model_id=1 WHERE embedding_model_id IS NULL"
)
op.alter_column(
"index_attempt",
"embedding_model_id",
existing_type=sa.Integer(),
nullable=False,
)
op.create_foreign_key(
"index_attempt__embedding_model_fk",
"index_attempt",
"embedding_model",
["embedding_model_id"],
["id"],
)
op.create_index(
"ix_embedding_model_present_unique",
"embedding_model",
["status"],
unique=True,
postgresql_where=sa.text("status = 'PRESENT'"),
)
op.create_index(
"ix_embedding_model_future_unique",
"embedding_model",
["status"],
unique=True,
postgresql_where=sa.text("status = 'FUTURE'"),
)
def downgrade() -> None:
op.drop_constraint(
"index_attempt__embedding_model_fk", "index_attempt", type_="foreignkey"
)
op.drop_column("index_attempt", "embedding_model_id")
op.drop_table("embedding_model")
op.execute("DROP TYPE indexmodelstatus;")

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "df0c7ad8a076"
down_revision = "d7111c1238cd"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -11,8 +11,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "e0a68a81d434"
down_revision = "ae62505e3acc"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,38 @@
"""No Source Enum
Revision ID: e50154680a5c
Revises: fcd135795f21
Create Date: 2024-03-14 18:06:08.523106
"""
from alembic import op
import sqlalchemy as sa
from danswer.configs.constants import DocumentSource
# revision identifiers, used by Alembic.
revision = "e50154680a5c"
down_revision = "fcd135795f21"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.alter_column(
"search_doc",
"source_type",
type_=sa.String(length=50),
existing_type=sa.Enum(DocumentSource, native_enum=False),
existing_nullable=False,
)
op.execute("DROP TYPE IF EXISTS documentsource")
def downgrade() -> None:
op.alter_column(
"search_doc",
"source_type",
type_=sa.Enum(DocumentSource, native_enum=False),
existing_type=sa.String(length=50),
existing_nullable=False,
)

View File

@@ -11,8 +11,8 @@ from alembic import op
# revision identifiers, used by Alembic.
revision = "e6a4bbc13fe4"
down_revision = "b082fec533f0"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,27 @@
"""Add persona to chat_session
Revision ID: e86866a9c78a
Revises: 80696cf850ae
Create Date: 2023-11-26 02:51:47.657357
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "e86866a9c78a"
down_revision = "80696cf850ae"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column("chat_session", sa.Column("persona_id", sa.Integer(), nullable=True))
op.create_foreign_key(
"fk_chat_session_persona_id", "chat_session", "persona", ["persona_id"], ["id"]
)
def downgrade() -> None:
op.drop_constraint("fk_chat_session_persona_id", "chat_session", type_="foreignkey")
op.drop_column("chat_session", "persona_id")

View File

@@ -0,0 +1,118 @@
"""Private Personas DocumentSets
Revision ID: e91df4e935ef
Revises: 91fd3b470d1a
Create Date: 2024-03-17 11:47:24.675881
"""
import fastapi_users_db_sqlalchemy
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "e91df4e935ef"
down_revision = "91fd3b470d1a"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_table(
"document_set__user",
sa.Column("document_set_id", sa.Integer(), nullable=False),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=False,
),
sa.ForeignKeyConstraint(
["document_set_id"],
["document_set.id"],
),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
),
sa.PrimaryKeyConstraint("document_set_id", "user_id"),
)
op.create_table(
"persona__user",
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=False,
),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
),
sa.PrimaryKeyConstraint("persona_id", "user_id"),
)
op.create_table(
"document_set__user_group",
sa.Column("document_set_id", sa.Integer(), nullable=False),
sa.Column(
"user_group_id",
sa.Integer(),
nullable=False,
),
sa.ForeignKeyConstraint(
["document_set_id"],
["document_set.id"],
),
sa.ForeignKeyConstraint(
["user_group_id"],
["user_group.id"],
),
sa.PrimaryKeyConstraint("document_set_id", "user_group_id"),
)
op.create_table(
"persona__user_group",
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.Column(
"user_group_id",
sa.Integer(),
nullable=False,
),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.ForeignKeyConstraint(
["user_group_id"],
["user_group.id"],
),
sa.PrimaryKeyConstraint("persona_id", "user_group_id"),
)
op.add_column(
"document_set",
sa.Column("is_public", sa.Boolean(), nullable=True),
)
# fill in is_public for existing rows
op.execute("UPDATE document_set SET is_public = true WHERE is_public IS NULL")
op.alter_column("document_set", "is_public", nullable=False)
op.add_column(
"persona",
sa.Column("is_public", sa.Boolean(), nullable=True),
)
# fill in is_public for existing rows
op.execute("UPDATE persona SET is_public = true WHERE is_public IS NULL")
op.alter_column("persona", "is_public", nullable=False)
def downgrade() -> None:
op.drop_column("persona", "is_public")
op.drop_column("document_set", "is_public")
op.drop_table("persona__user")
op.drop_table("document_set__user")
op.drop_table("persona__user_group")
op.drop_table("document_set__user_group")

View File

@@ -0,0 +1,27 @@
"""Index From Beginning
Revision ID: ec3ec2eabf7b
Revises: dbaa756c2ccf
Create Date: 2024-02-06 22:03:28.098158
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "ec3ec2eabf7b"
down_revision = "dbaa756c2ccf"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"index_attempt", sa.Column("from_beginning", sa.Boolean(), nullable=True)
)
op.execute("UPDATE index_attempt SET from_beginning = False")
op.alter_column("index_attempt", "from_beginning", nullable=False)
def downgrade() -> None:
op.drop_column("index_attempt", "from_beginning")

View File

@@ -0,0 +1,40 @@
"""Add overrides to the chat session
Revision ID: ecab2b3f1a3b
Revises: 38eda64af7fe
Create Date: 2024-04-01 19:08:21.359102
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "ecab2b3f1a3b"
down_revision = "38eda64af7fe"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"chat_session",
sa.Column(
"llm_override",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)
op.add_column(
"chat_session",
sa.Column(
"prompt_override",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("chat_session", "prompt_override")
op.drop_column("chat_session", "llm_override")

View File

@@ -0,0 +1,27 @@
"""Add files to ChatMessage
Revision ID: ef7da92f7213
Revises: 401c1ac29467
Create Date: 2024-04-28 16:59:33.199153
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "ef7da92f7213"
down_revision = "401c1ac29467"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"chat_message",
sa.Column("files", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
)
def downgrade() -> None:
op.drop_column("chat_message", "files")

View File

@@ -0,0 +1,25 @@
"""Add pre-defined feedback
Revision ID: f1c6478c3fd8
Revises: 643a84a42a33
Create Date: 2024-05-09 18:11:49.210667
"""
from alembic import op
import sqlalchemy as sa
revision = "f1c6478c3fd8"
down_revision = "643a84a42a33"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"chat_feedback",
sa.Column("predefined_feedback", sa.String(), nullable=True),
)
def downgrade() -> None:
op.drop_column("chat_feedback", "predefined_feedback")

View File

@@ -0,0 +1,39 @@
"""Delete Tags with wrong Enum
Revision ID: fad14119fb92
Revises: 72bdc9929a46
Create Date: 2024-04-25 17:05:09.695703
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "fad14119fb92"
down_revision = "72bdc9929a46"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
# Some documents may lose their tags, but this is unavoidable: the enum
# mapping may have changed before the tag source column switched to a string
# (the affected documents will be reindexed anyway)
op.execute(
"""
DELETE FROM document__tag
WHERE tag_id IN (
SELECT id FROM tag
WHERE source ~ '^[0-9]+$'
)
"""
)
op.execute(
"""
DELETE FROM tag
WHERE source ~ '^[0-9]+$'
"""
)
def downgrade() -> None:
pass

View File

@@ -0,0 +1,39 @@
"""Add slack bot display type
Revision ID: fcd135795f21
Revises: 0a2b51deb0b8
Create Date: 2024-03-04 17:03:27.116284
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fcd135795f21"
down_revision = "0a2b51deb0b8"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"slack_bot_config",
sa.Column(
"response_type",
sa.Enum(
"QUOTES",
"CITATIONS",
name="slackbotresponsetype",
native_enum=False,
),
nullable=True,
),
)
op.execute(
"UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL"
)
op.alter_column("slack_bot_config", "response_type", nullable=False)
def downgrade() -> None:
op.drop_column("slack_bot_config", "response_type")

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "febe9eaa0644"
down_revision = "57b53544726e"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "ffc707a226b4"
down_revision = "30c1d5744104"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@@ -0,0 +1,3 @@
import os
__version__ = os.environ.get("DANSWER_VERSION", "") or "0.3-dev"

View File

@@ -4,7 +4,7 @@ from danswer.access.models import DocumentAccess
from danswer.configs.constants import PUBLIC_DOC_PAT
from danswer.db.document import get_acccess_info_for_documents
from danswer.db.models import User
from danswer.server.models import ConnectorCredentialPairIdentifier
from danswer.server.documents.models import ConnectorCredentialPairIdentifier
from danswer.utils.variable_functionality import fetch_versioned_implementation

View File

@@ -23,24 +23,28 @@ from fastapi_users.authentication import CookieTransport
from fastapi_users.authentication import Strategy
from fastapi_users.authentication.strategy.db import AccessTokenDatabase
from fastapi_users.authentication.strategy.db import DatabaseStrategy
from fastapi_users.db import SQLAlchemyUserDatabase
from fastapi_users.openapi import OpenAPIResponseType
from fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase
from sqlalchemy.orm import Session
from danswer.auth.schemas import UserCreate
from danswer.auth.schemas import UserRole
from danswer.configs.app_configs import AUTH_TYPE
from danswer.configs.app_configs import DISABLE_AUTH
from danswer.configs.app_configs import EMAIL_FROM
from danswer.configs.app_configs import REQUIRE_EMAIL_VERIFICATION
from danswer.configs.app_configs import SECRET
from danswer.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
from danswer.configs.app_configs import SMTP_PASS
from danswer.configs.app_configs import SMTP_PORT
from danswer.configs.app_configs import SMTP_SERVER
from danswer.configs.app_configs import SMTP_USER
from danswer.configs.app_configs import USER_AUTH_SECRET
from danswer.configs.app_configs import VALID_EMAIL_DOMAINS
from danswer.configs.app_configs import WEB_DOMAIN
from danswer.configs.constants import AuthType
from danswer.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
from danswer.configs.constants import DANSWER_API_KEY_PREFIX
from danswer.configs.constants import UNNAMED_KEY_PLACEHOLDER
from danswer.db.auth import get_access_token_db
from danswer.db.auth import get_user_count
from danswer.db.auth import get_user_db
@@ -48,6 +52,8 @@ from danswer.db.engine import get_session
from danswer.db.models import AccessToken
from danswer.db.models import User
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
from danswer.utils.variable_functionality import fetch_versioned_implementation
@@ -66,6 +72,26 @@ def verify_auth_setting() -> None:
logger.info(f"Using Auth Type: {AUTH_TYPE.value}")
def get_display_email(email: str | None, space_less: bool = False) -> str:
if email and email.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN):
name = email.split("@")[0]
if name == DANSWER_API_KEY_PREFIX + UNNAMED_KEY_PLACEHOLDER:
return "Unnamed API Key"
if space_less:
return name
return name.replace("API_KEY__", "API Key: ")
return email or ""
def user_needs_to_be_verified() -> bool:
# all other auth types besides basic should require users to be
# verified
return AUTH_TYPE != AuthType.BASIC or REQUIRE_EMAIL_VERIFICATION
def get_user_whitelist() -> list[str]:
global _user_whitelist
if _user_whitelist is None:
@@ -99,13 +125,18 @@ def verify_email_domain(email: str) -> None:
)
def send_user_verification_email(user_email: str, token: str) -> None:
def send_user_verification_email(
user_email: str,
token: str,
mail_from: str = EMAIL_FROM,
) -> None:
msg = MIMEMultipart()
msg["Subject"] = "Danswer Email Verification"
msg["From"] = "no-reply@danswer.dev"
msg["To"] = user_email
if mail_from:
msg["From"] = mail_from
link = f"{WEB_DOMAIN}/verify-email?token={token}"
link = f"{WEB_DOMAIN}/auth/verify-email?token={token}"
body = MIMEText(f"Click the following link to verify your email address: {link}")
msg.attach(body)
@@ -119,8 +150,8 @@ def send_user_verification_email(user_email: str, token: str) -> None:
class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
reset_password_token_secret = SECRET
verification_token_secret = SECRET
reset_password_token_secret = USER_AUTH_SECRET
verification_token_secret = USER_AUTH_SECRET
async def create(
self,
@@ -170,6 +201,11 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
self, user: User, request: Optional[Request] = None
) -> None:
logger.info(f"User {user.id} has registered.")
optional_telemetry(
record_type=RecordType.SIGN_UP,
data={"action": "create"},
user_id=str(user.id),
)
async def on_after_forgot_password(
self, user: User, token: str, request: Optional[Request] = None
@@ -194,7 +230,10 @@ async def get_user_manager(
yield UserManager(user_db)
cookie_transport = CookieTransport(cookie_max_age=SESSION_EXPIRE_TIME_SECONDS)
cookie_transport = CookieTransport(
cookie_max_age=SESSION_EXPIRE_TIME_SECONDS,
cookie_secure=WEB_DOMAIN.startswith("https"),
)
def get_database_strategy(
@@ -253,15 +292,36 @@ fastapi_users = FastAPIUserWithLogoutRouter[User, uuid.UUID](
)
optional_valid_user = fastapi_users.current_user(
active=True, verified=REQUIRE_EMAIL_VERIFICATION, optional=True
)
# NOTE: verified=REQUIRE_EMAIL_VERIFICATION is not used here since we
# take care of that in `double_check_user` ourself. This is needed, since
# we want the /me endpoint to still return a user even if they are not
# yet verified, so that the frontend knows they exist
optional_fastapi_current_user = fastapi_users.current_user(active=True, optional=True)
async def double_check_user(
async def optional_user_(
request: Request,
user: User | None,
db_session: Session,
) -> User | None:
"""NOTE: `request` and `db_session` are not used here, but are included
for the EE version of this function."""
return user
async def optional_user(
request: Request,
user: User | None = Depends(optional_fastapi_current_user),
db_session: Session = Depends(get_session),
) -> User | None:
versioned_fetch_user = fetch_versioned_implementation(
"danswer.auth.users", "optional_user_"
)
return await versioned_fetch_user(request, user, db_session)
async def double_check_user(
user: User | None,
optional: bool = DISABLE_AUTH,
) -> User | None:
if optional:
@@ -273,19 +333,19 @@ async def double_check_user(
detail="Access denied. User is not authenticated.",
)
if user_needs_to_be_verified() and not user.is_verified:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied. User is not verified.",
)
return user
async def current_user(
request: Request,
user: User | None = Depends(optional_valid_user),
db_session: Session = Depends(get_session),
user: User | None = Depends(optional_user),
) -> User | None:
double_check_user = fetch_versioned_implementation(
"danswer.auth.users", "double_check_user"
)
user = await double_check_user(request, user, db_session)
return user
return await double_check_user(user)
async def current_admin_user(user: User | None = Depends(current_user)) -> User | None:

View File

@@ -1,6 +1,4 @@
import os
from datetime import timedelta
from pathlib import Path
from typing import cast
from celery import Celery # type: ignore
@@ -10,16 +8,14 @@ from danswer.background.connector_deletion import delete_connector_credential_pa
from danswer.background.task_utils import build_celery_task_wrapper
from danswer.background.task_utils import name_cc_cleanup_task
from danswer.background.task_utils import name_document_set_sync_task
from danswer.configs.app_configs import FILE_CONNECTOR_TMP_STORAGE_PATH
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.connectors.file.utils import file_age_in_hours
from danswer.db.connector_credential_pair import get_connector_credential_pair
from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed
from danswer.db.document import prepare_to_modify_documents
from danswer.db.document_set import delete_document_set
from danswer.db.document_set import fetch_document_sets
from danswer.db.document_set import fetch_document_sets_for_documents
from danswer.db.document_set import fetch_documents_for_document_set
from danswer.db.document_set import fetch_documents_for_document_set_paginated
from danswer.db.document_set import get_document_set_by_id
from danswer.db.document_set import mark_document_set_as_synced
from danswer.db.engine import build_connection_string
@@ -28,20 +24,20 @@ from danswer.db.engine import SYNC_DB_API
from danswer.db.models import DocumentSet
from danswer.db.tasks import check_live_task_not_timed_out
from danswer.db.tasks import get_latest_task
from danswer.document_index.document_index_utils import get_both_index_names
from danswer.document_index.factory import get_default_document_index
from danswer.document_index.interfaces import DocumentIndex
from danswer.document_index.interfaces import UpdateRequest
from danswer.utils.batching import batch_generator
from danswer.utils.logger import setup_logger
logger = setup_logger()
celery_broker_url = "sqla+" + build_connection_string(db_api=SYNC_DB_API)
celery_backend_url = "db+" + build_connection_string(db_api=SYNC_DB_API)
connection_string = build_connection_string(db_api=SYNC_DB_API)
celery_broker_url = f"sqla+{connection_string}"
celery_backend_url = f"db+{connection_string}"
celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url)
_SYNC_BATCH_SIZE = 1000
_SYNC_BATCH_SIZE = 100
#####
@@ -66,20 +62,25 @@ def cleanup_connector_credential_pair_task(
connector_id=connector_id,
credential_id=credential_id,
)
if not cc_pair or not check_deletion_attempt_is_allowed(
connector_credential_pair=cc_pair
):
if not cc_pair:
raise ValueError(
"Cannot run deletion attempt - connector_credential_pair is not deletable. "
"This is likely because there is an ongoing / planned indexing attempt OR the "
"connector is not disabled."
f"Cannot run deletion attempt - connector_credential_pair with Connector ID: "
f"{connector_id} and Credential ID: {credential_id} does not exist."
)
deletion_attempt_disallowed_reason = check_deletion_attempt_is_allowed(cc_pair)
if deletion_attempt_disallowed_reason:
raise ValueError(deletion_attempt_disallowed_reason)
try:
# The bulk of the work is in here, updates Postgres and Vespa
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
document_index = get_default_document_index(
primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
)
return delete_connector_credential_pair(
db_session=db_session,
document_index=get_default_document_index(),
document_index=document_index,
cc_pair=cc_pair,
)
except Exception as e:
@@ -93,17 +94,13 @@ def sync_document_set_task(document_set_id: int) -> None:
"""For document sets marked as not up to date, sync the state from postgres
into the datastore. Also handles deletions."""
def _sync_document_batch(
document_ids: list[str], document_index: DocumentIndex
) -> None:
def _sync_document_batch(document_ids: list[str], db_session: Session) -> None:
logger.debug(f"Syncing document sets for: {document_ids}")
# begin a transaction, release lock at the end
with Session(get_sqlalchemy_engine()) as db_session:
# acquires a lock on the documents so that no other process can modify them
prepare_to_modify_documents(
db_session=db_session, document_ids=document_ids
)
# Acquires a lock on the documents so that no other process can modify them
with prepare_to_modify_documents(
db_session=db_session, document_ids=document_ids
):
# get current state of document sets for these documents
document_set_map = {
document_id: document_sets
@@ -113,31 +110,36 @@ def sync_document_set_task(document_set_id: int) -> None:
}
# update Vespa
document_index.update(
update_requests=[
UpdateRequest(
document_ids=[document_id],
document_sets=set(document_set_map.get(document_id, [])),
)
for document_id in document_ids
]
curr_ind_name, sec_ind_name = get_both_index_names(db_session)
document_index = get_default_document_index(
primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name
)
update_requests = [
UpdateRequest(
document_ids=[document_id],
document_sets=set(document_set_map.get(document_id, [])),
)
for document_id in document_ids
]
document_index.update(update_requests=update_requests)
with Session(get_sqlalchemy_engine()) as db_session:
try:
document_index = get_default_document_index()
documents_to_update = fetch_documents_for_document_set(
document_set_id=document_set_id,
db_session=db_session,
current_only=False,
)
for document_batch in batch_generator(
documents_to_update, _SYNC_BATCH_SIZE
):
cursor = None
while True:
document_batch, cursor = fetch_documents_for_document_set_paginated(
document_set_id=document_set_id,
db_session=db_session,
current_only=False,
last_document_id=cursor,
limit=_SYNC_BATCH_SIZE,
)
_sync_document_batch(
document_ids=[document.id for document in document_batch],
document_index=document_index,
db_session=db_session,
)
if cursor is None:
break
# if there are no connectors, then delete the document set. Otherwise, just
# mark it as successfully synced.
@@ -178,7 +180,7 @@ def check_for_document_sets_sync_task() -> None:
with Session(get_sqlalchemy_engine()) as db_session:
# check if any document sets are not synced
document_set_info = fetch_document_sets(
db_session=db_session, include_outdated=True
user_id=None, db_session=db_session, include_outdated=True
)
for document_set, _ in document_set_info:
if not document_set.is_up_to_date:
@@ -199,19 +201,6 @@ def check_for_document_sets_sync_task() -> None:
)
@celery_app.task(name="clean_old_temp_files_task", soft_time_limit=JOB_TIMEOUT)
def clean_old_temp_files_task(
age_threshold_in_hours: float | int = 24 * 7, # 1 week,
base_path: Path | str = FILE_CONNECTOR_TMP_STORAGE_PATH,
) -> None:
"""Files added via the File connector need to be deleted after ingestion
Currently handled async of the indexing job"""
os.makedirs(base_path, exist_ok=True)
for file in os.listdir(base_path):
if file_age_in_hours(file) > age_threshold_in_hours:
os.remove(Path(base_path) / file)
#####
# Celery Beat (Periodic Tasks) Settings
#####
@@ -220,8 +209,4 @@ celery_app.conf.beat_schedule = {
"task": "check_for_document_sets_sync_task",
"schedule": timedelta(seconds=5),
},
"clean-old-temp-files": {
"task": "clean_old_temp_files_task",
"schedule": timedelta(minutes=30),
},
}
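The sync loop above replaces a single unbounded fetch with cursor-based (keyset) pagination: each query resumes after the last document id seen, and a None cursor signals completion. The general shape, as a minimal sketch against a hypothetical document table:

import sqlalchemy as sa
from sqlalchemy.orm import Session

def fetch_page(
    db_session: Session, last_id: str | None, limit: int = 100
) -> tuple[list[str], str | None]:
    if last_id is None:
        stmt = sa.text("SELECT id FROM document ORDER BY id LIMIT :limit")
        params: dict = {"limit": limit}
    else:
        stmt = sa.text(
            "SELECT id FROM document WHERE id > :last_id ORDER BY id LIMIT :limit"
        )
        params = {"last_id": last_id, "limit": limit}
    ids = list(db_session.execute(stmt, params).scalars())
    # a full page means there may be more; otherwise stop
    return ids, (ids[-1] if len(ids) == limit else None)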

View File

@@ -2,7 +2,7 @@ from sqlalchemy.orm import Session
from danswer.background.task_utils import name_cc_cleanup_task
from danswer.db.tasks import get_latest_task
from danswer.server.models import DeletionAttemptSnapshot
from danswer.server.documents.models import DeletionAttemptSnapshot
def get_deletion_status(

View File

@@ -11,8 +11,6 @@ connector / credential pair from the access list
(6) delete all relevant entries from postgres
"""
import time
from collections.abc import Callable
from typing import cast
from sqlalchemy.orm import Session
@@ -21,8 +19,8 @@ from danswer.db.connector import fetch_connector_by_id
from danswer.db.connector_credential_pair import (
delete_connector_credential_pair__no_commit,
)
from danswer.db.document import delete_document_by_connector_credential_pair
from danswer.db.document import delete_documents_complete
from danswer.db.document import delete_document_by_connector_credential_pair__no_commit
from danswer.db.document import delete_documents_complete__no_commit
from danswer.db.document import get_document_connector_cnts
from danswer.db.document import get_documents_for_connector_credential_pair
from danswer.db.document import prepare_to_modify_documents
@@ -35,9 +33,8 @@ from danswer.db.index_attempt import delete_index_attempts
from danswer.db.models import ConnectorCredentialPair
from danswer.document_index.interfaces import DocumentIndex
from danswer.document_index.interfaces import UpdateRequest
from danswer.server.models import ConnectorCredentialPairIdentifier
from danswer.server.documents.models import ConnectorCredentialPairIdentifier
from danswer.utils.logger import setup_logger
from danswer.utils.variable_functionality import fetch_versioned_implementation
logger = setup_logger()
@@ -50,56 +47,65 @@ def _delete_connector_credential_pair_batch(
credential_id: int,
document_index: DocumentIndex,
) -> None:
"""
Removes a batch of document IDs from a cc-pair. If no other cc-pair still
references a document, it is permanently deleted.
"""
with Session(get_sqlalchemy_engine()) as db_session:
# acquire lock for all documents in this batch so that indexing can't
# override the deletion
prepare_to_modify_documents(db_session=db_session, document_ids=document_ids)
document_connector_cnts = get_document_connector_cnts(
with prepare_to_modify_documents(
db_session=db_session, document_ids=document_ids
)
# figure out which docs need to be completely deleted
document_ids_to_delete = [
document_id for document_id, cnt in document_connector_cnts if cnt == 1
]
logger.debug(f"Deleting documents: {document_ids_to_delete}")
document_index.delete(doc_ids=document_ids_to_delete)
delete_documents_complete(
db_session=db_session,
document_ids=document_ids_to_delete,
)
# figure out which docs need to be updated
document_ids_to_update = [
document_id for document_id, cnt in document_connector_cnts if cnt > 1
]
access_for_documents = get_access_for_documents(
document_ids=document_ids_to_update,
db_session=db_session,
cc_pair_to_delete=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
update_requests = [
UpdateRequest(
document_ids=[document_id],
access=access,
):
document_connector_cnts = get_document_connector_cnts(
db_session=db_session, document_ids=document_ids
)
for document_id, access in access_for_documents.items()
]
logger.debug(f"Updating documents: {document_ids_to_update}")
document_index.update(update_requests=update_requests)
delete_document_by_connector_credential_pair(
db_session=db_session,
document_ids=document_ids_to_update,
connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
db_session.commit()
# figure out which docs need to be completely deleted
document_ids_to_delete = [
document_id for document_id, cnt in document_connector_cnts if cnt == 1
]
logger.debug(f"Deleting documents: {document_ids_to_delete}")
document_index.delete(doc_ids=document_ids_to_delete)
delete_documents_complete__no_commit(
db_session=db_session,
document_ids=document_ids_to_delete,
)
# figure out which docs need to be updated
document_ids_to_update = [
document_id for document_id, cnt in document_connector_cnts if cnt > 1
]
access_for_documents = get_access_for_documents(
document_ids=document_ids_to_update,
db_session=db_session,
cc_pair_to_delete=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
update_requests = [
UpdateRequest(
document_ids=[document_id],
access=access,
)
for document_id, access in access_for_documents.items()
]
logger.debug(f"Updating documents: {document_ids_to_update}")
document_index.update(update_requests=update_requests)
delete_document_by_connector_credential_pair__no_commit(
db_session=db_session,
document_ids=document_ids_to_update,
connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
db_session.commit()
def cleanup_synced_entities(
@@ -173,14 +179,8 @@ def delete_connector_credential_pair(
# Clean up document sets / access information from Postgres
# and sync these updates to Vespa
cleanup_synced_entities__versioned = cast(
Callable[[ConnectorCredentialPair, Session], None],
fetch_versioned_implementation(
"danswer.background.connector_deletion",
"cleanup_synced_entities",
),
)
cleanup_synced_entities__versioned(cc_pair, db_session)
# TODO: add user group cleanup with `fetch_versioned_implementation`
cleanup_synced_entities(cc_pair, db_session)
# clean up the rest of the related Postgres entities
delete_index_attempts(

View File

@@ -0,0 +1,80 @@
"""Experimental functionality related to splitting up indexing
into a series of checkpoints to better handle intermittent failures
/ jobs being killed by cloud providers."""
import datetime
from danswer.configs.app_configs import EXPERIMENTAL_CHECKPOINTING_ENABLED
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import datetime_to_utc
def _2010_dt() -> datetime.datetime:
return datetime.datetime(year=2010, month=1, day=1, tzinfo=datetime.timezone.utc)
def _2020_dt() -> datetime.datetime:
return datetime.datetime(year=2020, month=1, day=1, tzinfo=datetime.timezone.utc)
def _default_end_time(
last_successful_run: datetime.datetime | None,
) -> datetime.datetime:
"""If year is before 2010, go to the beginning of 2010.
If year is 2010-2020, go in 5 year increments.
If year > 2020, then go in 180 day increments.
For connectors that don't support a `filter_by` and instead rely on `sort_by`
for polling, then this will cause a massive duplication of fetches. For these
connectors, you may want to override this function to return a more reasonable
plan (e.g. extending the 2020+ windows to 6 months, 1 year, or higher)."""
last_successful_run = (
datetime_to_utc(last_successful_run) if last_successful_run else None
)
if last_successful_run is None or last_successful_run < _2010_dt():
return _2010_dt()
if last_successful_run < _2020_dt():
return min(last_successful_run + datetime.timedelta(days=365 * 5), _2020_dt())
return last_successful_run + datetime.timedelta(days=180)
def find_end_time_for_indexing_attempt(
last_successful_run: datetime.datetime | None,
# source_type can be used to override the default for certain connectors, currently unused
source_type: DocumentSource,
) -> datetime.datetime | None:
"""Is the current time unless the connector is run over a large period, in which case it is
split up into large time segments that become smaller as it approaches the present
"""
# NOTE: source_type can be used to override the default for certain connectors
end_of_window = _default_end_time(last_successful_run)
now = datetime.datetime.now(tz=datetime.timezone.utc)
if end_of_window < now:
return end_of_window
# None signals that we should index up to current time
return None
def get_time_windows_for_index_attempt(
last_successful_run: datetime.datetime, source_type: DocumentSource
) -> list[tuple[datetime.datetime, datetime.datetime]]:
if not EXPERIMENTAL_CHECKPOINTING_ENABLED:
return [(last_successful_run, datetime.datetime.now(tz=datetime.timezone.utc))]
time_windows: list[tuple[datetime.datetime, datetime.datetime]] = []
start_of_window: datetime.datetime | None = last_successful_run
while start_of_window:
end_of_window = find_end_time_for_indexing_attempt(
last_successful_run=start_of_window, source_type=source_type
)
time_windows.append(
(
start_of_window,
end_of_window or datetime.datetime.now(tz=datetime.timezone.utc),
)
)
start_of_window = end_of_window
return time_windows
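A usage sketch of the windowing logic above: with checkpointing enabled, a connector last run in mid-2019 would first be indexed up to the start of 2020, then move forward in 180-day hops until the window reaches the present (the source type is illustrative):

import datetime

last_run = datetime.datetime(2019, 6, 1, tzinfo=datetime.timezone.utc)
windows = get_time_windows_for_index_attempt(
    last_successful_run=last_run, source_type=DocumentSource.CONFLUENCE
)
# roughly: (2019-06-01, 2020-01-01), (2020-01-01, 2020-06-29), ... up to now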

View File

@@ -0,0 +1,33 @@
import asyncio
import psutil
from dask.distributed import WorkerPlugin
from distributed import Worker
from danswer.utils.logger import setup_logger
logger = setup_logger()
class ResourceLogger(WorkerPlugin):
def __init__(self, log_interval: int = 60 * 5):
self.log_interval = log_interval
def setup(self, worker: Worker) -> None:
"""This method will be called when the plugin is attached to a worker."""
self.worker = worker
worker.loop.add_callback(self.log_resources)
async def log_resources(self) -> None:
"""Periodically log CPU and memory usage.
NOTE: must be async, or it will block the worker indefinitely, since
Dask uses Tornado (which is async) under the hood"""
while True:
cpu_percent = psutil.cpu_percent(interval=None)
memory_available_gb = psutil.virtual_memory().available / (1024.0**3)
# You can now log these values or send them to a monitoring service
logger.debug(
f"Worker {self.worker.address}: CPU usage {cpu_percent}%, Memory available {memory_available_gb}GB"
)
await asyncio.sleep(self.log_interval)
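Attaching the plugin to a running cluster uses Dask's standard worker-plugin registration; a short usage sketch:

from dask.distributed import Client

client = Client()  # connect to the scheduler
client.register_worker_plugin(ResourceLogger(log_interval=60))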

Some files were not shown because too many files have changed in this diff.