Compare commits

...

714 Commits

Author SHA1 Message Date
Dane Urban
5848975679 Remove comment 2026-01-08 19:21:24 -08:00
Dane Urban
dcc330010e Remove comment 2026-01-08 19:21:08 -08:00
Dane Urban
d0f5f1f5ae Handle error and log 2026-01-08 19:20:28 -08:00
Dane Urban
3e475993ff Change which event loop we get 2026-01-08 19:16:12 -08:00
Dane Urban
7c2b5fa822 Change logging 2026-01-08 17:29:00 -08:00
Dane Urban
409cfdc788 nits 2026-01-08 17:23:08 -08:00
dependabot[bot]
7a9a132739 chore(deps): bump werkzeug from 3.1.4 to 3.1.5 in /backend/requirements (#7300)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-09 00:08:17 +00:00
dependabot[bot]
33bad8c37b chore(deps): bump authlib from 1.6.5 to 1.6.6 in /backend/requirements (#7299)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-08 23:28:19 +00:00
Raunak Bhagat
9241ff7a75 refactor: migrate hooks to /hooks directory and update imports (#7295) 2026-01-08 14:57:06 -08:00
Chris Weaver
0a25bc30ec fix: auto-pause (#7289) 2026-01-08 14:45:30 -08:00
Raunak Bhagat
e359732f4c feat: add SvgEmpty icon and alphabetize icon exports (#7294) 2026-01-08 21:40:55 +00:00
Evan Lohn
be47866a4d chore: logging confluence perm sync errors better (#7291) 2026-01-08 20:24:03 +00:00
Wenxi
8a20540559 fix: use tag constraint name instead of index elements (#7288) 2026-01-08 18:52:12 +00:00
Jamison Lahman
e6e1f2860a chore(fe): remove items-center from onboarding cards (#7285) 2026-01-08 18:28:36 +00:00
Evan Lohn
fc3f433df7 fix: usage limits for indexing (#7287) 2026-01-08 18:26:52 +00:00
Evan Lohn
016caf453b fix: indexing and usage bugs (#7279) 2026-01-08 17:08:20 +00:00
Jamison Lahman
a9de25053f refactor(fe): remove "container" divs (#7271) 2026-01-08 07:23:51 +00:00
SubashMohan
8ef8dfdeb7 Cleanup/userfile indexing (#7221) 2026-01-08 05:07:19 +00:00
Danelegend
0643b626d9 fix(files): Display protected file errors (#7265)
Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>
2026-01-08 00:31:26 +00:00
Yuhong Sun
64a0eb52e0 chore: limit Deep Research to sequential calls only (#7275) 2026-01-08 00:03:09 +00:00
Evan Lohn
b82ffc82cf chore: upgrade client libs (#7249) 2026-01-07 23:59:57 +00:00
Danelegend
b3014b9911 fix(ui): deep research flag in chat edit (#7276)
Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>
2026-01-07 23:52:52 +00:00
Yuhong Sun
439707c395 chore: exa prompt fix (#7274) 2026-01-07 23:36:27 +00:00
dependabot[bot]
65351aa8bd chore(deps): bump marshmallow from 3.26.1 to 3.26.2 in /backend/requirements (#6970)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-07 23:33:59 +00:00
Wenxi
b44ee07eaf feat: improved backend driven notifications and new notification display (#7246) 2026-01-07 22:57:49 +00:00
Justin Tahara
065d391c08 fix(web crawler): Fixing decoding bytes issue (#7270) 2026-01-07 22:32:33 +00:00
dependabot[bot]
14fe3b375f chore(deps): bump urllib3 from 2.6.2 to 2.6.3 in /backend/requirements (#7272)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-07 21:47:53 +00:00
dependabot[bot]
bb1b96dded chore(deps): bump preact from 10.27.2 to 10.28.2 in /web (#7267)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-07 21:17:10 +00:00
Evan Lohn
9f949ae2d9 fix: custom llm provider prompt caching type safety (#7269) 2026-01-07 20:41:53 +00:00
acaprau
975c0e8009 feat(opensearch): Some low hanging fruit for Vespa <-> OpenSearch data parity (#7252) 2026-01-07 20:36:12 +00:00
Jamison Lahman
3dfb38c460 fix(fe): Failed indexing colors support dark theme (#7264) 2026-01-07 11:52:46 -08:00
Jamison Lahman
a1512a0485 fix(fe): fix InputComboBox shrinking when disabled (#7266) 2026-01-07 19:43:39 +00:00
roshan
8ea3bacd38 feat(evals): weekly eval runs (#7236) 2026-01-07 19:39:13 +00:00
Jamison Lahman
6b560b8162 fix(fe): admin containers apply bottom padding (#7263) 2026-01-07 18:34:53 +00:00
Jamison Lahman
3b750939ed fix(fe): move Text horizontal padding to pseudo-element (#7226)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-01-07 18:14:33 +00:00
Yuhong Sun
bd4cb17a48 chore: agent pin behavior (#7261) 2026-01-07 18:11:33 +00:00
SubashMohan
485cd9a311 feat(projects): enhance FileCard component with className prop to fix width issue (#7259) 2026-01-07 18:04:59 +00:00
SubashMohan
2108c72353 feat(chat): add custom copy behavior for HumanMessage component (#7257) 2026-01-07 18:04:55 +00:00
Danelegend
98f43fb6ab fix(files): propagate file error from backend (#7245)
Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>
2026-01-07 17:43:15 +00:00
Danelegend
e112ebb371 chore: add msoffcrypto-tool (#7247)
Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>
2026-01-07 17:38:09 +00:00
Jamison Lahman
f88cbcfe27 revert: "chore(deployments): prefer release environment (#6997)" (#7260) 2026-01-07 07:06:56 -08:00
Wenxi
0df0b10d3a feat: add public tag for api reference docs (#7227) 2026-01-07 06:09:36 +00:00
Jamison Lahman
ed0d12452a chore(deployments): don't treat ad-hoc releases as dry-runs (#7256) 2026-01-06 21:57:51 -08:00
Wenxi
dc7cb80594 fix: don't pass tool_choice for mistral provider (#7255) 2026-01-07 05:42:59 +00:00
Yuhong Sun
4312b24945 feat: Fix last cycle LLM did not return an answer (#7254) 2026-01-07 05:41:44 +00:00
Justin Tahara
afd920bb33 fix(users): Multi-tenant signup (#7237) 2026-01-06 18:38:05 -08:00
Jamison Lahman
d009b12aa7 chore(gha): paths-filter depends on actions/checkout (#7244) 2026-01-06 17:11:45 -08:00
Jamison Lahman
596b3d9f3e chore(gha): skip all of zizmor when applicable (#7243) 2026-01-06 17:08:50 -08:00
Jamison Lahman
1981c912b7 chore(gha): conditionally run zizmor (#7240) 2026-01-06 16:18:33 -08:00
Jamison Lahman
68b1bb8448 chore(gha): pin uv version w/ chart-testing-action (#7239) 2026-01-06 16:03:37 -08:00
Jamison Lahman
4676b5017f chore(whitespace): format pr-helm-chart-testing.yml (#7238) 2026-01-06 16:01:02 -08:00
Danelegend
eb7b6a5ce1 fix(chat): enable exclusion of failed chat sessions from api (#7233)
Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>
2026-01-06 23:04:35 +00:00
Justin Tahara
87d6df2621 fix(user): Block Malicious Accounts (#7235) 2026-01-06 14:52:44 -08:00
Danelegend
13b4108b53 fix: serper api key errors when adding (#7217)
Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>
2026-01-06 22:42:07 +00:00
acaprau
13e806b625 feat(opensearch): Add OpenSearch document index interface (#7143) 2026-01-06 22:35:47 +00:00
Nikolas Garza
f4f7839d84 fix: sidebar button shifting on hover (#7234) 2026-01-06 21:54:39 +00:00
Jamison Lahman
2dbf1c3b1f chore(devtools): ods with no args outputs help (#7230) 2026-01-06 21:14:26 +00:00
dependabot[bot]
288d4147c3 chore(deps): bump pynacl from 1.6.1 to 1.6.2 in /backend/requirements (#7228)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-06 20:56:17 +00:00
dependabot[bot]
fee27b2274 chore(deps): bump aiohttp from 3.13.2 to 3.13.3 in /backend/requirements (#7216)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-06 20:53:40 +00:00
Justin Tahara
340e938627 fix(chat): Math Formatting (#7229) 2026-01-06 20:37:34 +00:00
roshan
6faa47e0f7 fix: json serialize tool calls and other types in prompt cache (#7225) 2026-01-06 19:16:19 +00:00
Evan Lohn
ba6801f5af chore: add logs to tenant overrides (#7215) 2026-01-06 18:46:38 +00:00
SubashMohan
d7447eb8af fix(projects): project folder button not expandable (#7223) 2026-01-06 18:22:39 +00:00
SubashMohan
196f890a68 feat(image-generation): Add Azure OpenAI GPT image models (#7224) 2026-01-06 17:37:09 +00:00
Justin Tahara
3ac96572c3 fix(open_url): Parse PDF files with Open URL Tool (#7219) 2026-01-06 17:32:35 +00:00
SubashMohan
3d8ae22b3a seeds(config): image gen from llm providers (#7198) 2026-01-06 16:17:40 +00:00
SubashMohan
233d06ec0e feat(api): Enhance API key handling and masking in image generation (#7220) 2026-01-06 14:17:03 +05:30
Justin Tahara
9ff82ac740 fix(chat): Thinking in Regen Chat (#7213) 2026-01-06 04:13:11 +00:00
Justin Tahara
b15f01fd78 fix(ui): Image Gen Tooltip for Agent Workflow (#7211) 2026-01-06 02:57:08 +00:00
Nikolas Garza
6480cf6738 fix(fe): chat input box spacing and sizing fixes (#7204) 2026-01-06 01:38:08 +00:00
Justin Tahara
c521a4397a chore(llm): Remove Claude Opus 3 (#7214) 2026-01-06 01:34:58 +00:00
Evan Lohn
41a8d86df3 feat: prompt cache 3 (#6605) 2026-01-06 00:39:39 +00:00
roshan
735cf926e4 feat(evals): multi-turn evals (#7210) 2026-01-05 23:33:19 +00:00
Justin Tahara
035e73655f fix(ui): Update coloring for Doc Set Tooltip (#7208) 2026-01-05 22:12:42 +00:00
roshan
f317420f58 feat(evals): set log level for eval runs to warning (#7209) 2026-01-05 22:11:45 +00:00
Justin Tahara
d50a84f2e4 fix(ui): Remove Open URL Filter for Agents (#7205) 2026-01-05 21:16:30 +00:00
Justin Tahara
9b441e3686 feat(braintrust): Cost Tracking (#7201) 2026-01-05 20:44:53 +00:00
Justin Tahara
c4c1e16f19 fix(braintrust): Implement actual TTFA Metric (#7169) 2026-01-05 20:31:47 +00:00
Evan Lohn
9044e0f5fa feat: per-tenant usage limits (#7197) 2026-01-05 19:01:00 +00:00
Jamison Lahman
a180e1337b chore(fe): replace js isHovered with css hover effects (#7200) 2026-01-05 09:01:55 -08:00
Evan Lohn
6ca72291bc fix: llm usage tracking for dr (#7196) 2026-01-05 01:24:20 +00:00
Evan Lohn
c23046f7c0 chore: bump limits on cloud LLM usage (#7195) 2026-01-04 21:38:13 +00:00
Evan Lohn
d5f66ac146 feat: cloud usage limits (#7192) 2026-01-04 06:51:12 +00:00
Yuhong Sun
241fc8f877 feat: Deep Research Internal Search Tuning (#7193) 2026-01-03 22:54:23 -08:00
Jamison Lahman
f1ea41b519 chore(whitespace): ignore refactor rev (#7191) 2026-01-02 23:52:48 -08:00
Jamison Lahman
ed3f72bc75 refactor(whitespace): rm react fragment (#7190) 2026-01-02 23:49:39 -08:00
Jamison Lahman
2247e3cf8e chore(fe): rm unnecessary spacer from chat ui (#7189) 2026-01-02 23:42:54 -08:00
Jamison Lahman
47c49d86e8 chore(fe): improve human chat responsiveness (#7187) 2026-01-02 23:26:52 -08:00
Yuhong Sun
8c11330d46 feat: Easy send message nonstreaming (#7186) 2026-01-02 19:46:54 -08:00
Chris Weaver
22ac22c17d feat: improve display for models that are no longer present (#7184) 2026-01-03 02:39:06 +00:00
Yuhong Sun
c0a6a0fb4a feat: nonstreaming send chat message api (#7181) 2026-01-03 02:33:17 +00:00
Chris Weaver
7f31a39dc2 fix: regenerate models stuck in perma loading state (#7182)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-03 02:18:34 +00:00
Yuhong Sun
f1f61690e3 chore: spacing (#7183) 2026-01-02 17:57:55 -08:00
Jamison Lahman
8c3e17bbe5 revert: "chore(pre-commit): run uv-sync in active venv" (#7178) 2026-01-03 01:16:01 +00:00
Yuhong Sun
a1ab3678a0 chore: Plugin issue (#7179) 2026-01-02 16:43:51 -08:00
Yuhong Sun
2d79ed7bb4 New send message api (#7167) 2026-01-02 23:57:54 +00:00
Justin Tahara
f472fd763e fix(braintrust): Span Attributes Association (#7174) 2026-01-02 15:20:10 -08:00
Jamison Lahman
e47b2fccb4 chore(playwright): fix Exa configure tests (#7176) 2026-01-02 15:10:54 -08:00
acaprau
17a6fc4ebf chore(opensearch): Add external dep tests for OpenSearchClient (#7155) 2026-01-02 22:28:46 +00:00
acaprau
391c8c5cf7 feat(opensearch): Add OpenSearch client (#7137)
Flaky connector tests are failing for reasons unrelated to this PR. All other tests pass.
2026-01-02 14:11:14 -08:00
Jamison Lahman
d0e3ee1055 chore(deployments): prefer release environment (#6997) 2026-01-02 22:00:33 +00:00
Jamison Lahman
dc760cf580 chore(playwright): prefer baseURL (#7171) 2026-01-02 13:30:10 -08:00
Justin Tahara
d49931fce1 fix(braintrust): Fix Tenant ID to Token Association (#7173) 2026-01-02 13:10:34 -08:00
Jamison Lahman
41d1d265a0 chore(docker): .dockerignore /tests/ (#7172) 2026-01-02 20:19:52 +00:00
Chris Weaver
45a2207662 chore: cleanup old LLM provider update mechanism (#7170) 2026-01-02 20:14:27 +00:00
Justin Tahara
725ed6a523 fix(braintrust): Updating naming for metric (#7168) 2026-01-02 20:06:43 +00:00
acaprau
2452671420 feat(opensearch): Add OpenSearch queries (#7133) 2026-01-02 19:05:43 +00:00
Jamison Lahman
a4a767f146 fix(ollama): rm unsupported tool_choice option (#7156) 2026-01-02 18:55:57 +00:00
Wenxi
8304fbd14c fix: don't pass selected tab to connector specific config (#7165) 2026-01-02 18:19:33 +00:00
Jamison Lahman
7db7d4c965 chore(docker): publish inference_model_server port 9000 in dev (#7166) 2026-01-02 10:04:45 -08:00
SubashMohan
2cc2b5aee9 feat(image-generation): e2e tests (#7164) 2026-01-02 19:13:59 +05:30
SubashMohan
0c35ffe468 feat(config): Image generation frontend (#7019) 2026-01-02 11:36:57 +00:00
SubashMohan
adece3f812 Tests/theme (#7163)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-02 16:14:13 +05:30
Jamison Lahman
b44349e67d chore(blame): introduce .git-blame-ignore-revs to ignore refactors (#7162) 2026-01-01 22:23:34 -08:00
Jamison Lahman
3134e5f840 refactor(whitespace): rm temporary react fragments (#7161) 2026-01-01 22:10:31 -08:00
dependabot[bot]
5b8223b6af chore(deps): bump qs from 6.14.0 to 6.14.1 in /web (#7147)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-01-02 05:05:00 +00:00
Jamison Lahman
30ab85f5a0 chore(fe): follow up styling fixes to #7129 (#7160) 2026-01-01 19:58:43 -08:00
Jamison Lahman
daa343c30b perf(chat): memoize chat messages (#7157)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-01-01 19:10:18 -08:00
devin-ai-integration[bot]
c67936a4c1 fix: non-thinking responses not displaying until page refresh (#7123)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: roshan@onyx.app <rohod04@gmail.com>
Co-authored-by: Wenxi <wenxi@onyx.app>
Co-authored-by: Chris <chris@onyx.app>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
Co-authored-by: Raunak Bhagat <r@rabh.io>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: SubashMohan <subashmohan75@gmail.com>
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: roshan <38771624+rohoswagger@users.noreply.github.com>
2026-01-01 21:15:55 +00:00
Jamison Lahman
4578c268ed perf(chat): consolidate chat UI layout style (#7129) 2026-01-01 13:10:47 -08:00
roshan
7658917fe8 feat: running evals locally (#7145) 2026-01-01 18:39:08 +00:00
roshan
fd4695d5bd feat: add tool call validation to eval cli (#7144)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
2026-01-01 15:46:05 +00:00
devin-ai-integration[bot]
a25362a709 fix: check stop signal during active streaming (#7151)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: roshan@onyx.app <rohod04@gmail.com>
2026-01-01 15:33:03 +00:00
SubashMohan
1eb4962861 refactor: White-labelling (#6938) 2026-01-01 09:55:58 +00:00
Nikolas Garza
aa1c956608 fix: Duplicate model provider sections for unenriched LLM models (#7148) 2026-01-01 03:03:40 +00:00
Chris Weaver
19e5c47f85 fix: when onboarding flow shows up (#7154) 2025-12-31 18:29:36 -08:00
Chris Weaver
872a2ed58a feat: add new models to cloud (#7149) 2026-01-01 01:50:26 +00:00
Jessica Singh
42047a4dce feat(tools): extend open_url to handle indexed content urls (#6822) 2026-01-01 01:31:28 +00:00
Chris Weaver
a3a9847d76 fix: onboarding display (#7153) 2025-12-31 17:19:00 -08:00
Yuhong Sun
3ade17c380 chore: fix linter issues (#7122) 2025-12-31 16:48:33 -08:00
Chris Weaver
9150ba1905 fix: skip failing tests (#7152) 2026-01-01 00:08:46 +00:00
Justin Tahara
cb14e84750 feat(connectors): Add Deletion Popup (#7054) 2025-12-31 22:12:57 +00:00
Chris Weaver
c916517342 feat: add auto LLM model updates from GitHub config (#6830)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-31 14:02:08 -08:00
Justin Tahara
45b902c950 fix(desktop): Disable reload on Mac (#7141) 2025-12-31 21:06:02 +00:00
Nikolas Garza
981b43e47b fix: prevent Slack federated search query multiplication (#7125) 2025-12-31 20:41:50 +00:00
Yuhong Sun
b5c45cbce0 Chat Flow Readme (#7142) 2025-12-31 11:15:48 -08:00
Yuhong Sun
451f10343e Update README.md (#7140) 2025-12-31 10:11:31 -08:00
SubashMohan
ceeed2a562 Feat/image config backend (#6961) 2025-12-31 11:39:32 +00:00
SubashMohan
bcc7a7f264 refactor(modals): All modals use new Modal component (#6729) 2025-12-31 07:54:08 +00:00
SubashMohan
972ef34b92 Fix/input combobox dropdown (#7015) 2025-12-31 13:01:03 +05:30
Raunak Bhagat
9d11d1f218 feat: Refreshed agent creation page (#6241)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-31 05:09:07 +00:00
Chris Weaver
4db68853cd fix: openai provider identification on the admin panel (#7135) 2025-12-31 02:14:46 +00:00
Wenxi
b08fafc66b fix: make litellm testing script prettier (#7136) 2025-12-30 18:08:25 -08:00
Wenxi
1e61bf401e fix: lazy load tracing providers to avoid spamming logs when not configured (#7134) 2025-12-31 02:03:33 +00:00
Chris Weaver
0541c2989d fix: downgrade (#7132) 2025-12-31 01:45:41 +00:00
Yuhong Sun
743b996698 fix: Remove Default Reminder (#7131) 2025-12-31 00:55:16 +00:00
Chris Weaver
16e77aebfc refactor: onboarding forms (#7105) 2025-12-30 16:56:13 -08:00
Yuhong Sun
944f4a2464 fix: reenable force search parameter (#7130) 2025-12-31 00:27:17 +00:00
Nikolas Garza
67db7c0346 fix: suppress Jest act() warning spam in test output (#7127) 2025-12-30 22:32:15 +00:00
Jamison Lahman
8e47cd4e4f chore(fe): baseline align inline code spans (#7128) 2025-12-30 22:21:59 +00:00
devin-ai-integration[bot]
e8a4fca0a3 fix: persist onboarding flow until user explicitly finishes (#7111)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Chris <chris@onyx.app>
2025-12-30 21:42:04 +00:00
Wenxi
6d783ca691 fix: gemini default location global (#7124) 2025-12-30 21:15:57 +00:00
Yuhong Sun
283317bd65 chore: prompts (#7108) 2025-12-30 12:22:21 -08:00
acaprau
2afbc74224 feat: Add OpenSearch schema (#7118) 2025-12-30 19:55:34 +00:00
acaprau
5b273de8be chore: Add script to restart OpenSearch container (#7110) 2025-12-30 19:48:30 +00:00
roshan
a0a24147b5 fix: stop-generation for deep research (#7050)
Co-authored-by: Raunak Bhagat <r@rabh.io>
Co-authored-by: acaprau <48705707+acaprau@users.noreply.github.com>
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-12-30 19:17:28 +00:00
roshan
fd31da3159 chore: clean up stop signal redis fence (#7119) 2025-12-30 18:55:21 +00:00
Yuhong Sun
cd76ac876b fix: MIT integration tests (#7121) 2025-12-30 10:51:36 -08:00
Jamison Lahman
8f205172eb chore(gha): ensure uv cache is pruned before upload (#7120) 2025-12-30 10:50:08 -08:00
roshan
be70fa21e3 fix: stop-generation for non-deep research (#7045)
Co-authored-by: Raunak Bhagat <r@rabh.io>
Co-authored-by: acaprau <48705707+acaprau@users.noreply.github.com>
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
2025-12-30 18:41:20 +00:00
roshan
0687bddb6f fix: popover max height setting (#7093)
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
2025-12-30 18:40:54 +00:00
roshan
73091118e3 fix: rendering parallel research agents cleanly (#7078) 2025-12-30 18:40:45 +00:00
Wenxi
bf8590a637 feat: add z indices for confirmation modal (#7114) 2025-12-30 18:40:16 +00:00
Chris Weaver
8a6d597496 perf: update web/STANDARDS.md + add standards to CLAUDE.md / AGENTS.md (#7039)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-12-30 09:36:58 -08:00
Jamison Lahman
f0bc538f60 chore(fe): fix some Text that should be spans (#7112) 2025-12-30 08:06:15 -08:00
Jamison Lahman
0b6d9347bb fix(ux): Share Chat modal uses CopyIconButton (#7116) 2025-12-30 08:05:02 -08:00
Raunak Bhagat
415538f9f8 refactor: Improve form field components (#7104) 2025-12-29 23:26:56 -08:00
Jamison Lahman
969261f314 chore(desktop): disable nightly builds (#7115) 2025-12-29 22:42:39 -08:00
Jamison Lahman
eaa4d5d434 chore(desktop): remove duplicate startup log, onyx-desktop (#7113) 2025-12-29 19:58:25 -08:00
acaprau
19e6900d96 chore: Add opensearch-py 3.0.0 (#7103) 2025-12-30 03:50:22 +00:00
Jamison Lahman
f3535b94a0 chore(docker): add healthchecks (#7089)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-12-29 19:29:16 -08:00
Jamison Lahman
383aa222ba chore(fe): refresh chat Stack Trace button (#7092) 2025-12-29 18:29:58 -08:00
Yuhong Sun
f32b21400f chore: Fix Tests (#7107) 2025-12-29 17:24:40 -08:00
Jamison Lahman
5d5e71900e chore(fe): Text default span follow up (#7106) 2025-12-29 17:22:09 -08:00
Yuhong Sun
06ce7484b3 chore: docker compose no MCP server (#7100) 2025-12-29 16:40:15 -08:00
Jamison Lahman
700db01b33 chore(fe): make Text component default to span (#7096) 2025-12-29 16:30:09 -08:00
acaprau
521e9f108f fix: The update method for the new Vespa interface should correctly handle None chunk_count (#7098) 2025-12-30 00:23:37 +00:00
Yuhong Sun
1dfb62bb69 chore: Remove unused resources from model server (#7094) 2025-12-29 16:18:37 -08:00
Wenxi
14a1b3d197 fix: get_tenant_users script invalid sql stmt (#7097) 2025-12-29 23:58:11 +00:00
Chris Weaver
f3feac84f3 refactor: llm provider forms (#7006) 2025-12-29 14:09:52 -08:00
roshan
d6e7c11c92 fix: think tool newline unescaping (#7086)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-29 20:34:12 +00:00
Jamison Lahman
d66eef36d3 feat(ux): include a copy button for chat stack traces (#7091) 2025-12-29 19:59:38 +00:00
Wenxi
05fd974968 refactor: let litellm handle translating reasoning_effort to anthropic thinking (#7090) 2025-12-29 19:55:54 +00:00
roshan
ad882e587d fix: parallel tool tab hover (#7083)
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
2025-12-29 18:01:39 +00:00
Jamison Lahman
f2b1f20161 chore(gha): playwright and integration are optional on merge_group (#7080) 2025-12-29 17:42:50 +00:00
Raunak Bhagat
6ec3b4c6cf feat: Add warnings support to Formik input layouts (#7087) 2025-12-29 09:30:30 -08:00
roshan
529a2e0336 chore: bolding enhancement (#7002)
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
2025-12-29 03:27:37 +00:00
Wenxi
35602519c5 feat: add litellm debugging scripts (#7085)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-28 14:30:12 -08:00
Wenxi
7e0b773247 feat: centralized llm provider names (#7084) 2025-12-28 20:50:37 +00:00
Wenxi
924b5e5c70 refactor: stopgap cleanup core litellm arg processing (#7065) 2025-12-28 19:54:54 +00:00
Chris Weaver
cfcb09070d fix: improve URL handling (#7079) 2025-12-27 21:09:31 -08:00
Jamison Lahman
27b0fee3c4 chore(pre-commit): rm check-yaml (#7081) 2025-12-27 12:16:13 -08:00
Jamison Lahman
5617e86b14 chore(tests): use pytest-alembic to validate migrations (#7069) 2025-12-27 19:16:49 +00:00
Jamison Lahman
b909eb0205 chore(alembic): fix new_chat_history downgrade (#7073) 2025-12-27 16:56:56 +00:00
Raunak Bhagat
2a821134c0 refactor: Improve shared components (#7077) 2025-12-26 22:37:47 -08:00
Raunak Bhagat
ad632e4440 fix: Update context API (#7076) 2025-12-26 22:02:00 -08:00
Raunak Bhagat
153e313021 refactor: reorganize hooks to web/src/hooks directory (#7071) 2025-12-26 21:01:40 -08:00
Raunak Bhagat
abc80d7feb feat: add actions-layouts and improve input-layouts (#7072) 2025-12-26 21:01:17 -08:00
Jamison Lahman
1a96e894fe chore(deps): pin uv in CI (#7074) 2025-12-26 20:40:05 -08:00
Jamison Lahman
5a09a73df8 chore(tests): delete skipped migration tests (#7070) 2025-12-27 04:19:59 +00:00
Jamison Lahman
02723291b3 chore(gha): remove fetch-depth: 0 from playwright (#7066) 2025-12-27 02:10:20 +00:00
Justin Tahara
324388fefc chore(envvar): Cleaning up Unused EnvVars (#7067) 2025-12-26 17:57:32 -08:00
Justin Tahara
4a119e869b chore(envvar): Cleanup Unused envvars (#7056) 2025-12-27 01:32:52 +00:00
Jamison Lahman
20127ba115 chore(docker): move docker-bake.hcl to toplevel (#7064) 2025-12-27 01:04:05 +00:00
Justin Tahara
3d6344073d fix(ui): Align Web Search Page (#7061) 2025-12-26 16:17:28 -08:00
Justin Tahara
7dd98b717b fix(ui): Align Performance Pages (#7062) 2025-12-26 16:05:34 -08:00
Wenxi
0ce5667444 fix: default to global region for gemini models (#7060) 2025-12-26 23:08:17 +00:00
Wenxi
b03414e643 chore: removed unnecessary monkey patch (#7058) 2025-12-26 22:41:09 +00:00
Jamison Lahman
7a67de2d72 chore(github): make PR template instructions comments (#7053) 2025-12-26 21:00:14 +00:00
roshan
300bf58715 fix: remove dr feature flag (#7052) 2025-12-26 20:58:08 +00:00
Justin Tahara
b2bd0ddc50 fix(chat): Custom Agent Chat Rename (#7051) 2025-12-26 20:46:40 +00:00
Justin Tahara
a3d847b05c fix(ui): Copy Traceback button (#7049) 2025-12-26 19:29:29 +00:00
acaprau
d529d0672d fix: test_connector_pause_while_indexing keeps timing out, lower the number of docs to wait for from 16 to 4 (#6976) 2025-12-26 17:33:57 +00:00
Raunak Bhagat
f98a5e1119 fix: Overlay ordering bug (#7048) 2025-12-26 09:00:29 -08:00
Raunak Bhagat
6ec0b09139 feat: Add small icons + scripts + readme to Opal (#7046) 2025-12-26 08:06:57 -08:00
roshan
53691fc95a chore: refactor search tool renderer (#7044) 2025-12-25 22:04:11 -05:00
Jamison Lahman
3400e2a14d chore(desktop): skip desktop on beta tags (#7043) 2025-12-25 13:41:05 -08:00
roshan
d8cc1f7a2c chore: clean up unused feature flag (#7042) 2025-12-25 16:35:53 -05:00
roshan
2098e910dd chore: clean up search renderer v2 (#7041) 2025-12-25 16:31:26 -05:00
Jamison Lahman
e5491d6f79 revert: "chore(fe): enable reactRemoveProperties" (#7040) 2025-12-25 12:00:52 -08:00
Raunak Bhagat
a8934a083a feat: Add useOnChangeValue hook and update form components (#7036) 2025-12-25 11:40:39 -08:00
Chris Weaver
80e9507e01 fix: google index names (#7038) 2025-12-25 17:56:22 +00:00
Raunak Bhagat
60d3be5fe2 refactor: Improve form hook to handle events directly (#7035) 2025-12-25 02:16:47 -08:00
Raunak Bhagat
b481cc36d0 refactor: Update form field components to use new hook (#7034) 2025-12-25 01:54:07 -08:00
Raunak Bhagat
65c5da8912 feat: Create new InputDatePicker component (#7023) 2025-12-24 23:23:47 -08:00
Jamison Lahman
0a0366e6ca chore(fe): enable reactRemoveProperties (#7030) 2025-12-25 05:12:36 +00:00
Jamison Lahman
84a623e884 chore(fe): remove reliance on data-testid prop (#7031) 2025-12-24 20:44:28 -08:00
roshan
6b91607b17 chore: feature flag for deep research (#7022) 2025-12-24 21:38:34 -05:00
Wenxi
82fb737ad9 fix: conditional tool choice param for anthropic (#7029) 2025-12-25 00:25:19 +00:00
Justin Tahara
eed49e699e fix(docprocessing): Cleaning up Events (#7025) 2025-12-24 12:25:43 -08:00
Justin Tahara
3cc7afd334 fix(chat): Copy functionality (#7027) 2025-12-24 12:22:02 -08:00
Jamison Lahman
bcbfd28234 chore(fe): "Copy code"->"Copy" (#7018) 2025-12-24 11:38:02 -08:00
Rohit V
faa47d9691 chore(docs): update docker compose command in CONTRIBUTING.md (#7020)
Co-authored-by: Rohit V <rohit.v@thoughtspot.com>
2025-12-24 11:18:12 -08:00
Wenxi
6649561bf3 fix: multiple tool calls unit test (#7026) 2025-12-24 18:08:12 +00:00
Wenxi
026cda0468 fix: force tool with openai (#7024) 2025-12-24 09:37:14 -08:00
Raunak Bhagat
64297e5996 feat: add formik field components and helpers (#7017)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-24 08:09:24 -08:00
Raunak Bhagat
c517137c0a refactor: Update CSS stylings for SidebarTab component (#7016) 2025-12-23 22:56:06 -08:00
SubashMohan
cbfbe0bbbe fix(onboarding): Azure llm url parsing (#6950) 2025-12-24 12:17:31 +05:30
Raunak Bhagat
13ca4c6650 refactor: remove icon prop from UserFilesModal (#7014) 2025-12-23 22:35:42 -08:00
Raunak Bhagat
e8d9e36d62 refactor: SidebarTab fixes (#7012)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-24 06:06:06 +00:00
Jamison Lahman
77e4f3c574 fix(fe): right sidebar buttons don't inherit href (#7007)
Co-authored-by: Raunak Bhagat <r@rabh.io>
2025-12-24 04:41:22 +00:00
Chris Weaver
2bdc06201a fix: improve scrollbar for code blocks (#7013) 2025-12-24 03:38:09 +00:00
Yuhong Sun
077ba9624c fix: parallel tool call with openai (#7010) 2025-12-23 19:07:23 -08:00
Raunak Bhagat
81eb1a1c7c fix: Fix import error (#7011) 2025-12-23 19:00:10 -08:00
Yuhong Sun
1a16fef783 feat: DEEP RESEARCH ALPHA HUZZAH (#7001) 2025-12-23 18:45:43 -08:00
Yuhong Sun
027692d5eb chore: bump litellm version (#7009) 2025-12-23 18:09:21 -08:00
Raunak Bhagat
3a889f7069 refactor: Add more comprehensive layout components (#6989) 2025-12-23 17:54:32 -08:00
Raunak Bhagat
20d67bd956 feat: Add new components to refresh-components (#6991)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2025-12-23 17:53:59 -08:00
acaprau
8d6b6accaf feat(new vector db interface): Plug in retrievals for Vespa (#6966) 2025-12-23 23:30:59 +00:00
Chris Weaver
ed76b4eb55 fix: masking (#7003) 2025-12-23 23:23:03 +00:00
Raunak Bhagat
7613c100d1 feat: update icons (#6988) 2025-12-23 15:11:33 -08:00
Raunak Bhagat
c52d3412de refactor: add more helpful utility hooks (#6987) 2025-12-23 14:38:13 -08:00
Jamison Lahman
96b6162b52 chore(desktop): fix windows version (#6999) 2025-12-23 22:21:30 +00:00
Yuhong Sun
502ed8909b chore: Tuning Deep Research (#7000) 2025-12-23 14:19:20 -08:00
roshan
8de75dd033 feat: deep research (#6936)
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
2025-12-23 21:24:27 +00:00
Wenxi
74e3668e38 chore: cleanup drupal connector nits (#6998) 2025-12-23 21:24:21 +00:00
Justin Tahara
2475a9ef92 fix(gdrive): Investigation Logging (#6996) 2025-12-23 13:26:44 -08:00
rexjohannes
690f54c441 feat: Drupal Wiki connector (#4773)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-23 19:28:23 +00:00
Jamison Lahman
71bb0c029e chore(desktop): deployment automation for the desktop app (#6990) 2025-12-23 09:20:59 -08:00
Yuhong Sun
ccf890a129 Small Tuning (#6986) 2025-12-22 20:13:17 -08:00
acaprau
a7bfdebddf feat(new vector db interface): Implement retrievals for Vespa (#6963) 2025-12-23 03:00:38 +00:00
Yuhong Sun
6fc5ca12a3 Fine grained Braintrust tracing (#6985) 2025-12-22 19:08:49 -08:00
Wenxi
8298452522 feat: add open book icon (#6984) 2025-12-22 19:00:31 -08:00
Wenxi
2559327636 fix: allow chat file previewing and fix csv rendering (#6915) 2025-12-23 02:08:42 +00:00
Yuhong Sun
ef185ce2c8 feat: DR Tab for intermediate reports and Index increment for final report section end (#6983) 2025-12-22 18:10:45 -08:00
Wenxi
a04fee5cbd feat: add optional image parsing for docx (#6981) 2025-12-22 17:45:44 -08:00
Justin Tahara
e507378244 fix(vertex-ai): Bump Default Batch Size (#6982) 2025-12-22 17:21:55 -08:00
Justin Tahara
e6be3f85b2 fix(gemini): No Asyncio (#6980) 2025-12-23 01:07:40 +00:00
acaprau
cc96e303ce feat(new vector db interface): Plug in delete for Vespa (#6867)
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2025-12-23 00:54:52 +00:00
Nikolas Garza
e0fcb1f860 feat(fe): speed up pre-commit TypeScript type checking with tsgo (#6978) 2025-12-23 00:22:42 +00:00
roshan
f5442c431d feat: add PacketException handling (#6968) 2025-12-23 00:09:51 +00:00
acaprau
652e5848e5 feat(new vector db interface): Implement delete for Vespa (#6866)
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2025-12-22 23:58:32 +00:00
Wenxi
3fa1896316 fix: download cloud svg (#6977) 2025-12-22 14:54:33 -08:00
roshan
f855ecab11 feat: add dr loop tracing (#6971) 2025-12-22 21:35:29 +00:00
Jamison Lahman
fd26176e7d revert: "fix(fe): make recent chat sidebar buttons links" (#6967) 2025-12-22 12:12:48 -08:00
Justin Tahara
8986f67779 fix(docprocessing): Reusing Threads (#6916) 2025-12-22 19:03:46 +00:00
Nikolas Garza
42f2d4aca5 feat(teams): Enable Auto Sync Permissions for Teams connector (#6648) 2025-12-22 18:57:01 +00:00
Evan Lohn
7116d24a8c fix: small MCP UI changes (#6862) 2025-12-22 18:09:36 +00:00
Justin Tahara
7f4593be32 fix(vertex): Infinite Embedding (#6917) 2025-12-22 10:43:11 -08:00
Wenxi
f47e25e693 feat(ingestion): restore delete api (#6962) 2025-12-22 10:06:43 -08:00
acaprau
877184ae97 feat(new vector db interface): Plug in update for Vespa (#6792) 2025-12-22 16:25:13 +00:00
acaprau
54961ec8ef fix: test_multi_llm.py::test_multiple_tool_calls callsite fix (#6959) 2025-12-22 08:06:13 -08:00
Raunak Bhagat
e797971ce5 fix: Layout fix + CSR updates (#6958)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-22 08:00:39 -08:00
Jamison Lahman
566cca70d8 chore(fe): conditionally render header on chatSession (#6955) 2025-12-22 02:37:01 -08:00
Jamison Lahman
be2d0e2b5d chore(fe): prevent header continuous render (#6954) 2025-12-22 00:46:21 -08:00
Jamison Lahman
692f937ca4 chore(fmt): fix prettier (#6953) 2025-12-22 00:30:21 -08:00
Jamison Lahman
11de1ceb65 chore(ts): typedRoutes = true (#6930) 2025-12-22 00:21:44 -08:00
Jamison Lahman
19993b4679 chore(chat): refactor chat header (#6952)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-22 00:20:46 -08:00
Yuhong Sun
9063827782 Enable DR on the backend (#6948) 2025-12-21 18:25:24 -08:00
Yuhong Sun
0cc6fa49d7 DR Minor tweaking (#6947) 2025-12-21 17:23:52 -08:00
roshan
3f3508b668 fix: sanitize postgres to remove nul characters (#6934) 2025-12-22 00:19:25 +00:00
Jamison Lahman
1c3a88daf8 perf(chat): avoid re-rendering chat on ChatInput change (#6945) 2025-12-21 16:15:34 -08:00
Yuhong Sun
92f30bbad9 Fix misalignment in DR failed agents (#6946) 2025-12-21 15:07:45 -08:00
Yuhong Sun
4abf43d85b DR bug fixes (#6944) 2025-12-21 14:56:52 -08:00
Jamison Lahman
b08f9adb23 chore(perf): frontend stats overlay in dev (#6840)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-21 22:12:54 +00:00
Yuhong Sun
7a915833bb More correct packet handling (#6943) 2025-12-21 13:48:27 -08:00
Jamison Lahman
9698b700e6 fix(desktop): Linux-specific fixes (#6928) 2025-12-21 20:39:52 +00:00
Jamison Lahman
fd944acc5b fix(fe): chat content links use proper hrefs (#6939) 2025-12-21 12:09:20 -08:00
Yuhong Sun
a1309257f5 Log (#6937) 2025-12-20 23:28:28 -08:00
Yuhong Sun
6266dc816d feat: Deep Research Citation Handling (#6935) 2025-12-20 22:46:20 -08:00
Jamison Lahman
83c011a9e4 chore(deps): upgrade urllib3 2.6.1->2.6.2 (#6932) 2025-12-20 20:21:10 -08:00
Yuhong Sun
8d1ac81d09 Citation Processing (#6933) 2025-12-20 20:08:24 -08:00
Yuhong Sun
d8cd4c9928 feat: DR fix a couple issues with saving (#6931) 2025-12-20 18:28:04 -08:00
Jamison Lahman
5caa4fdaa0 fix(chat): attached images are flush right (#6927) 2025-12-20 07:20:14 -08:00
Jamison Lahman
f22f33564b fix(fe): ensure error messages have padding (#6926) 2025-12-20 07:03:27 -08:00
Jamison Lahman
f86d282a47 chore(fe): ensure chat padding on medium size viewport (#6925) 2025-12-20 06:38:16 -08:00
Jamison Lahman
ece1edb80f fix(fe): make recent chat sidebar buttons links (#6924) 2025-12-20 06:04:59 -08:00
Jamison Lahman
c9c17e19f3 fix(chat): only scroll to bottom on page load (#6923) 2025-12-20 05:01:56 -08:00
Jamison Lahman
40e834e0b8 fix(fe): make "New Session" button a link (#6922) 2025-12-20 04:29:22 -08:00
Jamison Lahman
45bd82d031 fix(style): floating scroll down is z-sticky (#6921) 2025-12-20 04:12:48 -08:00
Yuhong Sun
27c1619c3d feat: hyperparams (#6920) 2025-12-19 20:32:00 -08:00
Yuhong Sun
8cfeb85c43 feat: Deep Research packets streaming done (#6919) 2025-12-19 20:23:02 -08:00
Yuhong Sun
491b550ebc feat: Deep Research more stuff (#6918) 2025-12-19 19:14:22 -08:00
Chris Weaver
1a94dfd113 fix: reasoning width (#6914) 2025-12-20 02:24:46 +00:00
Jamison Lahman
bcd9d7ae41 fix(install): handle non-semver docker-compose versions (#6913) 2025-12-19 18:17:44 -08:00
Vinit
98b4353632 fix: use consistent INSTALL_ROOT instead of pwd for deployment paths (#6680)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-20 01:25:51 +00:00
Yuhong Sun
f071b280d4 feat: Deep Research packets (#6912) 2025-12-19 17:18:56 -08:00
acaprau
f7ebaa42fc feat(new vector db interface): Implement update for Vespa (#6790) 2025-12-20 00:56:23 +00:00
Justin Tahara
11737c2069 fix(vespa): Handling Rate Limits (#6878) 2025-12-20 00:52:11 +00:00
Jamison Lahman
1712253e5f fix(fe): Set up provider logos are equal size (#6900) 2025-12-20 00:50:31 +00:00
Yuhong Sun
de8f292fce feat: DR packets cont (#6910) 2025-12-19 16:47:03 -08:00
Jamison Lahman
bbe5058131 chore(mypy): "ragas.metrics" [import-not-found] (#6909) 2025-12-19 16:35:45 -08:00
Yuhong Sun
45fc5e3c97 chore: Tool interface (#6908) 2025-12-19 16:12:21 -08:00
Yuhong Sun
5c976815cc Mypy (#6906) 2025-12-19 15:50:30 -08:00
Justin Tahara
3ea4b6e6cc feat(desktop): Make Desktop App (#6690)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-19 15:49:21 -08:00
Yuhong Sun
7b75c0049b chore: minor refactor (#6905) 2025-12-19 15:37:27 -08:00
Yuhong Sun
04bdce55f4 chore: Placement used in more places (#6904) 2025-12-19 15:07:48 -08:00
Yuhong Sun
2446b1898e chore: Test Manager class (#6903) 2025-12-19 14:58:55 -08:00
Yuhong Sun
6f22a2f656 chore: Update Packet structure to make the positioning info an object (#6899) 2025-12-19 14:12:39 -08:00
Justin Tahara
e307a84863 fix(agents): Fix User File Search (#6895) 2025-12-19 21:42:28 +00:00
Chris Weaver
2dd27f25cb feat: allow cmd+click on connector rows in admin panel (#6894) 2025-12-19 21:39:23 +00:00
Nikolas Garza
e402c0e3b4 fix: fix Icon React Compiler error in LLMPopover when searching models (#6891) 2025-12-19 21:16:41 +00:00
Jamison Lahman
2721c8582a chore(pre-commit): run uv-sync in active venv (#6898) 2025-12-19 13:44:00 -08:00
Yuhong Sun
43c8b7a712 feat: Deep Research substep initial (#6896) 2025-12-19 13:30:25 -08:00
acaprau
f473b85acd feat(new vector db interface): Plug in hybrid_retrieval for Vespa (#6752) 2025-12-19 21:03:19 +00:00
Nikolas Garza
02cd84c39a fix(slack): limit thread context fetch to top N messages by relevance (#6861) 2025-12-19 20:26:30 +00:00
Raunak Bhagat
46d17d6c64 fix: Fix header on AgentsNavigationPage (#6873) 2025-12-19 20:15:44 +00:00
Jamison Lahman
10ad536491 chore(mypy): enable warn-unused-ignores (#6893) 2025-12-19 12:00:30 -08:00
acaprau
ccabc1a7a7 feat(new vector db interface): Implement hybrid_retrieval for Vespa (#6750) 2025-12-19 19:32:48 +00:00
Chris Weaver
8e262e4da8 feat: make first runs be high priority (#6871) 2025-12-19 19:05:15 +00:00
Raunak Bhagat
79dea9d901 Revert "refactor: Consolidate chat and agents contexts" (#6872)
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
2025-12-19 11:11:33 -08:00
Yuhong Sun
2f650bbef8 chore: Matplotlib for mypy (#6892) 2025-12-19 10:47:59 -08:00
Jamison Lahman
021e67ca71 chore(pre-commit): "Check lazy imports" prefers active venv (#6890) 2025-12-19 10:04:02 -08:00
roshan
87ae024280 fix icon button z-index (#6889) 2025-12-19 09:52:47 -08:00
SubashMohan
5092429557 Feat/tests GitHub perm sync (#6882) 2025-12-19 17:26:55 +00:00
Nikolas Garza
dc691199f5 fix: persist user-selected connector sources on follow-up messages (#6865) 2025-12-19 17:26:48 +00:00
Jamison Lahman
1662c391f0 fix(fe): chat attachment alignment regression (#6884) 2025-12-19 07:44:34 -08:00
Jamison Lahman
08aefbc115 fix(style): bottom message padding on small screen (#6883) 2025-12-19 06:50:43 -08:00
Jamison Lahman
fb6342daa9 fix(style): chat page is flush left on small screens (#6881) 2025-12-19 06:37:35 -08:00
Jamison Lahman
4e7adcc9ee chore(devtools): pass debug auth token with server-side requests (#6836) 2025-12-19 04:07:53 -08:00
Wenxi
aa4b3d8a24 fix(tests): add research agent tool to tool seeding test (#6877) 2025-12-18 23:09:18 -08:00
Wenxi
f3bc459b6e fix(anthropic): parse chat history tool calls correctly for anthropic models (#6876) 2025-12-18 22:28:34 -08:00
Yuhong Sun
87cab60b01 feat: Deep Research Tool (#6875) 2025-12-18 20:30:36 -08:00
Yuhong Sun
08ab73caf8 fix: Reasoning (#6874) 2025-12-18 19:00:13 -08:00
Justin Tahara
675761c81e fix(users): Clean up Invited Users who are Active (#6857) 2025-12-19 01:43:32 +00:00
Raunak Bhagat
18e15c6da6 refactor: Consolidate chat and agents contexts (#6834) 2025-12-19 01:31:02 +00:00
Yuhong Sun
e1f77e2e17 feat: Deep Research works till the end (#6870) 2025-12-18 17:18:08 -08:00
Justin Tahara
4ef388b2dc fix(tf): Instance Configurability (#6869) 2025-12-18 17:15:05 -08:00
Justin Tahara
031485232b fix(admin): Sidebar Scroll (#6853) 2025-12-19 00:39:27 +00:00
Wenxi
c0debefaf6 fix(bandaid): admin pages bottom padding (#6856) 2025-12-18 16:49:32 -08:00
Nikolas Garza
bbebe5f201 fix: reset actions popover to main menu on open (#6863) 2025-12-19 00:24:01 +00:00
Yuhong Sun
ac9cb22fee feat: deep research continued (#6864) 2025-12-18 15:51:13 -08:00
Wenxi
5e281ce2e6 refactor: unify mimetype and file extensions (#6849) 2025-12-18 23:08:26 +00:00
Chris Weaver
9ea5b7a424 chore: better cloud metrics (#6851) 2025-12-18 22:47:41 +00:00
Justin Tahara
e0b83fad4c fix(web): Avoiding Bot Detection issues (#6845) 2025-12-18 22:43:38 +00:00
Chris Weaver
7191b9010d fix: handle 401s in attachment fetching (#6858)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-18 14:52:05 -08:00
Yuhong Sun
fb3428ed37 feat: deep research more dev stuff (#6854) 2025-12-18 14:09:46 -08:00
Chris Weaver
444ad297da chore: remove fast model (#6841) 2025-12-18 20:38:13 +00:00
roshan
f46df421a7 fix: correct tool response pairing for parallel tool calls in llm_loop (#6846) 2025-12-18 11:46:34 -08:00
Yuhong Sun
98a2e12090 feat: DR continued work (#6848) 2025-12-18 11:36:34 -08:00
Jamison Lahman
36bfa8645e chore(gha): run playwright and jest similar to other tests (#6844) 2025-12-18 18:41:16 +00:00
roshan
56e71d7f6c fix: text view auto focus on button (#6843) 2025-12-18 10:18:43 -08:00
roshan
e0d172615b fix: TextView tooltip z-index (#6842) 2025-12-18 10:11:40 -08:00
Shahar Mazor
bde52b13d4 feat: add file management capabilities (#5623)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Wenxi <wenxi@onyx.app>
2025-12-18 17:40:24 +00:00
SubashMohan
b273d91512 feat(actions): add passthrough auth (#6665) 2025-12-18 10:58:52 +00:00
Jamison Lahman
1fbe76a607 fix(fe): center-align credential update icons (#6837) 2025-12-18 02:43:24 -08:00
Jamison Lahman
6ee7316130 fix(fe): avoid chat message shift on hover (#6835) 2025-12-17 23:44:09 -08:00
Raunak Bhagat
51802f46bb fix: Open sub menu on tool force (#6813) 2025-12-18 05:16:43 +00:00
Jamison Lahman
d430444424 fix(fe): apply z-sticky to ChatInput (#6827) 2025-12-17 21:04:34 -08:00
Yuhong Sun
17fff6c805 fix: reasoning with 5 series (#6833) 2025-12-17 20:16:48 -08:00
Yuhong Sun
a33f6e8416 fix: LLM can hallucinate tool calls (#6832) 2025-12-17 19:45:31 -08:00
Nikolas Garza
d157649069 fix(llm-popover): hide provider group when single provider (#6820) 2025-12-17 19:30:48 -08:00
Wenxi
77bbb9f7a7 fix: decrement litellm and openai broken versions (#6831) 2025-12-17 19:09:06 -08:00
Yuhong Sun
996b5177d9 feat: parallel tool calling (#6779)
Co-authored-by: rohoswagger <rohod04@gmail.com>
2025-12-17 18:59:34 -08:00
acaprau
ab9a3ba970 feat(new vector db interface): Plug in index for Vespa (#6659)
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2025-12-18 01:42:08 +00:00
Yuhong Sun
87c1f0ab10 feat: more orchestrator stuff (#6826) 2025-12-17 17:12:22 -08:00
acaprau
dcea1d88e5 feat(new vector db interface): Implement index for Vespa (#6658)
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2025-12-18 00:26:07 +00:00
Nikolas Garza
cc481e20d3 feat: ee license tracking - API Endpoints (#6812) 2025-12-18 00:24:01 +00:00
Nikolas Garza
4d141a8f68 feat: ee license tracking - DB and Cache Operations (#6811) 2025-12-17 23:53:28 +00:00
Wenxi
cb32c81d1b refactor(web search): use refreshed modal, improve ux, add playwright tests (#6791) 2025-12-17 15:24:47 -08:00
Nikolas Garza
64f327fdef feat: ee license tracking - Crypto Verification Utils (#6810) 2025-12-17 22:41:12 +00:00
Yuhong Sun
902d6112c3 feat: Deep Research orchestration start (#6825) 2025-12-17 14:53:25 -08:00
Jamison Lahman
f71e3b9151 chore(devtools): address hatch.version.raw-options review comment (#6823) 2025-12-17 14:52:06 -08:00
Nikolas Garza
dd7e1520c5 feat: ee license tracking - Data Plane Models + Database Schema (#6809) 2025-12-17 21:26:33 +00:00
Jamison Lahman
97553de299 chore(devtools): go onboarding docs + replace hatch-vcs w/ code script (#6819) 2025-12-17 13:27:43 -08:00
Justin Tahara
c80ab8b200 fix(jira): Handle Errors better (#6816) 2025-12-17 21:12:14 +00:00
Jamison Lahman
85c4ddce39 chore(frontend): optionally inject auth cookie into requests (#6794)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-12-17 20:43:36 +00:00
Wenxi
1caa860f8e fix(file upload): properly convert and process files uploaded directly to chat (#6815)
Co-authored-by: _htz_ <100520465+1htz2@users.noreply.github.com>
2025-12-17 12:38:14 -08:00
trial-danswer
7181cc41af feat: adding support for SearXNG as an option for web search. It operates a… (#6653)
Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-12-17 12:27:19 -08:00
Chris Weaver
959b8c320d fix: don't leave redis ports exposed (#6814) 2025-12-17 12:06:10 -08:00
roshan
96fd0432ff fix(tool): default tool descriptions assistant -> agent (#6788)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-17 19:12:17 +00:00
Jamison Lahman
4c73a03f57 chore(fe): followups to 7f79e34aa (#6808) 2025-12-17 18:36:31 +00:00
Raunak Bhagat
e57713e376 fix: Clean up DocumentsSidebar (#6805) 2025-12-17 09:00:14 -08:00
Jamison Lahman
21ea320323 fix(style): standardize projects page layout (#6807) 2025-12-17 01:11:09 -08:00
Jamison Lahman
bac9c48e53 fix(style): "More Agents" page is responsive (#6806) 2025-12-17 01:01:13 -08:00
roshan
7f79e34aa4 fix(projects): add special logic for internal search tool when no connectors available (#6774)
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
2025-12-17 06:45:03 +00:00
Jamison Lahman
f1a81d45a1 chore(fe): popover component uses z-index.css (#6804) 2025-12-16 23:07:31 -08:00
Jamison Lahman
285755a540 chore(pre-commit): fix uv.lock after filelock "upgrade" (#6803) 2025-12-16 22:16:19 -08:00
Justin Tahara
89003ad2d8 chore(tf): Update VPC calling (#6798) 2025-12-17 05:38:50 +00:00
Yuhong Sun
9f93f97259 feat(vectordb): New Document Index Interface (#5700) 2025-12-17 03:28:02 +00:00
Yuhong Sun
f702eebbe7 chore: some readme updates (#6802) 2025-12-16 19:53:23 -08:00
Yuhong Sun
8487e1856b feat: Deep Research first couple stages (#6801) 2025-12-16 19:40:54 -08:00
acaprau
a36445f840 fix(devtools): restart_containers.sh should source venv before running alembic (#6795) 2025-12-17 02:33:21 +00:00
roshan
7f30293b0e chore: improved error handling and display for agent failure types (#6784) 2025-12-17 02:30:24 +00:00
acaprau
619d9528b4 fix(devtools): CLAUDE.md.template makes reference to a venv that does not exist (#6796) 2025-12-17 02:29:47 +00:00
Yuhong Sun
6f83c669e7 feat: enable skip clarification (#6797) 2025-12-16 18:25:15 -08:00
Chris Weaver
c3e5f48cb4 fix: horrible typo in README (#6793) 2025-12-16 17:05:57 -08:00
Justin Tahara
fdf8fe391c fix(ui): Search Settings Active Only (#6657) 2025-12-16 17:00:06 -08:00
Raunak Bhagat
f1d6bb9e02 refactor: Transfer all icons to @opal/icons (#6755) 2025-12-17 00:16:44 +00:00
Justin Tahara
9a64a717dc fix(users): User Groups Race Condition (#6710) 2025-12-17 00:11:07 +00:00
Raunak Bhagat
aa0f475e01 refactor: Add new z-indexing file (#6789) 2025-12-16 23:56:13 +00:00
Nikolas Garza
75238dc353 fix: attach user credentials to assistant requests (#6785) 2025-12-16 23:15:31 +00:00
Nikolas Garza
9e19803244 chore: bump fallback max token limit to 32k (#6787) 2025-12-16 23:09:47 +00:00
dependabot[bot]
5cabd32638 chore(deps): Bump filelock from 3.15.4 to 3.20.1 in /backend/requirements (#6781)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-16 22:36:09 +00:00
Justin Tahara
4ccd88c331 fix(confluence): Skip attachments gracefully (#6769) 2025-12-16 22:34:16 +00:00
Justin Tahara
5a80b98320 feat(cleanup): No Bastion Setup (#6562) 2025-12-16 14:51:05 -08:00
Jamison Lahman
ff109d9f5c chore(style): fix chat page scrollbar after padding change (#6780) 2025-12-16 22:08:12 +00:00
Justin Tahara
4cc276aca9 fix(helm): Add Update Strategy (#6782) 2025-12-16 14:19:20 -08:00
Jamison Lahman
29f0df2c93 fix(style): increase tooltip z-index (#6778) 2025-12-16 21:30:19 +00:00
Nikolas Garza
e2edcf0e0b fix: improve ux for fed slack config error handling (#6699) 2025-12-16 21:23:11 +00:00
Chris Weaver
9396fc547d fix: confluence params (#6773) 2025-12-16 20:53:39 +00:00
Jamison Lahman
c089903aad fix: chat page overflow on small screens (#6723) 2025-12-16 13:03:07 -08:00
Chris Weaver
95471f64e9 fix: main chat page w/ overridden app name (#6775) 2025-12-16 12:56:15 -08:00
Jamison Lahman
13c1619d01 fix(style): center-ish align chat icon on small screen (#6727) 2025-12-16 20:10:09 +00:00
Justin Tahara
ddb5068847 fix(helm): Redis Operator Name (#6770) 2025-12-16 20:07:00 +00:00
Nikolas Garza
81a4f654c2 fix: scrollable container height for popover.tsx (#6772) 2025-12-16 20:04:33 +00:00
Jamison Lahman
9393c56a21 fix: remove unnecessary chat display tabindex (#6722) 2025-12-16 20:00:01 +00:00
Nikolas Garza
1ee96ff99c fix(llm): fix custom provider detection and model filtering (#6766) 2025-12-16 19:14:38 +00:00
Jamison Lahman
6bb00d2c6b chore(gha): run connector tests when uv.lock changes (#6768) 2025-12-16 18:44:06 +00:00
Wenxi
d9cc923c6a fix(hubspot): api client and urllib conflict (#6765) 2025-12-16 18:35:24 +00:00
Evan Lohn
bfbba0f036 chore: gpt 5.2 model naming (#6754) 2025-12-16 10:38:29 -08:00
Wenxi
ccf6911f97 chore: alembic readme nit (#6767) 2025-12-16 10:20:50 -08:00
Wenxi
15c9c2ba8e fix(llms): only save model configs for active/usable LLMs (#6758) 2025-12-16 17:54:47 +00:00
Wenxi
8b3fedf480 fix(web search): clamp google pse max results to api max (#6764) 2025-12-16 09:47:56 -08:00
Jamison Lahman
b8dc0749ee chore(tests): allow REDIS_CLOUD_PYTEST_PASSWORD to be empty (#6249) 2025-12-16 02:53:28 -08:00
Jamison Lahman
d6426458c6 chore(hygiene): rm unused secrets (#6762) 2025-12-16 02:29:56 -08:00
Jamison Lahman
941c4d6a54 chore(gha): use ods openapi in CI (#6761) 2025-12-16 02:04:42 -08:00
Jamison Lahman
653b65da66 chore(devtools): replace check_lazy_imports.py w/ ods check-lazy-imports (#6760) 2025-12-16 01:05:08 -08:00
Jamison Lahman
503e70be02 chore(deployment): fetch-depth: 0 for check-version-tag (#6759) 2025-12-15 23:51:37 -08:00
Nikolas Garza
9c19493160 fix: llm popover scroll (#6757) 2025-12-16 05:24:28 +00:00
Nikolas Garza
933315646b fix(llm): restore default models and filter obsolete/duplicate models from API (#6731) 2025-12-16 03:11:38 +00:00
Nikolas Garza
d2061f8a26 chore(ui): LLM popover improvements (#6742) 2025-12-15 19:36:00 -08:00
Jamison Lahman
6a98f0bf3c chore(devtools): ods openapi to generate schema and client (#6748) 2025-12-15 19:34:12 -08:00
Jamison Lahman
2f4d39d834 chore(devtools): ods check-lazy-imports (#6751) 2025-12-15 18:54:49 -08:00
Raunak Bhagat
40f8bcc6f8 refactor: Clean up message display (#6706) 2025-12-15 18:48:32 -08:00
Wenxi
af9ed73f00 fix(llms): reduce list of openai models (#6753) 2025-12-16 02:28:17 +00:00
acaprau
bf28041f4e feat(agents pagination): FE changes for pagination to the agents admin page (#6516)
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-12-16 02:21:43 +00:00
Wenxi
395d5927b7 fix(llms): destructure fetched_model_configurations (#6749) 2025-12-16 01:33:16 +00:00
Jamison Lahman
c96f24e37c chore(deployment): run check-version-tag in debug mode (#6747) 2025-12-15 17:15:51 -08:00
Emerson Gomes
070519f823 Add LLM Session Tracking for Budget Control and Observability (#6564)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
2025-12-15 23:45:25 +00:00
Jamison Lahman
a7dc1c0f3b chore(gha): remove duplicate check-lazy-imports (#6746) 2025-12-15 15:38:13 -08:00
Jamison Lahman
a947e44926 chore(gha): uv run openapi-generator-cli instead of docker (#6737) 2025-12-15 22:00:39 +00:00
Evan Lohn
a6575b6254 feat: allow updating embedding API key (#6707) 2025-12-15 19:21:05 +00:00
Wenxi
31733a9c7c fix(projects): don't disable internal search when no project files are uploaded (#6732) 2025-12-15 10:53:17 -08:00
dependabot[bot]
5415e2faf1 chore(deps): Bump actions/setup-node from 6.0.0 to 6.1.0 (#6735)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-15 18:34:29 +00:00
dependabot[bot]
749f720dfd chore(deps): Bump actions/checkout from 6.0.0 to 6.0.1 (#6734)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-15 18:32:05 +00:00
Wenxi
eac79cfdf2 chore: disable coda tests temporarily until we fully configure (#6733) 2025-12-15 10:19:28 -08:00
Chris Weaver
e3b1202731 fix: mypy (#6724) 2025-12-15 09:46:02 -08:00
Yuhong Sun
6df13cc2de feat: Handle repeat calls to internal search (#6728) 2025-12-14 23:59:35 -08:00
Yuhong Sun
682f660aa3 feat: Minor touch-ups on DR (#6726) 2025-12-14 23:00:30 -08:00
Yuhong Sun
c4670ea86c feat: Deep Research Clarification Stage (#6725) 2025-12-14 22:55:39 -08:00
ethan
a6757eb49f feat: add coda connector (#6558)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-12-14 19:49:55 -08:00
Justin Tahara
cd372fb585 fix(asana): Cleaning up Errors (#6689) 2025-12-15 02:07:05 +00:00
Chris Weaver
45fa0d9b32 fix: package-lock.json (#6721) 2025-12-14 17:36:48 -08:00
Chris Weaver
45091f2ee2 fix: add darwin (#6634) 2025-12-14 17:14:16 -08:00
Chris Weaver
43a3cb89b9 fix: env vars for tests (#6720) 2025-12-14 16:37:06 -08:00
Chris Weaver
9428eaed8d fix: copying markdown tables into spreadsheets (#6717) 2025-12-14 23:01:07 +00:00
Chris Weaver
dd29d989ff chore: ignore plans dir (#6718) 2025-12-14 14:50:21 -08:00
Chris Weaver
f44daa2116 fix: remove bottom logo (#6716) 2025-12-14 22:09:27 +00:00
Justin Tahara
212cbcb683 fix(redis): Adding missing TTLs (#6708) 2025-12-13 02:15:09 +00:00
Justin Tahara
aaad573c3f feat(helm): Add Default Redis Configs (#6709) 2025-12-13 02:10:27 +00:00
Jamison Lahman
e1325e84ae chore(pre-commit): test selection w/ merge-group & postsubmits (#6705) 2025-12-13 00:08:39 +00:00
Evan Lohn
e759cdd4ab fix: mcp server name and desc updates (#6692) 2025-12-12 07:04:46 +00:00
Yuhong Sun
2ed6607e10 chore: A few frontend cleanups (#6700) 2025-12-11 19:47:51 -08:00
dependabot[bot]
ba5b9cf395 chore(deps): Bump next from 16.0.7 to 16.0.10 in /web (#6695)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-11 19:18:37 -08:00
Yuhong Sun
bab23f62b8 fix: Citation on replay bug (#6697) 2025-12-11 19:17:06 -08:00
Yuhong Sun
d72e2e4081 fix: Search tool reasoning level (#6696) 2025-12-11 18:28:01 -08:00
Raunak Bhagat
4ed2d08336 fix: Fix custom-agent-avatar-invocation (#6644) 2025-12-11 16:20:39 -08:00
Yuhong Sun
24a0ceee18 chore: fix llm interface (#6691) 2025-12-11 15:44:44 -08:00
Jamison Lahman
d8fba38780 chore(gha): replace pre-commit with prek (#6684)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-10 17:23:08 -08:00
Justin Tahara
5f358a1e20 fix(users): Add Race Condition Handling (#6639) 2025-12-09 07:43:47 -10:00
Evan Lohn
00b0c23e13 fix(web): handle br encoding of sitemap (#6647) 2025-12-09 04:03:56 +00:00
Chris Weaver
2103ed9e81 fix: tag race condition (#6674) 2025-12-08 17:01:07 -10:00
Chris Weaver
2c5ab72312 chore: only pause after repeated failure on cloud (#6673) 2025-12-08 16:44:13 -10:00
roshan
672d1ca8fa fix: toast for non-admin onboarding flow (#6651) 2025-12-07 00:48:18 +00:00
Jamison Lahman
a418de4287 chore(devtools): upgrade onyx-devtools 0.0.3->0.1.0 (#6663) 2025-12-06 10:48:46 -08:00
Jamison Lahman
349aba6c02 chore(devtools): upgrade onyx-devtools 0.0.2->0.0.3 (#6662) 2025-12-06 10:10:02 -08:00
Jamison Lahman
18a7bdc292 chore(devtools): ods db operations (#6661)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-06 09:53:25 -08:00
Raunak Bhagat
c658fd4c7d refactor: Modal cleanup (#6614) 2025-12-05 19:40:30 -08:00
Yuhong Sun
f1e87dda5b chore: LLM step to give packets that can be modified as needed (#6641) 2025-12-05 17:12:55 -08:00
roshan
b93edb3e89 feat: standardize placeholders in default system prompt (#6643) 2025-12-06 00:13:36 +00:00
Jamison Lahman
dc4e76bd64 chore(vscode): migrate install python reqs to uv (#6654) 2025-12-05 16:15:58 -08:00
Justin Tahara
c4242ad17a fix(ui): Normalize Emails (#6636) 2025-12-05 23:26:06 +00:00
roshan
a4dee62660 fix: add reciprocal rank score for web search docs based on ordering (#6625) 2025-12-05 22:53:07 +00:00
Nikolas Garza
2d2c76ec7b feat(llm): fetch dynamic provider models directly from source APIs (#6619) 2025-12-05 22:22:56 +00:00
dependabot[bot]
d80025138d chore(deps): Bump urllib3 from 2.5.0 to 2.6.0 in /backend/requirements (#6638)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-05 21:28:41 +00:00
Evan Lohn
90ec595936 fix: hitting endpoint with undefined persona (#6637) 2025-12-05 20:33:00 +00:00
Jamison Lahman
f30e88a61b chore(dev): make "dev" an optional-dependency (#6640) 2025-12-05 10:51:23 -08:00
roshan
9c04e9269f feat: add standard for default tools -> make openURL a default tool (#6581)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
Co-authored-by: Raunak Bhagat <r@rabh.io>
Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>
Co-authored-by: Yuhong Sun <yuhongsun96@gmail.com>
Co-authored-by: SubashMohan <subashmohan75@gmail.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
Co-authored-by: Roshan Desai <rohoswagger@rohoswagger-onyx.local>
Co-authored-by: acaprau <48705707+acaprau@users.noreply.github.com>
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-12-05 18:02:50 +00:00
Jamison Lahman
8c65fcd193 chore(devtools): simplify compile requirements (#6630) 2025-12-05 10:29:07 -08:00
Jamison Lahman
f42e3eb823 chore(docs): prefer uv over pip (#6628) 2025-12-05 10:28:03 -08:00
Yuhong Sun
9b76ed085c feat: deep research prompts (#6635) 2025-12-05 10:15:37 -08:00
Chris Weaver
0eb4d039ae fix: only re-index active connectors (#6631) 2025-12-05 17:52:44 +00:00
Justin Tahara
3c0b66a174 fix(sharepoint): Shared link fix (#6607) 2025-12-05 17:35:10 +00:00
Chris Weaver
895a8e774e fix: add default-groups=all (#6632) 2025-12-05 17:30:06 +00:00
SubashMohan
c14ea4dbb9 refactor(actions): improved flexibility of actioncard and few ui changes (#6597) 2025-12-05 07:06:33 +00:00
Wenxi
80b1e07586 fix(llms): sanitize vision providers (#6624) 2025-12-05 03:03:21 +00:00
Nikolas Garza
59b243d585 chore(slack): add better typing + move some logs from debug to info (#6613)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-05 03:02:28 +00:00
Jamison Lahman
d4ae3d1cb5 chore(devtools): upgrade onyx-devtools 0.0.1->0.0.2 (#6623) 2025-12-04 18:36:21 -08:00
Jamison Lahman
ed0a86c681 chore(deps): make backend/ a uv workspace (#6460) 2025-12-04 18:30:04 -08:00
dependabot[bot]
e825e5732f chore(deps): Bump aiohttp from 3.12.14 to 3.13.2 in /backend (#6406)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-04 18:28:05 -08:00
dependabot[bot]
a93854ae70 chore(deps): Bump sendgrid from 6.11.0 to 6.12.5 in /backend (#6408)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-05 02:04:24 +00:00
Yuhong Sun
fc8767a04f chore: delete unused code (#6622) 2025-12-04 17:50:17 -08:00
Jamison Lahman
6c231e7ad1 chore(devtools): QOL improvements for cherry-pick script (#6620) 2025-12-04 17:27:27 -08:00
Wenxi
bac751d4a9 feat(helm): add mcp server (#6586) 2025-12-05 00:57:05 +00:00
Jessica Singh
3e0f386d5b fix(web search ui): make font sizes consistent (#6606) 2025-12-05 00:09:21 +00:00
Chris Weaver
edb6957268 fix: litellm w/ azure reasoning mode (#6612) 2025-12-04 23:49:55 +00:00
Jamison Lahman
0348d11fb2 chore(mypy): type-check tools/ (#6615)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-12-04 23:44:34 +00:00
Wenxi
fe514eada0 fix(docs): update admin docs links (#6611) 2025-12-04 23:05:09 +00:00
acaprau
e7672b89bb feat(agents admin page): Make display priority adjustments PATCH instead of PUT, allowing granular edits + small cleanups (#6565)
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-12-04 22:27:04 +00:00
Nikolas Garza
c1494660e1 fix: slack bot fixes for channel filtering, spammy logs, and fed slack searching (#6588) 2025-12-04 21:35:48 +00:00
roshan
7ee3df6b92 fix: frontend continues shimmering when tool call stopped partway (#6544)
Co-authored-by: Roshan Desai <rohoswagger@rohoswagger-onyx.local>
2025-12-04 20:46:46 +00:00
Wenxi
54afed0d23 fix(api): limit ingestion api to curators and admins (#6608) 2025-12-04 20:43:49 +00:00
Justin Tahara
1c776fcc73 fix(persona): Fix sorting logic (#6602) 2025-12-04 11:30:32 -08:00
Jamison Lahman
340ddce294 chore(mypy): un-ignore braintrust missing import (#6603) 2025-12-04 11:30:05 -08:00
Nikolas Garza
e166c1b095 chore: bump react version for sec vuln (#6600) 2025-12-04 17:04:30 +00:00
SubashMohan
84be68ef7c refactor(MCP): mcp backend and schema (#6475) 2025-12-04 08:24:44 +00:00
Yuhong Sun
90e9af82bf chore: Cleanup chat turn and prompts (#6589) 2025-12-03 23:46:06 -08:00
Raunak Bhagat
7f36fb2a4c refactor: Refresh "Agent Icon" to the new "Agent Avatar" standard (#6509) 2025-12-03 21:18:54 -08:00
Nikolas Garza
307464a736 feat: surface better model names in the ui/chat bar (#6514) 2025-12-04 04:51:38 +00:00
Raunak Bhagat
1d5c8bdb20 refactor: Icon cleanup (#6573) 2025-12-04 04:16:40 +00:00
dependabot[bot]
6de626ecc3 chore(deps): Bump next from 16.0.1 to 16.0.7 in /web (#6563)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Wenxi Onyx <wenxi@onyx.app>
2025-12-03 19:52:05 -08:00
roshan
6663c81aa6 fix: use absolute path for icon imports (#6585) 2025-12-04 02:29:10 +00:00
dependabot[bot]
35ca94c17e chore(deps): Bump werkzeug from 3.1.1 to 3.1.4 in /backend/requirements (#6521)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-03 18:08:21 -08:00
Jamison Lahman
431f652be8 chore(pre-commit): upgrade some hooks to latest (#6583) 2025-12-03 18:07:00 -08:00
Yuhong Sun
6535d85ceb chore: Prompt builder update (#6582) 2025-12-03 17:30:17 -08:00
Chris Weaver
3a349d6ab3 fix: jira attribute error (#6584) 2025-12-03 17:26:21 -08:00
Chris Weaver
ddae686dc7 fix: workaround for bugged Confluence API (#6311) 2025-12-04 01:03:51 +00:00
roshan
0e42891cbf fix: install node dependencies for quality-checks pre-commit hook (#6580) 2025-12-04 00:38:43 +00:00
Chris Weaver
823b28b4a7 fix: improve jira perm sync handling (#6575) 2025-12-03 23:45:34 +00:00
Jamison Lahman
828036ceb8 chore(devtools): introduce the Onyx Developer Script, ods (#6559) 2025-12-03 23:45:09 +00:00
Wenxi
2a40ceab26 refactor(API): replace redundant api key dep from ingestion endpoints (#6568) 2025-12-03 23:39:27 +00:00
Yuhong Sun
f03f2bff78 chore: continue cleanup of dead files (#6579) 2025-12-03 15:46:44 -08:00
Raunak Bhagat
f9a548fbe9 refactor: Input styles (#6571) 2025-12-03 22:31:45 +00:00
Wenxi
8b45f911ff refactor(openapi generation): generate python client with openapi generation script for one click integration test setup (#6574) 2025-12-03 21:47:20 +00:00
Yuhong Sun
ae64ded7bb Removing LangGraph code (#6578) 2025-12-03 14:07:18 -08:00
Jamison Lahman
7287e3490d chore(pre-commit): disable mypy hook (#6576) 2025-12-03 13:57:00 -08:00
Yuhong Sun
7681c11585 chore: Removing Retrievaldoc (#6577) 2025-12-03 13:49:22 -08:00
Richard Guan
365e31a7f3 chore(tool): call output fix (#6572) 2025-12-03 21:28:06 +00:00
Nikolas Garza
dd33886946 chore: add fe type checking to pre-commit hooks (#6569) 2025-12-03 20:29:08 +00:00
Raunak Bhagat
6cdd5b7d3e fix: Fix failing type checks in message feedback tests (#6567)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-03 12:47:45 -08:00
Yuhong Sun
7b6ae2b72a chore: Cleanup PreviousMessage class (#6570) 2025-12-03 12:37:02 -08:00
Yuhong Sun
629502ef6a fix: Basic Reenabling Code Interpreter (#6566) 2025-12-03 11:50:11 -08:00
Yuhong Sun
927e8addb5 fix: Reasoning Block Linebreaks (#6552) 2025-12-03 18:28:26 +00:00
Evan Lohn
14712af431 fix: expand special casing around sharepoint shared drives (#6539) 2025-12-03 18:12:19 +00:00
Richard Guan
4b38b91674 chore(framework): cleanup (#6538) 2025-12-03 18:01:11 +00:00
Emerson Gomes
508c248032 fix: prevent heartbeat timeout state pollution in validation loop (#5782)
Co-authored-by: Claude <noreply@anthropic.com>
2025-12-03 10:08:53 -08:00
Emerson Gomes
45db59eab1 db: remove duplicate chunk_stats deletion in delete_documents_complete__no_commit (#5792) 2025-12-03 10:02:57 -08:00
Yuhong Sun
5a14055a29 feat: Some UI enhancements for tools (#6550)
Co-authored-by: SubashMohan <subashmohan75@gmail.com>
2025-12-03 16:42:49 +00:00
Nikolas Garza
a698f01cab feat: add model metadata enrichments for LiteLLM (#6541)
Co-authored-by: Justin Tahara <105671973+justin-tahara@users.noreply.github.com>
2025-12-03 06:18:02 +00:00
Jamison Lahman
4e4bf197cf chore(gha): docker cache from HEAD (#6549) 2025-12-03 03:57:07 +00:00
dependabot[bot]
517b0d1e70 chore(deps): Bump mcp from 1.19.0 to 1.23.0 in /backend/requirements (#6526)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-03 02:55:16 +00:00
Yuhong Sun
7b2b163d4e chore: Removes the translation layer for the new backend packets (#6546) 2025-12-03 02:40:55 +00:00
Jamison Lahman
29b28c8352 chore(deployment): run tests on tag push (#6543) 2025-12-03 01:49:21 +00:00
Jamison Lahman
83b624b658 chore(gha): /uv pip install/uv run --with/ (#6545) 2025-12-02 17:48:22 -08:00
Jamison Lahman
d3cd68014a chore(gha): persist docker cache intra-PR builds (#6524) 2025-12-03 01:14:10 +00:00
Jamison Lahman
64d9fd97ec chore(zizmor): upgrade and track version via pyproject (#6542) 2025-12-02 17:12:10 -08:00
Jamison Lahman
7a9e2ebec6 chore(deployment): check if tagged correctly (#6537) 2025-12-03 00:39:57 +00:00
Richard Guan
51a69d7e55 chore(tracing): add tracing to new backend (#6532) 2025-12-02 22:38:23 +00:00
Nikolas Garza
f19362ce27 fix: eager load persona in slack channel config (#6535) 2025-12-02 22:13:24 +00:00
Justin Tahara
0c3330c105 chore(test): Playwright for User Feedback (#6534) 2025-12-02 21:14:12 +00:00
きわみざむらい
81cb0f2518 fix: Add proper DISABLE_MODEL_SERVER environment variable support (#6468)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-02 21:11:09 +00:00
Chris Weaver
beb4e619e7 feat: move to client side rendering + incremental loading (#6464)
Co-authored-by: Claude <noreply@anthropic.com>
2025-12-02 12:30:43 -08:00
Yuhong Sun
0fa1d5b0ca Update search_tool.py description (#6531) 2025-12-02 11:08:36 -08:00
Yuhong Sun
1e30882222 Update README.md (#6530) 2025-12-02 11:07:19 -08:00
Yuhong Sun
42996a63fe README for DB Models (#6529) 2025-12-02 11:00:48 -08:00
Yuhong Sun
4a38068192 Knowledge for future (#6528) 2025-12-02 10:48:49 -08:00
Emerson Gomes
97f66b68c1 Harden markdown link protocol handling (#6517) 2025-12-02 17:49:44 +00:00
Wenxi
aeafd83cd1 fix(migration): new chat history downgrade (#6527) 2025-12-02 17:47:33 +00:00
Justin Tahara
0ba9a873e9 feat(pginto): Support IAM Auth (#6520) 2025-12-01 22:40:09 -06:00
Justin Tahara
b72bac993f feat(helm): PGInto Workflow (#6519) 2025-12-01 21:54:06 -06:00
Yuhong Sun
9572c63089 Fix Alembic Downgrade just in case (#6515) 2025-12-01 18:01:38 -08:00
Nikolas Garza
c4505cdb06 chore: remove fed slack entities button on doc set edit page (#6385) 2025-12-02 01:26:30 +00:00
Jamison Lahman
9055691c38 chore(docker): explicitly default env to empty string (#6511) 2025-12-02 01:25:39 +00:00
Raunak Bhagat
1afa7b0689 fix: Edit separator (#6513) 2025-12-01 17:15:23 -08:00
Evan Lohn
72c96a502e feat: mcp pass through oauth (#6469) 2025-12-02 00:35:08 +00:00
acaprau
093b399472 feat(persona): Add GET paginated personas to REST API (#6448)
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-12-02 00:14:47 +00:00
Jamison Lahman
d89dd3c76b chore(gha): remove duplicate python checks (#6510) 2025-12-01 16:19:15 -08:00
dependabot[bot]
a24d0aa26d chore(deps): Bump actions/upload-artifact from 4 to 5 (#6502)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-12-01 23:37:28 +00:00
dependabot[bot]
5e581c2c60 chore(deps): Bump actions/setup-python from 6.0.0 to 6.1.0 (#6501)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-12-01 23:36:43 +00:00
dependabot[bot]
17ea20ef5c chore(deps): Bump astral-sh/setup-uv from 3.2.4 to 7.1.4 (#6503)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-12-01 23:32:06 +00:00
Justin Tahara
0b8207ef4c fix(feedback): API Endpoint fix (#6500) 2025-12-01 17:28:32 -06:00
Yuhong Sun
c26da8dc75 feat: Updated Processing for Context to the LLM (#6485)
Co-authored-by: Vega <33913017+weijia619@users.noreply.github.com>
2025-12-01 14:41:13 -08:00
Richard Guan
f1f3cd392c chore(fix): langfuse tracing (#6505) 2025-12-01 21:54:53 +00:00
Justin Tahara
36e31e9ffa fix(helm): Code Interpreter Chart release (#6506) 2025-12-01 15:41:20 -06:00
Chris Weaver
f57c12cdaa feat: add code-interpreter to helm chart (#6489) 2025-12-01 13:37:56 -08:00
brano-rohlik
514c76c3ea feat(vertex-ai): add Gemini 3 Pro and Claude Opus 4.5 models (#6481) 2025-12-01 09:37:52 -08:00
Chris Weaver
921e82b839 fix: code interpreter UI (#6498) 2025-12-01 09:36:39 -08:00
Chris Weaver
6b7c6c9a37 fix: icon coloring in Renderer (#6491) 2025-11-30 18:53:26 -08:00
SubashMohan
53ae1b598b fix(WebSearch): adjust Separator styling for improved layout consistency (#6487) 2025-11-30 11:45:37 +05:30
SubashMohan
83e756bf05 fix(Projects): file ordering in project panel (#6334) 2025-11-29 19:40:01 +00:00
Jamison Lahman
19b485cffd chore(deps): upgrade supervisor 4.2.5->4.3.0 (#6466) 2025-11-26 18:38:13 -05:00
Jamison Lahman
f5a99053ac chore(deps): upgrade dropbox 11.36.2->12.0.2 (#6467) 2025-11-26 18:10:13 -05:00
Chris Weaver
91f0377dd5 chore: enable code interpreter tests (#6404) 2025-11-26 14:55:07 -08:00
Jamison Lahman
25522dfbb8 chore(gha): setup-python accepts requirements to install (#6463) 2025-11-26 17:27:30 -05:00
Jamison Lahman
b0e124ec89 chore(deps): upgrade pytest-asyncio 0.22.0->1.3.0 (#6461) 2025-11-26 16:39:52 -05:00
Raunak Bhagat
b699a65384 refactor: Edit Modal.Header to be more concise and adherent to mocks (#6452) 2025-11-26 13:17:51 -08:00
Jamison Lahman
cc82d6e506 chore(deps): remove non-dev packages (#6462) 2025-11-26 16:17:01 -05:00
Jamison Lahman
8a6db7474d chore(gha): assert GHA jobs have timeouts (#6455) 2025-11-26 18:14:23 +00:00
Jamison Lahman
fd9aea212b chore(dev): run mypy and uv-sync on pre-commit (#6454) 2025-11-26 17:24:28 +00:00
acaprau
4aed383e49 chore(logs): When final doc for context pruning gets pruned, that prob doesn't need to be an error (#6451)
Co-authored-by: Andrei <andrei@Andreis-MacBook-Pro.local>
2025-11-25 22:41:47 -08:00
Justin Tahara
d0ce313b1a fix(google): Fix embedding scopes (#6450) 2025-11-25 22:10:42 -06:00
Jamison Lahman
4d32c9f5e0 chore(python): use uv to manage and compile requirements (#6291) 2025-11-26 03:01:52 +00:00
Justin Tahara
158fe31b71 fix(azure): Normalizing Azure Target URIs (#6443) 2025-11-26 00:19:22 +00:00
Raunak Bhagat
97cddc1dd4 fix: Line item cleanup (#6444)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-25 16:11:16 -08:00
Chris Weaver
c520a4ec17 fix: spinner during CSV load (#6441)
Co-authored-by: SubashMohan <subashmohan75@gmail.com>
2025-11-25 22:01:55 +00:00
Raunak Bhagat
9c1f8cc98c refactor: Line item cleanup (#6434) 2025-11-25 13:51:17 -08:00
Justin Tahara
58ba8cc68a chore(langfuse): Remove Env Var (#6440) 2025-11-25 15:32:15 -06:00
Evan Lohn
a307b0d366 fix: use raw mcp url (#6432) 2025-11-25 21:10:03 +00:00
Wenxi
e34f58e994 refactor(tests): use PATManager for tests that use PATs (#6438) 2025-11-25 15:39:49 -05:00
Justin Tahara
7f6dd2dc93 feat(api): Add Users to Group Endpoint (#6427) 2025-11-25 20:12:20 +00:00
Wenxi
ef3daa58b3 feat(claude): update claude models (#6433) 2025-11-25 14:24:47 -05:00
Raunak Bhagat
972c33046e fix: Responsiveness flash fix (#6422) 2025-11-25 10:27:41 -08:00
Jamison Lahman
802248c4e4 chore(python): update stale external type stubs (#6429) 2025-11-25 17:46:12 +00:00
Justin Tahara
f359c44183 fix(gemini): Migrate from Vertex AI to Gemini (#6424) 2025-11-25 17:16:55 +00:00
Jamison Lahman
bab2220091 chore(db): onyx_list_tenants.py --csv -n [count] (#6425) 2025-11-25 17:01:35 +00:00
Wenxi
bc35354ced feat(MCP): basic Onyx MCP server with search tools and indexed sources resource (#6309) 2025-11-25 02:30:55 +00:00
Jamison Lahman
742dd23fdd chore(deps): upgrade psutil: 5.9.8->7.1.3 (#6300) 2025-11-25 01:23:31 +00:00
Richard Guan
ea5690db81 chore(hotfix): tool choice bug (#6417) 2025-11-24 14:41:57 -08:00
Justin Tahara
853ca635d2 feat(helm): Add Deployment Labels (#6421) 2025-11-24 16:25:15 -06:00
Wenxi
c4d2fc9492 feat(API): make EE query APIs CE (#6411) 2025-11-24 21:15:52 +00:00
Justin Tahara
7aa12c0a36 feat(claude): Adding Opus 4.5 (#6415) 2025-11-24 15:27:58 -06:00
Wenxi
e74cf14401 chore(docker): make container startup checks case-insensitive (#6412) 2025-11-24 20:48:23 +00:00
Justin Tahara
75c42ffa9d feat(claude): Add Gov Cloud Names (#6414) 2025-11-24 20:40:23 +00:00
dependabot[bot]
d6fbb7affd chore(deps): Bump actions/checkout from 4.3.0 to 6.0.0 (#6410)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2025-11-24 18:57:28 +00:00
dependabot[bot]
75cee70bbb chore(deps): Bump actions/stale from 9.1.0 to 10.1.0 (#6409)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-24 18:26:02 +00:00
Jamison Lahman
1c8b819aa2 chore(gha): fix zizmor for .github/actions/ (#6399) 2025-11-24 17:11:43 +00:00
Nikolas Garza
b7cf33a4cc fix: prevent slack bot from always using fed slack connector (#6400) 2025-11-24 02:37:28 +00:00
Nikolas Garza
b06459f674 chore: unify checkbox implementation across the frontend (#6345) 2025-11-23 21:18:56 +00:00
Raunak Bhagat
920db6b3c2 fix: Input select state-rendering fix (#6402) 2025-11-23 12:32:33 -08:00
Raunak Bhagat
b7e4b65a74 refactor: Consolidate hover, active, and focus styles (#6397) 2025-11-23 19:09:40 +00:00
Raunak Bhagat
e648e0f725 fix: Fix non-persistence issue with input-select (#6398) 2025-11-23 10:24:12 -08:00
SubashMohan
c8a3368fce fix(projects): Add Create New Project option in chat move menu (#6353) 2025-11-23 10:17:05 +00:00
SubashMohan
f74b02ad9e feat(UserFilesModal): add file count divider and conditional rendering (#6379) 2025-11-23 15:47:59 +05:30
SubashMohan
65b59c4a73 feat(projects): Improved Folder Icon Animation (#6354) 2025-11-23 09:46:25 +00:00
Raunak Bhagat
b74bcd0efc refactor: Separator cleanup (#6396) 2025-11-22 20:39:36 -08:00
Raunak Bhagat
8c133b3853 refactor: Input select (#6290)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2025-11-23 02:00:20 +00:00
Raunak Bhagat
67554cef96 refactor: Input text area cleanup (#6395) 2025-11-22 17:37:54 -08:00
Jamison Lahman
07e03f3677 fix(docker): chown /app directory (#6390) 2025-11-22 07:29:58 +00:00
Chris Weaver
33fee46d71 feat: code interpreter (python) (#6136) 2025-11-22 07:19:25 +00:00
Jamison Lahman
72f5e3d38f chore(dev): docker-compose.multitenant-dev respects HOST_PORT (#6388) 2025-11-22 07:11:43 +00:00
Jamison Lahman
f89380ad87 chore(gha): increase playwright runner volume size: 40->50gb (#6392) 2025-11-21 21:48:35 -08:00
Raunak Bhagat
e6f00098f2 refactor: (light) Refresh of the "Shared Chat Display" (#6387)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-21 20:58:46 -08:00
Evan Lohn
9100afa594 feat: Allow attaching mcp tools to default assistant (#6343) 2025-11-21 17:29:34 -08:00
Raunak Bhagat
93d2febf2a fix: Update buttons and stylings for new-team-modal (#6384) 2025-11-21 21:26:51 +00:00
Raunak Bhagat
693286411a feat: Responsiveness (#6383)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-21 21:01:27 +00:00
Justin Tahara
01a3064ca3 fix(testrail): Linting (#6382) 2025-11-21 10:50:08 -08:00
sashank-rayapudi-ai
09a80265ee feat(testrail): Implement a read-only custom connector for Testrail (#6084) 2025-11-21 10:16:40 -08:00
Wenxi
2a77481c1e test(onboarding): add playwright test for onboarding flow (#6376) 2025-11-21 12:23:37 -05:00
Jamison Lahman
6838487689 chore(deployments): separate flag for model-server, enable nightly (#6377) 2025-11-21 04:29:41 +00:00
Jamison Lahman
1713c24080 chore(docker): breakup model-server model layers (#6370) 2025-11-21 03:47:47 +00:00
Chris Weaver
73b3a2525a fix: chat switching (#6374) 2025-11-20 18:32:54 -08:00
Wenxi
59738d9243 feat: cross link cookies (#6371) 2025-11-21 02:03:52 +00:00
Wenxi
c0ff9c623b feat(APIs): web search apis and indexed sources api (#6363) 2025-11-20 20:23:06 -05:00
Jessica Singh
c03979209a fix(ui): icon alignment + color (#6373) 2025-11-20 17:16:10 -08:00
Justin Tahara
a0b7639693 fix(connectors): Normalizing Onyx Metadata Connector Type (#6315) 2025-11-21 00:46:45 +00:00
Raunak Bhagat
e3ede3c186 fix: Sidebar fixes (#6358) 2025-11-21 00:35:31 +00:00
Jessica Singh
092dbebdf2 fix(migration): exa env var into db (#6366) 2025-11-21 00:12:09 +00:00
Justin Tahara
838e2fe924 chore(bedrock): Add better logging (#6368) 2025-11-20 23:38:19 +00:00
Chris Weaver
48e2bfa3eb chore: prevent sentry spam on fake issue (#6369) 2025-11-20 22:47:30 +00:00
Jamison Lahman
2a004ad257 chore(deployments): fix nightly tagging + add alerts & workflow_dispatch (#6367) 2025-11-20 21:55:24 +00:00
Wenxi
416c7fd75e chore(WebSearch): remove old web search env vars and update tooltip (#6365)
Co-authored-by: justin-tahara <justintahara@gmail.com>
2025-11-20 21:09:24 +00:00
Justin Tahara
a4372b461f feat(helm): Add Tolerations and Affinity (#6362) 2025-11-20 20:25:20 +00:00
mristau-alltrails
7eb13db6d9 SECURITY FIX: CVE-2023-38545 and CVE-2023-38546 (#6356) 2025-11-20 20:11:35 +00:00
Justin Tahara
c0075d5f59 fix(docprocessing): Pause Failing Connectors (#6350) 2025-11-20 19:14:56 +00:00
Wenxi
475a3afe56 fix(connector): handle hubspot ticket with None content (#6357) 2025-11-20 13:35:46 -05:00
SubashMohan
bf5b8e7bae fix(Project): project pending issues (#6099) 2025-11-20 17:53:08 +00:00
Jamison Lahman
4ff28c897b chore(dev): nginx container port 80 respects HOST_PORT_80 (#6338) 2025-11-20 17:48:10 +00:00
SubashMohan
ec9e9be42e Fix/user file modal (#6333) 2025-11-20 16:41:38 +00:00
Nikolas Garza
af5fa8fe54 fix: web search and image generation tool playwright test failures (#6347) 2025-11-20 07:13:05 +00:00
Jamison Lahman
03a9e9e068 chore(gha): playwright browser cache is arch-aware (#6351) 2025-11-20 03:28:53 +00:00
Richard Guan
ad81c3f9eb chore(tracing): updates (#6322) 2025-11-20 00:58:00 +00:00
Jamison Lahman
62129f4ab9 chore(gha): require playwright passing on merge (#6346) 2025-11-20 00:55:19 +00:00
Jamison Lahman
b30d38c747 chore(gha): fix zizmor issues (#6344) 2025-11-19 23:57:34 +00:00
Nikolas Garza
0596b57501 fix: featured assistant typo (#6341) 2025-11-19 14:44:54 -08:00
Jamison Lahman
482b2c4204 chore(gha): run uvx zizmor --fix=all (#6342) 2025-11-19 14:26:45 -08:00
Jamison Lahman
df155835b1 chore(docker): docker bake UX (#6339)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-19 14:19:53 -08:00
Richard Guan
fd0762a1ee chore(agent): framework query improvements (#6297) 2025-11-19 21:43:33 +00:00
Jamison Lahman
bd41618dd9 chore(deployments): correctly set --debug for docker build (#6337) 2025-11-19 11:04:15 -08:00
Justin Tahara
5a7c6312af feat(jwt): JIT provision from token (#6252) 2025-11-19 10:06:20 -08:00
Raunak Bhagat
a477508bd7 fix: Fix header flashing (#6331)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-11-19 09:27:49 -08:00
Raunak Bhagat
8ac34a8433 refactor: input type in fixes (#6335) 2025-11-19 08:31:39 -08:00
Raunak Bhagat
2c51466bc3 fix: Some minor touch-ups for the new modal (#6332) 2025-11-19 14:03:15 +00:00
Raunak Bhagat
62966bd172 fix: Switch fix (#6279) 2025-11-19 01:40:40 -08:00
Jamison Lahman
a8d4482b59 chore(deployments): set provenance=false and flag debug (#6330) 2025-11-18 22:26:53 -08:00
Jamison Lahman
dd42a45008 chore(deployments): flag to disable docker caching (#6328) 2025-11-19 04:07:07 +00:00
Jessica Singh
a368556282 feat(web search providers): adding support and changing env var approach (#6273) 2025-11-19 02:49:54 +00:00
Evan Lohn
679d1a5ef6 fix: openpyxl bug (#6317) 2025-11-19 00:59:46 +00:00
Nikolas Garza
12e49cd661 fix: slack config forms + scope issues (#6318) 2025-11-18 16:49:16 -08:00
Jamison Lahman
1859a0ad79 chore(gha): run zizmor (#6326) 2025-11-18 16:10:07 -08:00
Jamison Lahman
9199d146be fix(tests): test_partial_match_in_model_map AssertionError (#6321) 2025-11-18 16:06:01 -08:00
Jamison Lahman
9c1208ffd6 chore(deployments): separate builds by platform (#6314) 2025-11-18 14:49:23 -08:00
Jamison Lahman
c3387e33eb chore(deployments): remove DEPLOYMENT from cache path (#6319) 2025-11-18 14:16:09 -08:00
Jamison Lahman
c37f633a37 chore(deployments): remove driver-opts from model-server build (#6313) 2025-11-18 10:45:24 -08:00
1,829 changed files with 155,631 additions and 80,786 deletions

.git-blame-ignore-revs (new file, 8 lines)

@@ -0,0 +1,8 @@
# Exclude these commits from git blame (e.g. mass reformatting).
# These are ignored by GitHub automatically.
# To enable this locally, run:
#
# git config blame.ignoreRevsFile .git-blame-ignore-revs
3134e5f840c12c8f32613ce520101a047c89dcc2 # refactor(whitespace): rm temporary react fragments (#7161)
ed3f72bc75f3e3a9ae9e4d8cd38278f9c97e78b4 # refactor(whitespace): rm react fragment #7190
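The file's own comment gives the local setup; a minimal usage sketch follows (the blamed path is illustrative, not taken from this diff):

  # One-time setup: point git blame at the ignore list
  git config blame.ignoreRevsFile .git-blame-ignore-revs
  # Blame now attributes lines past the two mass-reformatting revs above
  git blame web/src/components/App.tsx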

.github/CODEOWNERS (7 lines changed)

@@ -1,3 +1,10 @@
* @onyx-dot-app/onyx-core-team
# Helm charts Owners
/helm/ @justin-tahara
# Web standards updates
/web/STANDARDS.md @raunakab @Weves
# Agent context files
/CLAUDE.md.template @Weves
/AGENTS.md.template @Weves

@@ -17,6 +17,7 @@ self-hosted-runner:
- runner=16cpu-linux-x64
- ubuntu-slim # Currently in public preview
- volume=40gb
- volume=50gb
# Configuration variables in array of strings defined in your repository or
# organization. `null` means disabling configuration variables check.

@@ -1,135 +0,0 @@
name: 'Build and Push Docker Image with Retry'
description: 'Attempts to build and push a Docker image, with a retry on failure'
inputs:
context:
description: 'Build context'
required: true
file:
description: 'Dockerfile location'
required: true
platforms:
description: 'Target platforms'
required: true
pull:
description: 'Always attempt to pull a newer version of the image'
required: false
default: 'true'
push:
description: 'Push the image to registry'
required: false
default: 'true'
load:
description: 'Load the image into Docker daemon'
required: false
default: 'true'
tags:
description: 'Image tags'
required: true
no-cache:
description: 'Read from cache'
required: false
default: 'false'
cache-from:
description: 'Cache sources'
required: false
cache-to:
description: 'Cache destinations'
required: false
outputs:
description: 'Output destinations'
required: false
provenance:
description: 'Generate provenance attestation'
required: false
default: 'false'
build-args:
description: 'Build arguments'
required: false
retry-wait-time:
description: 'Time to wait before attempt 2 in seconds'
required: false
default: '60'
retry-wait-time-2:
description: 'Time to wait before attempt 3 in seconds'
required: false
default: '120'
runs:
using: "composite"
steps:
- name: Build and push Docker image (Attempt 1 of 3)
id: buildx1
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
continue-on-error: true
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
outputs: ${{ inputs.outputs }}
provenance: ${{ inputs.provenance }}
build-args: ${{ inputs.build-args }}
- name: Wait before attempt 2
if: steps.buildx1.outcome != 'success'
run: |
echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..."
sleep ${{ inputs.retry-wait-time }}
shell: bash
- name: Build and push Docker image (Attempt 2 of 3)
id: buildx2
if: steps.buildx1.outcome != 'success'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
outputs: ${{ inputs.outputs }}
provenance: ${{ inputs.provenance }}
build-args: ${{ inputs.build-args }}
- name: Wait before attempt 3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
run: |
echo "Second attempt failed. Waiting ${{ inputs.retry-wait-time-2 }} seconds before retry..."
sleep ${{ inputs.retry-wait-time-2 }}
shell: bash
- name: Build and push Docker image (Attempt 3 of 3)
id: buildx3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
outputs: ${{ inputs.outputs }}
provenance: ${{ inputs.provenance }}
build-args: ${{ inputs.build-args }}
- name: Report failure
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
run: |
echo "All attempts failed. Possible transient infrastructure issues? Try again later or inspect logs for details."
shell: bash
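This removed composite action wrapped docker/build-push-action in a fixed-wait, three-attempt retry. A minimal shell sketch of the same pattern, assuming a plain docker buildx build stands in for the action (the image tag is illustrative):

  # Waits mirror the action's defaults: nothing before attempt 1, then 60s, then 120s
  attempt=1
  for wait in 0 60 120; do
    sleep "$wait"
    if docker buildx build --push -t onyxdotapp/onyx-backend:test .; then
      exit 0
    fi
    echo "Attempt $attempt of 3 failed."
    attempt=$((attempt + 1))
  done
  echo "All attempts failed. Possible transient infrastructure issues? Inspect logs for details."
  exit 1

The action spelled out each attempt as a separate step, trading this loop's brevity for visible per-attempt status in the Actions UI.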

@@ -1,42 +0,0 @@
name: "Prepare Build (OpenAPI generation)"
description: "Sets up Python with uv, installs deps, generates OpenAPI schema and Python client, uploads artifact"
inputs:
docker-username:
required: true
docker-password:
required: true
runs:
using: "composite"
steps:
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Generate OpenAPI schema
shell: bash
working-directory: backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ inputs['docker-username'] }}
password: ${{ inputs['docker-password'] }}
- name: Generate OpenAPI Python client
shell: bash
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
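This removed action generated the Python client by pulling openapitools/openapi-generator-cli from Docker Hub, which is why it needed a Docker Hub login. Per commit a947e44926 in the log above, generation now goes through uv instead; a rough local equivalent, assuming the openapi-generator-cli PyPI wrapper (which fetches the generator JAR and needs a Java runtime) is acceptable:

  # Hypothetical uv-based invocation; flags copied from the removed Docker step
  uvx openapi-generator-cli generate \
    -i backend/generated/openapi.json \
    -g python \
    -o backend/generated/onyx_openapi_client \
    --package-name onyx_openapi_client \
    --skip-validate-spec \
    --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"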

@@ -7,9 +7,9 @@ runs:
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
key: ${{ runner.os }}-${{ runner.arch }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
restore-keys: |
${{ runner.os }}-playwright-
${{ runner.os }}-${{ runner.arch }}-playwright-
- name: Install playwright
shell: bash

@@ -1,22 +1,45 @@
name: "Setup Python and Install Dependencies"
description: "Sets up Python with uv and installs deps"
inputs:
requirements:
description: "Newline-separated list of requirement files to install (relative to repo root)"
required: true
runs:
using: "composite"
steps:
- name: Setup uv
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Compute requirements hash
id: req-hash
shell: bash
env:
REQUIREMENTS: ${{ inputs.requirements }}
run: |
# Hash the contents of the specified requirement files
hash=""
while IFS= read -r req; do
if [ -n "$req" ] && [ -f "$req" ]; then
hash="$hash$(sha256sum "$req")"
fi
done <<< "$REQUIREMENTS"
echo "hash=$(echo "$hash" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
# NOTE: This comes before Setup uv since clean-ups run in reverse chronological order
# such that Setup uv's prune-cache is able to prune the cache before we upload.
- name: Cache uv cache directory
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
key: ${{ runner.os }}-uv-${{ steps.req-hash.outputs.hash }}
restore-keys: |
${{ runner.os }}-uv-
- name: Setup uv
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
@@ -24,15 +47,30 @@ runs:
- name: Create virtual environment
shell: bash
run: |
uv venv ${{ runner.temp }}/venv
echo "VENV_PATH=${{ runner.temp }}/venv" >> $GITHUB_ENV
echo "${{ runner.temp }}/venv/bin" >> $GITHUB_PATH
env:
VENV_DIR: ${{ runner.temp }}/venv
run: | # zizmor: ignore[github-env]
uv venv "$VENV_DIR"
# Validate path before adding to GITHUB_PATH to prevent code injection
if [ -d "$VENV_DIR/bin" ]; then
realpath "$VENV_DIR/bin" >> "$GITHUB_PATH"
else
echo "Error: $VENV_DIR/bin does not exist"
exit 1
fi
- name: Install Python dependencies with uv
shell: bash
env:
REQUIREMENTS: ${{ inputs.requirements }}
run: |
uv pip install \
-r backend/requirements/default.txt \
-r backend/requirements/dev.txt \
-r backend/requirements/model_server.txt
# Build the uv pip install command with each requirement file as array elements
cmd=("uv" "pip" "install")
while IFS= read -r req; do
# Skip empty lines
if [ -n "$req" ]; then
cmd+=("-r" "$req")
fi
done <<< "$REQUIREMENTS"
echo "Running: ${cmd[*]}"
"${cmd[@]}"

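The req-hash step above folds the contents of each requested requirements file into one cache key, replacing the old glob over backend/requirements/*.txt. To reproduce the key outside CI, a minimal sketch (the file list is illustrative):

  REQUIREMENTS=$'backend/requirements/default.txt\nbackend/requirements/dev.txt'
  hash=""
  while IFS= read -r req; do
    if [ -n "$req" ] && [ -f "$req" ]; then
      hash="$hash$(sha256sum "$req")"
    fi
  done <<< "$REQUIREMENTS"
  echo "${RUNNER_OS:-Linux}-uv-$(echo "$hash" | sha256sum | cut -d' ' -f1)"

Since sha256sum prints the file name alongside the digest, renaming a requirements file invalidates the cache just as editing it does.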
@@ -21,26 +21,27 @@ runs:
shell: bash
env:
SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
FAILED_JOBS: ${{ inputs.failed-jobs }}
TITLE: ${{ inputs.title }}
REF_NAME: ${{ inputs.ref-name }}
REPO: ${{ github.repository }}
WORKFLOW: ${{ github.workflow }}
RUN_NUMBER: ${{ github.run_number }}
RUN_ID: ${{ github.run_id }}
SERVER_URL: ${{ github.server_url }}
GITHUB_REF_NAME: ${{ github.ref_name }}
run: |
if [ -z "$SLACK_WEBHOOK_URL" ]; then
echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
exit 0
fi
# Get inputs with defaults
FAILED_JOBS="${{ inputs.failed-jobs }}"
TITLE="${{ inputs.title }}"
REF_NAME="${{ inputs.ref-name }}"
REPO="${{ github.repository }}"
WORKFLOW="${{ github.workflow }}"
RUN_NUMBER="${{ github.run_number }}"
RUN_ID="${{ github.run_id }}"
SERVER_URL="${{ github.server_url }}"
# Build workflow URL
WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"
# Use ref_name from input or fall back to github.ref_name
if [ -z "$REF_NAME" ]; then
REF_NAME="${{ github.ref_name }}"
REF_NAME="$GITHUB_REF_NAME"
fi
# Escape JSON special characters
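The change above moves every GitHub-context value into env: so the script body only ever expands plain shell variables, the template-injection pattern zizmor flags elsewhere in these workflows. For orientation, a minimal sketch of the webhook call the rest of this (truncated) script builds toward; the payload shape is an assumption based on Slack's incoming-webhook format, and the real script escapes JSON first:

  WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"
  curl -sf -X POST "$SLACK_WEBHOOK_URL" \
    -H 'Content-Type: application/json' \
    -d "{\"text\": \"${TITLE}: ${WORKFLOW} #${RUN_NUMBER} on ${REF_NAME} failed: ${FAILED_JOBS} (${WORKFLOW_URL})\"}"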

@@ -4,6 +4,8 @@ updates:
directory: "/"
schedule:
interval: "weekly"
cooldown:
default-days: 7
open-pull-requests-limit: 3
assignees:
- "jmelahman"
@@ -13,6 +15,8 @@ updates:
directory: "/backend"
schedule:
interval: "weekly"
cooldown:
default-days: 7
open-pull-requests-limit: 3
assignees:
- "jmelahman"

@@ -1,10 +1,10 @@
## Description
[Provide a brief description of the changes in this PR]
<!--- Provide a brief description of the changes in this PR --->
## How Has This Been Tested?
[Describe the tests you ran to verify your changes]
<!--- Describe the tests you ran to verify your changes --->
## Additional Options

@@ -1,27 +0,0 @@
name: Check Lazy Imports
concurrency:
group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- 'release/**'
jobs:
check-lazy-imports:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
- name: Check lazy imports
run: python3 backend/scripts/check_lazy_imports.py
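The workflow goes away here because it duplicated an existing check (commit a7dc1c0f3b, "remove duplicate check-lazy-imports", in the log above). The check itself remains a one-liner, runnable from the repository root exactly as the removed step did:

  python3 backend/scripts/check_lazy_imports.py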

File diff suppressed because it is too large.

@@ -10,11 +10,15 @@ on:
description: "The version (i.e. v1.0.0-beta.0) to tag as beta"
required: true
permissions:
contents: read
jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -29,13 +33,19 @@ jobs:
run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
- name: Pull, Tag and Push Web Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}
docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${VERSION}
- name: Pull, Tag and Push API Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${{ github.event.inputs.version }}
docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${VERSION}
- name: Pull, Tag and Push Model Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}
docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${VERSION}
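Each retag step now reads the version from an env: mapping instead of interpolating ${{ github.event.inputs.version }} directly into the script, the same hardening applied throughout these workflows. Run by hand, a retag amounts to (version value illustrative):

  VERSION=v1.0.0-beta.0
  docker buildx imagetools create \
    -t onyxdotapp/onyx-web-server:beta \
    onyxdotapp/onyx-web-server:${VERSION}

imagetools create rewrites the multi-arch manifest in the registry directly, so nothing is pulled or rebuilt; the latest-tag workflow below gets the identical treatment.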

@@ -10,11 +10,15 @@ on:
description: "The version (i.e. v0.0.1) to tag as latest"
required: true
permissions:
contents: read
jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -29,13 +33,19 @@ jobs:
run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
- name: Pull, Tag and Push Web Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}
docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${VERSION}
- name: Pull, Tag and Push API Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${{ github.event.inputs.version }}
docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${VERSION}
- name: Pull, Tag and Push Model Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}
docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${VERSION}

@@ -12,11 +12,13 @@ jobs:
permissions:
contents: write
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Install Helm CLI
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4
@@ -30,6 +32,7 @@ jobs:
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Build chart dependencies
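To mirror this job locally when touching the chart, the same repositories can be added once and the dependencies refreshed; a sketch using the repos from this hunk plus the chart path referenced later in this change set:

  helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
  helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
  helm repo add minio https://charts.min.io/
  helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
  helm repo update
  helm dependency update deployment/helm/charts/onyx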

.github/workflows/merge-group.yml (new file, 31 lines)

@@ -0,0 +1,31 @@
name: Merge Group-Specific
on:
merge_group:
permissions:
contents: read
jobs:
# This job immediately succeeds to satisfy branch protection rules on merge_group events.
# There is a similarly named "required" job in pr-integration-tests.yml which runs the actual
# integration tests. That job runs on both pull_request and merge_group events, and this job
# exists solely to provide a fast-passing check with the same name for branch protection.
# The actual tests remain enforced on presubmit (pull_request events).
required:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Success
run: echo "Success"
# This job immediately succeeds to satisfy branch protection rules on merge_group events.
# There is a similarly named "playwright-required" job in pr-playwright-tests.yml which runs
# the actual playwright tests. That job runs on both pull_request and merge_group events, and
# this job exists solely to provide a fast-passing check with the same name for branch protection.
# The actual tests remain enforced on presubmit (pull_request events).
playwright-required:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Success
run: echo "Success"

@@ -11,8 +11,9 @@ permissions:
jobs:
stale:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # ratchet:actions/stale@v9
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
with:
stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'

@@ -15,19 +15,25 @@ on:
permissions:
actions: read
contents: read
security-events: write
jobs:
scan-licenses:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]
timeout-minutes: 45
permissions:
actions: read
contents: read
security-events: write
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'
@@ -54,7 +60,9 @@ jobs:
- name: Print report
if: always()
run: echo "${{ steps.license_check_report.outputs.report }}"
env:
REPORT: ${{ steps.license_check_report.outputs.report }}
run: echo "$REPORT"
- name: Install npm dependencies
working-directory: ./web
@@ -82,6 +90,7 @@ jobs:
scan-trivy:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx

.github/workflows/pr-database-tests.yml (new file, 62 lines)

@@ -0,0 +1,62 @@
name: Database Tests
concurrency:
group: Database-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
database-tests:
runs-on:
- runs-on
- runner=2cpu-linux-arm64
- "run-id=${{ github.run_id }}-database-tests"
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Generate OpenAPI schema and Python client
shell: bash
run: |
ods openapi all
# needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers
working-directory: ./deployment/docker_compose
run: |
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d \
relational_db
- name: Run Database Tests
working-directory: ./backend
run: pytest -m alembic tests/integration/tests/migrations/
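Condensed to local steps, the new job is: generate the OpenAPI client, bring up only Postgres, then run the alembic-marked migration tests. A sketch assuming the ods devtool (introduced in commit 828036ceb8 above) and docker compose are configured as in CI:

  ods openapi all
  (cd deployment/docker_compose && \
    docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d relational_db)
  (cd backend && pytest -m alembic tests/integration/tests/migrations/)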

@@ -7,14 +7,22 @@ on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# AWS
S3_AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
# AWS credentials for S3-specific test
S3_AWS_ACCESS_KEY_ID_FOR_TEST: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
S3_AWS_SECRET_ACCESS_KEY_FOR_TEST: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
# MinIO
S3_ENDPOINT_URL: "http://localhost:9004"
S3_AWS_ACCESS_KEY_ID: "minioadmin"
S3_AWS_SECRET_ACCESS_KEY: "minioadmin"
# Confluence
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
@@ -24,19 +32,31 @@ env:
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# Jira
JIRA_ADMIN_API_TOKEN: ${{ secrets.JIRA_ADMIN_API_TOKEN }}
# LLMs
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
VERTEX_LOCATION: ${{ vars.VERTEX_LOCATION }}
# Code Interpreter
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
jobs:
discover-test-dirs:
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Discover test directories
id: set-matrix
@@ -53,6 +73,7 @@ jobs:
- runner=2cpu-linux-arm64
- ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=s3-cache
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
@@ -61,15 +82,23 @@ jobs:
env:
PYTHONPATH: ./backend
MODEL_SERVER_HOST: "disabled"
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/ee.txt
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
@@ -83,10 +112,25 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create .env file for Docker Compose
run: |
cat <<EOF > deployment/docker_compose/.env
CODE_INTERPRETER_BETA_ENABLED=true
DISABLE_TELEMETRY=true
EOF
- name: Set up Standard Dependencies
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
up -d \
minio \
relational_db \
cache \
index \
code-interpreter
- name: Run migrations
run: |
@@ -97,10 +141,39 @@ jobs:
- name: Run Tests for ${{ matrix.test-dir }}
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
env:
TEST_DIR: ${{ matrix.test-dir }}
run: |
py.test \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/external_dependency_unit/${{ matrix.test-dir }}
backend/tests/external_dependency_unit/${TEST_DIR}
- name: Collect Docker logs on failure
if: failure()
run: |
mkdir -p docker-logs
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
# Collect logs from each container
for container in $containers; do
container_name=$(docker inspect --format='{{.Name}}' $container | sed 's/^\///')
echo "Collecting logs from $container_name..."
docker logs $container > ../../docker-logs/${container_name}.log 2>&1
done
cd ../..
echo "Docker logs collected in docker-logs directory"
- name: Upload Docker logs
if: failure()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
with:
name: docker-logs-${{ matrix.test-dir }}
path: docker-logs/
retention-days: 7
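With TEST_DIR passed through env:, a single directory can be exercised locally the same way CI runs it; the directory name here is illustrative:

  TEST_DIR=some_test_dir
  py.test \
    --durations=8 \
    -o junit_family=xunit2 \
    -xv \
    --ff \
    backend/tests/external_dependency_unit/${TEST_DIR}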

@@ -6,226 +6,245 @@ concurrency:
on:
merge_group:
pull_request:
branches: [ main ]
workflow_dispatch: # Allows manual triggering
branches: [main]
push:
tags:
- "v*.*.*"
workflow_dispatch: # Allows manual triggering
permissions:
contents: read
jobs:
helm-chart-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
runs-on:
[
runs-on,
runner=8cpu-linux-x64,
hdd=256,
"run-id=${{ github.run_id }}-helm-chart-check",
]
timeout-minutes: 45
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Helm
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
with:
version: v3.19.0
- name: Set up chart-testing
uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0
# NOTE: This is Jamison's patch from https://github.com/helm/chart-testing-action/pull/194
uses: helm/chart-testing-action@8958a6ac472cbd8ee9a8fbb6f1acbc1b0e966e44 # zizmor: ignore[impostor-commit]
with:
uv_version: "0.9.9"
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
- name: Run chart-testing (list-changed)
id: list-changed
run: |
echo "default_branch: ${{ github.event.repository.default_branch }}"
changed=$(ct list-changed --remote origin --target-branch ${{ github.event.repository.default_branch }} --chart-dirs deployment/helm/charts)
echo "list-changed output: $changed"
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
env:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
echo "default_branch: ${DEFAULT_BRANCH}"
changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
echo "list-changed output: $changed"
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
run: ct lint --config ct.yaml --all
# the following would lint only changed charts, but linting isn't expensive
# run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
- name: Validate chart dependencies
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Validating chart dependencies ==="
cd deployment/helm/charts/onyx
helm dependency update
helm lint .
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-install Cluster Status ==="
kubectl get nodes -o wide
kubectl get pods --all-namespaces
kubectl get storageclass
- name: Add Helm repositories and update
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Install Redis operator
if: steps.list-changed.outputs.changed == 'true'
shell: bash
run: |
echo "=== Installing redis-operator CRDs ==="
helm upgrade --install redis-operator ot-container-kit/redis-operator \
--namespace redis-operator --create-namespace --wait --timeout 300s
- name: Pre-pull required images
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-pulling required images to avoid timeout ==="
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
echo "Kind cluster: $KIND_CLUSTER"
IMAGES=(
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
"quay.io/opstree/redis:v7.0.15"
"docker.io/onyxdotapp/onyx-web-server:latest"
)
for image in "${IMAGES[@]}"; do
echo "Pre-pulling $image"
if docker pull "$image"; then
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
else
echo "Failed to pull $image"
fi
done
echo "=== Images loaded into Kind cluster ==="
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
- name: Run chart-testing (install) with enhanced monitoring
timeout-minutes: 25
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Starting chart installation with monitoring ==="
# Function to monitor cluster state
monitor_cluster() {
while true; do
echo "=== Cluster Status Check at $(date) ==="
# Only show non-running pods to reduce noise
NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
if [ "$NON_RUNNING_PODS" -gt 0 ]; then
echo "Non-running pods:"
kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
else
echo "All pods running successfully"
fi
# Only show recent events if there are issues
RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
if [ -n "$RECENT_EVENTS" ]; then
echo "Recent warnings/errors:"
echo "$RECENT_EVENTS"
fi
sleep 60
done
}
# Start monitoring in background
monitor_cluster &
MONITOR_PID=$!
# Set up cleanup
cleanup() {
echo "=== Cleaning up monitoring process ==="
kill $MONITOR_PID 2>/dev/null || true
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
# Trap cleanup on exit
trap cleanup EXIT
# Run the actual installation with detailed logging
echo "=== Starting ct install ==="
set +e
ct install --all \
--helm-extra-set-args="\
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
--set=webserver.replicaCount=1 \
--set=api.replicaCount=0 \
--set=inferenceCapability.replicaCount=0 \
--set=indexCapability.replicaCount=0 \
--set=celery_beat.replicaCount=0 \
--set=celery_worker_heavy.replicaCount=0 \
--set=celery_worker_docfetching.replicaCount=0 \
--set=celery_worker_docprocessing.replicaCount=0 \
--set=celery_worker_light.replicaCount=0 \
--set=celery_worker_monitoring.replicaCount=0 \
--set=celery_worker_primary.replicaCount=0 \
--set=celery_worker_user_file_processing.replicaCount=0 \
--set=celery_worker_user_files_indexing.replicaCount=0" \
--helm-extra-args="--timeout 900s --debug" \
--debug --config ct.yaml
CT_EXIT=$?
set -e
if [[ $CT_EXIT -ne 0 ]]; then
echo "ct install failed with exit code $CT_EXIT"
exit $CT_EXIT
else
echo "=== Installation completed successfully ==="
fi
kubectl get pods --all-namespaces
- name: Post-install verification
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Post-install verification ==="
kubectl get pods --all-namespaces
kubectl get services --all-namespaces
# Only show issues if they exist
kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
- name: Cleanup on failure
if: failure() && steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Cleanup on failure ==="
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
echo "=== Pod descriptions for debugging ==="
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}
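To debug chart failures without waiting on CI, the same sequence can be approximated locally. A minimal sketch, assuming ct, helm, kind, and docker are installed and that ct.yaml sits at the repo root as the workflow's --config flag implies:

# Hypothetical local reproduction of the chart checks above (not part of the workflow)
ct lint --config ct.yaml --all
kind create cluster --name chart-test
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts && helm repo update
helm upgrade --install redis-operator ot-container-kit/redis-operator \
  --namespace redis-operator --create-namespace --wait --timeout 300s
ct install --all --config ct.yaml --helm-extra-args "--timeout 900s"
kind delete cluster --name chart-test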

View File

@@ -9,6 +9,12 @@ on:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# Test Environment Variables
@@ -27,22 +33,30 @@ env:
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN }}
GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC }}
GITHUB_ADMIN_EMAIL: ${{ secrets.ONYX_GITHUB_ADMIN_EMAIL }}
GITHUB_TEST_USER_1_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_1_EMAIL }}
GITHUB_TEST_USER_2_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_2_EMAIL }}
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Discover test directories
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
@@ -58,13 +72,35 @@ jobs:
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
build-backend-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -85,17 +121,46 @@ jobs:
file: ./backend/Dockerfile
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -116,16 +181,31 @@ jobs:
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
build-integration-image:
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=2cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-integration-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -139,11 +219,42 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Build and push integration test image with Docker Bake
env:
REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
run: cd backend && docker buildx bake --push integration
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
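The bake invocation reads REPOSITORY, INTEGRATION_REPOSITORY, and TAG from the environment; the docker-bake.hcl that consumes them is not part of this diff. A sketch of inspecting the same target locally, with placeholder values (the backend/ location is assumed from the earlier `cd backend` command):

# Hypothetical local inspection of the bake target (values are placeholders)
export REPOSITORY=my-registry/onyx-ci-cache
export INTEGRATION_REPOSITORY=$REPOSITORY
export TAG=integration-test-local
cd backend && docker buildx bake integration --print  # --print resolves the config without building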
integration-tests:
needs:
@@ -158,6 +269,7 @@ jobs:
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
timeout-minutes: 45
strategy:
fail-fast: false
@@ -167,7 +279,9 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -180,19 +294,29 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Create .env file for Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
AUTO_LLM_UPDATE_INTERVAL_SECONDS=1
MCP_SERVER_ENABLED=true
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
@@ -205,39 +329,45 @@ jobs:
-d
id: start_docker
- name: Wait for service to be ready
- name: Wait for services to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f onyx-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
wait_for_service() {
local url=$1
local label=$2
local timeout=${3:-300} # default 5 minutes
local start_time
start_time=$(date +%s)
while true; do
local current_time
current_time=$(date +%s)
local elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. ${label} did not become ready in $timeout seconds."
exit 1
fi
local response
response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
if [ "$response" = "200" ]; then
echo "${label} is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
else
echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
}
wait_for_service "http://localhost:8080/health" "API server"
echo "Finished waiting for services."
- name: Start Mock Services
run: |
@@ -267,6 +397,7 @@ jobs:
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
@@ -280,6 +411,11 @@ jobs:
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
-e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
-e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN} \
-e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC} \
-e GITHUB_ADMIN_EMAIL=${GITHUB_ADMIN_EMAIL} \
-e GITHUB_TEST_USER_1_EMAIL=${GITHUB_TEST_USER_1_EMAIL} \
-e GITHUB_TEST_USER_2_EMAIL=${GITHUB_TEST_USER_2_EMAIL} \
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
@@ -308,20 +444,24 @@ jobs:
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
multitenant-tests:
needs:
[build-backend-image, build-model-server-image, build-integration-image]
runs-on:
[
build-backend-image,
build-model-server-image,
build-integration-image,
runs-on,
runner=8cpu-linux-arm64,
"run-id=${{ github.run_id }}-multitenant-tests",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -330,6 +470,9 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers for multi-tenant tests
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
@@ -337,8 +480,9 @@ jobs:
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
OPENAI_DEFAULT_API_KEY=${OPENAI_API_KEY} \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
DEV_MODE=true \
docker compose -f docker-compose.multitenant-dev.yml up \
relational_db \
@@ -379,6 +523,9 @@ jobs:
echo "Finished waiting for service."
- name: Run Multi-Tenant Integration Tests
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
echo "Running multi-tenant integration tests..."
docker run --rm --network onyx_default \
@@ -394,6 +541,7 @@ jobs:
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
-e AUTH_TYPE=cloud \
@@ -402,7 +550,7 @@ jobs:
-e REQUIRE_EMAIL_VERIFICATION=false \
-e DISABLE_TELEMETRY=true \
-e DEV_MODE=true \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
${ECR_CACHE}:integration-test-${RUN_ID} \
/app/tests/integration/multitenant_tests
- name: Dump API server logs (multi-tenant)
@@ -433,16 +581,10 @@ jobs:
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests, multitenant-tests]
if: ${{ always() }}
steps:
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
with:
script: |
const needs = ${{ toJSON(needs) }};
const failed = Object.values(needs).some(n => n.result !== 'success');
if (failed) {
core.setFailed('One or more upstream jobs failed or were cancelled.');
} else {
core.notice('All required jobs succeeded.');
}
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1

View File

@@ -3,21 +3,35 @@ concurrency:
group: Run-Jest-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on: push
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
jest-tests:
name: Jest Tests
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: 'npm'
cache: "npm"
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies

View File

@@ -1,7 +1,7 @@
name: PR Labeler
on:
pull_request_target:
pull_request:
branches:
- main
types:
@@ -12,11 +12,11 @@ on:
permissions:
contents: read
pull-requests: write
jobs:
validate_pr_title:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Check PR title for Conventional Commits
env:

View File

@@ -7,9 +7,13 @@ on:
pull_request:
types: [opened, edited, reopened, synchronize]
permissions:
contents: read
jobs:
linear-check:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Check PR body for Linear link or override
env:

View File

@@ -6,11 +6,18 @@ concurrency:
on:
merge_group:
types: [checks_requested]
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
@@ -28,17 +35,20 @@ jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Discover test directories
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
@@ -55,11 +65,34 @@ jobs:
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
build-backend-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -80,16 +113,46 @@ jobs:
file: ./backend/Dockerfile
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -110,15 +173,45 @@ jobs:
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
build-integration-image:
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=2cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-integration-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -134,9 +227,26 @@ jobs:
- name: Build and push integration test image with Docker Bake
env:
REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
run: cd backend && docker buildx bake --push integration
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
integration-tests-mit:
needs:
@@ -151,6 +261,7 @@ jobs:
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
timeout-minutes: 45
strategy:
fail-fast: false
@@ -160,7 +271,9 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -173,17 +286,27 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Create .env file for Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
AUTO_LLM_UPDATE_INTERVAL_SECONDS=1
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
INTEGRATION_TESTS_MODE=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
@@ -196,39 +319,45 @@ jobs:
-d
id: start_docker
- name: Wait for service to be ready
- name: Wait for services to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f onyx-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
wait_for_service() {
local url=$1
local label=$2
local timeout=${3:-300} # default 5 minutes
local start_time
start_time=$(date +%s)
while true; do
local current_time
current_time=$(date +%s)
local elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. ${label} did not become ready in $timeout seconds."
exit 1
fi
local response
response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
if [ "$response" = "200" ]; then
echo "${label} is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
else
echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
}
wait_for_service "http://localhost:8080/health" "API server"
echo "Finished waiting for services."
- name: Start Mock Services
run: |
@@ -259,6 +388,7 @@ jobs:
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
@@ -300,20 +430,13 @@ jobs:
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests-mit]
if: ${{ always() }}
steps:
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
with:
script: |
const needs = ${{ toJSON(needs) }};
const failed = Object.values(needs).some(n => n.result !== 'success');
if (failed) {
core.setFailed('One or more upstream jobs failed or were cancelled.');
} else {
core.notice('All required jobs succeeded.');
}
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1

View File

@@ -3,7 +3,18 @@ concurrency:
group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on: push
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# Test Environment Variables
@@ -24,6 +35,13 @@ env:
MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}
# for MCP API Key tests
MCP_API_KEY: test-api-key-12345
MCP_API_KEY_TEST_PORT: 8005
MCP_API_KEY_TEST_URL: http://host.docker.internal:8005/mcp
MCP_API_KEY_SERVER_HOST: 0.0.0.0
MCP_API_KEY_SERVER_PUBLIC_HOST: host.docker.internal
MOCK_LLM_RESPONSE: true
MCP_TEST_SERVER_PORT: 8004
MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
@@ -36,12 +54,35 @@ env:
jobs:
build-web-image:
runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=4cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-web-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -62,17 +103,47 @@ jobs:
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache,mode=max
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache
type=registry,ref=onyxdotapp/onyx-web-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-backend-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -93,17 +164,47 @@ jobs:
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache,mode=max
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -124,14 +225,27 @@ jobs:
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
push: true
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache,mode=max
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests (${{ matrix.project }})
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}", "extras=ecr-cache"]
runs-on:
- runs-on
- runner=8cpu-linux-arm64
- "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}"
- "extras=ecr-cache"
- volume=50gb
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
@@ -140,15 +254,15 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Setup node
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: 'npm'
cache: "npm"
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
@@ -168,18 +282,26 @@ jobs:
run: npx playwright install --with-deps
- name: Create .env file for Docker Compose
env:
OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
EXA_API_KEY_VALUE: ${{ env.EXA_API_KEY }}
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }}
EXA_API_KEY=${{ env.EXA_API_KEY }}
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
EXA_API_KEY=${EXA_API_KEY_VALUE}
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
ONYX_WEB_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
EOF
if [ "${{ matrix.project }}" = "no-auth" ]; then
echo "PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true" >> deployment/docker_compose/.env
fi
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -193,7 +315,7 @@ jobs:
- name: Start Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml up -d
docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml -f docker-compose.mcp-api-key-test.yml up -d
id: start_docker
- name: Wait for service to be ready
@@ -253,12 +375,65 @@ jobs:
sleep 3
done
- name: Wait for MCP API Key mock server
run: |
echo "Waiting for MCP API Key mock server on port ${MCP_API_KEY_TEST_PORT:-8005}..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. MCP API Key mock server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:${MCP_API_KEY_TEST_PORT:-8005}/healthz" > /dev/null; then
echo "MCP API Key mock server is ready!"
break
fi
sleep 3
done
- name: Wait for web server to be ready
run: |
echo "Waiting for web server on port 3000..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Web server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:3000/api/health" > /dev/null 2>&1 || \
curl -sf "http://localhost:3000/" > /dev/null 2>&1; then
echo "Web server is ready!"
break
fi
echo "Web server not ready yet. Retrying in 3 seconds..."
sleep 3
done
- name: Run Playwright tests
working-directory: ./web
env:
PROJECT: ${{ matrix.project }}
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
npx playwright test --project ${{ matrix.project }}
if [ "${PROJECT}" = "no-auth" ]; then
export PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true
fi
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
if: always()
@@ -271,10 +446,12 @@ jobs:
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
env:
WORKSPACE: ${{ github.workspace }}
run: |
cd deployment/docker_compose
docker compose logs > docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
mv docker-compose.log ${WORKSPACE}/docker-compose.log
- name: Upload logs
if: success() || failure()
@@ -283,6 +460,16 @@ jobs:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
playwright-required:
# NOTE: GitHub-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [playwright-tests]
if: ${{ always() }}
steps:
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
@@ -301,12 +488,12 @@ jobs:
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
# with:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
# uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
# with:
# node-version: 22


@@ -9,6 +9,12 @@ on:
branches:
- main
- 'release/**'
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
mypy-check:
@@ -16,26 +22,28 @@ jobs:
# Note: Mypy seems quite optimized for x64 compared to arm64.
# Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
persist-credentials: false
- name: Prepare build
uses: ./.github/actions/prepare-build
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
docker-username: ${{ secrets.DOCKER_USERNAME }}
docker-password: ${{ secrets.DOCKER_TOKEN }}
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
backend/requirements/ee.txt
- name: Generate OpenAPI schema and Python client
shell: bash
run: |
ods openapi all
- name: Cache mypy cache
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
@@ -53,11 +61,8 @@ jobs:
TERM: xterm-256color
run: mypy .
- name: Check import order with reorder-python-imports
working-directory: ./backend
run: |
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Check code formatting with Black
working-directory: ./backend
run: black --check .
- name: Run MyPy (tools/)
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy tools/


@@ -7,10 +7,16 @@ on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
schedule:
# This cron expression runs the job daily at 16:00 UTC (8am PST / 9am PDT)
- cron: "0 16 * * *"
permissions:
contents: read
env:
# AWS
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
@@ -123,18 +129,26 @@ jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-connectors-check", "extras=s3-cache"]
timeout-minutes: 45
env:
PYTHONPATH: ./backend
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
@@ -147,16 +161,20 @@ jobs:
hubspot:
- 'backend/onyx/connectors/hubspot/**'
- 'backend/tests/daily/connectors/hubspot/**'
- 'uv.lock'
salesforce:
- 'backend/onyx/connectors/salesforce/**'
- 'backend/tests/daily/connectors/salesforce/**'
- 'uv.lock'
github:
- 'backend/onyx/connectors/github/**'
- 'backend/tests/daily/connectors/github/**'
- 'uv.lock'
file_processing:
- 'backend/onyx/file_processing/**'
- 'uv.lock'
- name: Run Tests (excluding HubSpot, Salesforce, and GitHub)
- name: Run Tests (excluding HubSpot, Salesforce, GitHub, and Coda)
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
@@ -169,7 +187,8 @@ jobs:
backend/tests/daily/connectors \
--ignore backend/tests/daily/connectors/hubspot \
--ignore backend/tests/daily/connectors/salesforce \
--ignore backend/tests/daily/connectors/github
--ignore backend/tests/daily/connectors/github \
--ignore backend/tests/daily/connectors/coda
- name: Run HubSpot Connector Tests
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}
@@ -214,8 +233,10 @@ jobs:
if: failure() && github.event_name == 'schedule'
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Connector Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
--data "{\"text\":\"Scheduled Connector Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
$SLACK_WEBHOOK


@@ -11,6 +11,9 @@ on:
required: false
default: 'main'
permissions:
contents: read
env:
# Bedrock
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -29,13 +32,16 @@ jobs:
model-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
timeout-minutes: 45
env:
PYTHONPATH: ./backend
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -55,7 +61,7 @@ jobs:
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
cache: "pip"
@@ -122,10 +128,12 @@ jobs:
if: failure() && github.event_name == 'schedule'
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
--data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)


@@ -9,28 +9,41 @@ on:
branches:
- main
- 'release/**'
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
backend-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
timeout-minutes: 45
env:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
backend/requirements/ee.txt
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"


@@ -6,21 +6,44 @@ concurrency:
on:
merge_group:
pull_request: null
push:
branches:
- main
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
quality-checks:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-quality-checks"]
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6
with: # zizmor: ignore[cache-poisoning]
node-version: 22
cache: "npm"
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: j178/prek-action@91fd7d7cf70ae1dee9f4f44e7dfa5d1073fe6623 # ratchet:j178/prek-action@v1
with:
extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
prek-version: '0.2.21'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
- name: Check Actions
uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1
with:
check_permissions: false
check_versions: false

.github/workflows/release-devtools.yml (new file)

@@ -0,0 +1,41 @@
name: Release Devtools
on:
push:
tags:
- "ods/v*.*.*"
jobs:
pypi:
runs-on: ubuntu-latest
environment:
name: release-devtools
permissions:
id-token: write
timeout-minutes: 10
strategy:
matrix:
os-arch:
- { goos: "linux", goarch: "amd64" }
- { goos: "linux", goarch: "arm64" }
- { goos: "windows", goarch: "amd64" }
- { goos: "windows", goarch: "arm64" }
- { goos: "darwin", goarch: "amd64" }
- { goos: "darwin", goarch: "arm64" }
- { goos: "", goarch: "" }
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- run: |
GOOS="${{ matrix.os-arch.goos }}" \
GOARCH="${{ matrix.os-arch.goarch }}" \
uv build --wheel
working-directory: tools/ods
- run: uv publish
working-directory: tools/ods


@@ -9,13 +9,15 @@ on:
jobs:
sync-foss:
runs-on: ubuntu-latest
timeout-minutes: 45
permissions:
contents: read
steps:
- name: Checkout main Onyx repo
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Install git-filter-repo
run: |


@@ -3,30 +3,30 @@ name: Nightly Tag Push
on:
schedule:
- cron: "0 10 * * *" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
workflow_dispatch:
permissions:
contents: write # Allows pushing tags to the repository
jobs:
create-and-push-tag:
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-create-and-push-tag"]
runs-on: ubuntu-slim
timeout-minutes: 45
steps:
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
# implement here which needs an actual user's deploy key
# Additional NOTE: even though this is named "rkuo", the actual key is tied to the onyx repo
# and not rkuo's personal account. It is fine to leave this key as is!
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
ssh-key: "${{ secrets.DEPLOY_KEY }}"
persist-credentials: true
- name: Set up Git user
run: |
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@onyx.app"
git config user.name "Onyx Bot [bot]"
git config user.email "onyx-bot[bot]@onyx.app"
- name: Check for existing nightly tag
id: check_tag
@@ -54,3 +54,12 @@ jobs:
run: |
TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
git push origin $TAG_NAME
- name: Send Slack notification
if: failure()
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
title: "🚨 Nightly Tag Push Failed"
ref-name: ${{ github.ref_name }}
failed-jobs: "create-and-push-tag"

.github/workflows/zizmor.yml (new file)

@@ -0,0 +1,50 @@
name: Run Zizmor
on:
push:
branches: ["main"]
pull_request:
branches: ["**"]
permissions: {}
jobs:
zizmor:
name: zizmor
runs-on: ubuntu-slim
timeout-minutes: 45
permissions:
security-events: write # needed for SARIF uploads
steps:
- name: Checkout repository
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
with:
persist-credentials: false
- name: Detect changes
id: filter
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
with:
filters: |
zizmor:
- '.github/**'
- name: Install the latest version of uv
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- name: Run zizmor
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5
with:
sarif_file: results.sarif
category: zizmor

.gitignore

@@ -1,6 +1,7 @@
# editors
.vscode
.zed
.cursor
# macos
.DS_store
@@ -28,6 +29,8 @@ settings.json
# others
/deployment/data/nginx/app.conf
/deployment/data/nginx/mcp.conf.inc
/deployment/data/nginx/mcp_upstream.conf.inc
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
*.egg-info
@@ -46,5 +49,10 @@ CLAUDE.md
# Local .terraform.lock.hcl file
.terraform.lock.hcl
node_modules
# MCP configs
.playwright-mcp
# plans
plans/


@@ -1,8 +0,0 @@
{
"mcpServers": {
"onyx-mcp": {
"type": "http",
"url": "http://localhost:8000/mcp"
}
}
}


@@ -1,61 +1,142 @@
default_install_hook_types:
- pre-commit
- post-checkout
- post-merge
- post-rewrite
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
- repo: https://github.com/astral-sh/uv-pre-commit
# From: https://github.com/astral-sh/uv-pre-commit/pull/53/commits/d30b4298e4fb63ce8609e29acdbcf4c9018a483c
rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
hooks:
- id: check-yaml
files: ^.github/
- id: uv-sync
args: ["--locked", "--all-extras"]
- id: uv-lock
files: ^pyproject\.toml$
- id: uv-export
name: uv-export default.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"backend",
"-o",
"backend/requirements/default.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export dev.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"dev",
"-o",
"backend/requirements/dev.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export ee.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"ee",
"-o",
"backend/requirements/ee.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export model_server.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"model_server",
"-o",
"backend/requirements/model_server.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-run
name: Check lazy imports
args: ["--active", "--with=onyx-devtools", "ods", "check-lazy-imports"]
files: ^backend/(?!\.venv/).*\.py$
# NOTE: This takes ~6s on a single, large module which is prohibitively slow.
# - id: uv-run
# name: mypy
# args: ["--all-extras", "mypy"]
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/rhysd/actionlint
rev: v1.7.8
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:
- id: actionlint
- repo: https://github.com/psf/black
rev: 25.1.0
rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
hooks:
- id: black
language_version: python3.11
- id: black
language_version: python3.11
# this is a fork which keeps compatibility with black
- repo: https://github.com/wimglenn/reorder-python-imports-black
rev: v3.14.0
rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d # frozen: v3.14.0
hooks:
- id: reorder-python-imports
args: ['--py311-plus', '--application-directories=backend/']
# need to ignore alembic files, since reorder-python-imports gets confused
# and thinks that alembic is a local package since there is a folder
# in the backend directory called `alembic`
exclude: ^backend/alembic/
- id: reorder-python-imports
args: ["--py311-plus", "--application-directories=backend/"]
# need to ignore alembic files, since reorder-python-imports gets confused
# and thinks that alembic is a local package since there is a folder
# in the backend directory called `alembic`
exclude: ^backend/alembic/
# These settings will remove unused imports with side effects
# Note: The repo currently does not and should not have imports with side effects
- repo: https://github.com/PyCQA/autoflake
rev: v2.3.1
rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1 # frozen: v2.3.1
hooks:
- id: autoflake
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
args:
[
"--remove-all-unused-imports",
"--remove-unused-variables",
"--in-place",
"--recursive",
]
- repo: https://github.com/golangci/golangci-lint
rev: 9f61b0f53f80672872fced07b6874397c3ed197b # frozen: v2.7.2
hooks:
- id: golangci-lint
entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.11.4
rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47 # frozen: v0.11.4
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4 # frozen: v3.1.0
hooks:
- id: prettier
types_or: [html, css, javascript, ts, tsx]
language_version: system
- id: prettier
types_or: [html, css, javascript, ts, tsx]
language_version: system
- repo: https://github.com/sirwart/ripsecrets
rev: v0.1.11
rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86 # frozen: v0.1.11
hooks:
- id: ripsecrets
args:
- --additional-pattern
- ^sk-[A-Za-z0-9_\-]{20,}$
- --additional-pattern
- ^sk-[A-Za-z0-9_\-]{20,}$
- repo: local
hooks:
@@ -66,36 +147,13 @@ repos:
pass_filenames: false
files: \.tf$
- id: check-lazy-imports
name: Check lazy imports
entry: python3 backend/scripts/check_lazy_imports.py
# Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
# This is a preview package - if it breaks:
# 1. Try updating: cd web && npm update @typescript/native-preview
# 2. Or fallback to tsc: replace 'tsgo' with 'tsc' below
- id: typescript-check
name: TypeScript type check
entry: bash -c 'cd web && npx tsgo --noEmit --project tsconfig.types.json'
language: system
files: ^backend/(?!\.venv/).*\.py$
# We would like to have a mypy pre-commit hook, but due to the fact that
# pre-commit runs in its own isolated environment, we would need to install
# and keep in sync all dependencies so mypy has access to the appropriate type
# stubs. This does not seem worth it at the moment, so for now we will stick to
# having mypy run via Github Actions / manually by contributors
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.1.1
# hooks:
# - id: mypy
# exclude: ^tests/
# # below are needed for type stubs since pre-commit runs in its own
# # isolated environment. Unfortunately, this needs to be kept in sync
# # with requirements/dev.txt + requirements/default.txt
# additional_dependencies: [
# alembic==1.10.4,
# types-beautifulsoup4==4.12.0.3,
# types-html5lib==1.1.11.13,
# types-oauthlib==3.2.0.9,
# types-psycopg2==2.9.21.10,
# types-python-dateutil==2.8.19.13,
# types-regex==2023.3.23.1,
# types-requests==2.28.11.17,
# types-retry==0.9.9.3,
# types-urllib3==1.26.25.11
# ]
# # TODO: add back once errors are addressed
# # args: [--strict]
pass_filenames: false
files: ^web/.*\.(ts|tsx)$
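
The `uv-export` hooks above regenerate the pinned requirements files from `uv.lock` whenever it or `pyproject.toml` changes. For reference, a minimal sketch of the equivalent manual invocation for `default.txt`, mirroring the hook arguments above (run from the repo root):

```bash
# Regenerate backend/requirements/default.txt from uv.lock.
# Flags mirror the "uv-export default.txt" hook defined above.
uv export \
  --no-emit-project \
  --no-default-groups \
  --no-hashes \
  --extra backend \
  -o backend/requirements/default.txt
```

The other three hooks differ only in the `--extra` name and the output path.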


@@ -1,66 +1,59 @@
# Copy this file to .env in the .vscode folder
# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
# Also check out onyx/backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes
# Copy this file to .env in the .vscode folder.
# Fill in the <REPLACE THIS> values as needed; it is recommended to set the
# GEN_AI_API_KEY value to avoid having to set up an LLM in the UI.
# Also check out onyx/backend/scripts/restart_containers.sh for a script to
# restart the containers which Onyx relies on outside of VSCode/Cursor
# processes.
# For local dev, often user Authentication is not needed
# For local dev, often user Authentication is not needed.
AUTH_TYPE=disabled
# Skip warm up for dev
SKIP_WARM_UP=True
# Always keep these on for Dev
# Logs all model prompts to stdout
# Always keep these on for Dev.
# Logs model prompts, reasoning, and answer to stdout.
LOG_ONYX_MODEL_INTERACTIONS=True
# More verbose logging
LOG_LEVEL=debug
# This passes top N results to LLM an additional time for reranking prior to answer generation
# This step is quite heavy on token usage so we disable it for dev generally
# This passes top N results to LLM an additional time for reranking prior to
# answer generation.
# This step is quite heavy on token usage so we disable it for dev generally.
DISABLE_LLM_DOC_RELEVANCE=False
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).
OAUTH_CLIENT_ID=<REPLACE THIS>
OAUTH_CLIENT_SECRET=<REPLACE THIS>
OPENID_CONFIG_URL=<REPLACE THIS>
SAML_CONF_DIR=/<ABSOLUTE PATH TO ONYX>/onyx/backend/ee/onyx/configs/saml_config
# Generally not useful for dev, we don't generally want to set up an SMTP server for dev
# Generally not useful for dev, we don't generally want to set up an SMTP server
# for dev.
REQUIRE_EMAIL_VERIFICATION=False
# Set these so if you wipe the DB, you don't end up having to go through the UI every time
# Set these so if you wipe the DB, you don't end up having to go through the UI
# every time.
GEN_AI_API_KEY=<REPLACE THIS>
OPENAI_API_KEY=<REPLACE THIS>
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o
# For Onyx Slack Bot, overrides the UI values so no need to set this up via UI every time
# Only needed if using OnyxBot
#ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
#ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
# Python stuff
PYTHONPATH=../backend
PYTHONUNBUFFERED=1
# Internet Search
EXA_API_KEY=<REPLACE THIS>
# Enable the full set of Danswer Enterprise Edition features
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
# Enable the full set of Danswer Enterprise Edition features.
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
# are using this for local testing/development).
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
# Agent Search configs # TODO: Remove or give these proper names
AGENT_RETRIEVAL_STATS=False # Note: This setting will incur substantial re-ranking effort
AGENT_RERANKING_STATS=True
AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20
# S3 File Store Configuration (MinIO for local development)
S3_ENDPOINT_URL=http://localhost:9004
@@ -68,16 +61,24 @@ S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
S3_AWS_ACCESS_KEY_ID=minioadmin
S3_AWS_SECRET_ACCESS_KEY=minioadmin
# Show extra/uncommon connectors
# Show extra/uncommon connectors.
SHOW_EXTRA_CONNECTORS=True
# Local langsmith tracing
LANGSMITH_TRACING="true"
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY=<REPLACE_THIS>
LANGSMITH_PROJECT=<REPLACE_THIS>
# Local Confluence OAuth testing
# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
# NEXT_PUBLIC_TEST_ENV=True
# NEXT_PUBLIC_TEST_ENV=True
# OpenSearch
# Arbitrary password is fine for local development.
OPENSEARCH_INITIAL_ADMIN_PASSWORD=<REPLACE THIS>


@@ -20,6 +20,7 @@
"Web Server",
"Model Server",
"API Server",
"MCP Server",
"Slack Bot",
"Celery primary",
"Celery light",
@@ -132,8 +133,6 @@
},
"consoleTitle": "API Server Console"
},
// For the listener to access the Slack API,
// ONYX_BOT_SLACK_APP_TOKEN & ONYX_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Slack Bot",
"consoleName": "Slack Bot",
@@ -152,6 +151,34 @@
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "MCP Server",
"consoleName": "MCP Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"MCP_SERVER_ENABLED": "true",
"MCP_SERVER_PORT": "8090",
"MCP_SERVER_CORS_ORIGINS": "http://localhost:*",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": [
"onyx.mcp_server.api:mcp_app",
"--reload",
"--port",
"8090",
"--timeout-graceful-shutdown",
"0"
],
"presentation": {
"group": "2"
},
"consoleTitle": "MCP Server Console"
},
{
"name": "Celery primary",
"type": "debugpy",
@@ -481,7 +508,21 @@
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true,
"presentation": {
"group": "3"
}
},
{
"name": "Clear and Restart OpenSearch Container",
// Generic debugger type, required arg but has no bearing on bash.
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
@@ -527,10 +568,10 @@
"name": "Install Python Requirements",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeExecutable": "uv",
"runtimeArgs": [
"-c",
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
"sync",
"--all-extras"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
@@ -543,14 +584,14 @@
"name": "Onyx OpenAPI Schema Generator",
"type": "debugpy",
"request": "launch",
"program": "scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}/backend",
"program": "backend/scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
"PYTHONPATH": "backend"
},
"args": ["--filename", "generated/openapi.json"]
"args": ["--filename", "backend/generated/openapi.json", "--generate-python-client"]
},
{
// script to debug multi tenant db issues


@@ -1,13 +1,13 @@
# AGENTS.md
This file provides guidance to Codex when working with code in this repository.
This file provides guidance to AI agents when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
@@ -181,6 +181,286 @@ web/
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate usage to paddings instead of margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `web/tailwind-themes/tailwind.config.js` / `web/src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn` calls are easier to read. They also handle more complex inputs (e.g., string arrays) properly, flattening each element down; as a result, falsy values from conditionals (e.g., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) get filtered out.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
/* The `text03` flag colours the rendered text with the 3rd-scale grey */
text03
/* The `mainAction` flag applies the "main-action" font, line-height, and weight described in the Figma */
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
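A minimal sketch of this pattern (the `/api/projects` endpoint and `Project` shape here are hypothetical, for illustration only):
```typescript
// ✅ Good - fetch where the data is needed; show a placeholder while loading
import useSWR from "swr";

// Hypothetical response shape, for illustration only.
interface Project {
  id: string;
  name: string;
}

const fetcher = (url: string) => fetch(url).then((res) => res.json());

function ProjectList() {
  const { data, error, isLoading } = useSWR<Project[]>("/api/projects", fetcher);

  if (isLoading) return <div className="p-4">Loading...</div>;
  if (error || !data) return <div className="p-4">Failed to load projects</div>;

  return (
    <div className="p-4 space-y-2">
      {data.map((project) => (
        <div key={project.id}>{project.name}</div>
      ))}
    </div>
  );
}
```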
## Database & Migrations
### Running Migrations
@@ -295,14 +575,6 @@ will be tailing their logs to this file.
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:


@@ -4,10 +4,10 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
@@ -184,6 +184,286 @@ web/
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate usage to paddings instead of margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `tailwind-themes/tailwind.config.js` / `src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn` calls are easier to read. They also handle more complex inputs (e.g., string arrays) properly, flattening each element down; as a result, falsy values from conditionals (e.g., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) get filtered out.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
/* The `text03` flag colours the rendered text with the 3rd-scale grey */
text03
/* The `mainAction` flag applies the "main-action" font, line-height, and weight described in the Figma */
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
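A minimal sketch of this pattern (the `/api/projects` endpoint and `Project` shape here are hypothetical, for illustration only):
```typescript
// ✅ Good - fetch where the data is needed; show a placeholder while loading
import useSWR from "swr";

// Hypothetical response shape, for illustration only.
interface Project {
  id: string;
  name: string;
}

const fetcher = (url: string) => fetch(url).then((res) => res.json());

function ProjectList() {
  const { data, error, isLoading } = useSWR<Project[]>("/api/projects", fetcher);

  if (isLoading) return <div className="p-4">Loading...</div>;
  if (error || !data) return <div className="p-4">Failed to load projects</div>;

  return (
    <div className="p-4 space-y-2">
      {data.map((project) => (
        <div key={project.id}>{project.name}</div>
      ))}
    </div>
  );
}
```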
## Database & Migrations
### Running Migrations
@@ -300,14 +580,6 @@ will be tailing their logs to this file.
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:


@@ -71,12 +71,12 @@ If using a higher version, sometimes some libraries will not be available (i.e.
#### Backend: Python requirements
Currently, we use pip and recommend creating a virtual environment.
Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
For convenience here's a command for it:
```bash
python -m venv .venv
uv venv .venv --python 3.11
source .venv/bin/activate
```
@@ -95,33 +95,15 @@ If using PowerShell, the command slightly differs:
Install the required python dependencies:
```bash
pip install -r backend/requirements/combined.txt
uv sync --all-extras
```
or
Install Playwright for Python (headless browser required by the Web Connector):
```bash
pip install -r backend/requirements/default.txt
pip install -r backend/requirements/dev.txt
pip install -r backend/requirements/ee.txt
pip install -r backend/requirements/model_server.txt
uv run playwright install
```
Fix vscode/cursor auto-imports:
```bash
pip install -e .
```
Install Playwright for Python (headless browser required by the Web Connector)
In the activated Python virtualenv, install Playwright for Python by running:
```bash
playwright install
```
You may have to deactivate and reactivate your virtualenv for `playwright` to appear on your path.
#### Frontend: Node dependencies
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
@@ -130,7 +112,7 @@ to manage your Node installations. Once installed, you can run
```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```
```
Navigate to `onyx/web` and run:
@@ -144,21 +126,15 @@ npm i
For the backend, you'll need to set up pre-commit hooks (black / reorder-python-imports).
With the virtual environment active, install the pre-commit library with:
Then run:
```bash
pip install pre-commit
```
Then, from the `onyx/backend` directory, run:
```bash
pre-commit install
uv run pre-commit install
```
Additionally, we use `mypy` for static type checking.
Onyx is fully type-annotated, and we want to keep it that way!
To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend` directory.
To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
### Web
@@ -185,7 +161,7 @@ You will need Docker installed to run these containers.
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
```bash
docker compose up -d index relational_db cache minio
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio
```
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)


@@ -15,3 +15,4 @@ build/
dist/
.coverage
htmlcov/
model_server/legacy/


@@ -12,6 +12,13 @@ ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"
# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Install system dependencies
@@ -51,6 +58,7 @@ RUN uv pip install --system --no-cache-dir --upgrade \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium && \
chown -R onyx:onyx /app && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \
# Cleanup for CVEs and size reduction
# https://github.com/tornadoweb/tornado/issues/3107
@@ -94,13 +102,6 @@ tiktoken.get_encoding('cl100k_base')"
# Set up application files
WORKDIR /app
# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
# Enterprise Version Files
COPY --chown=onyx:onyx ./ee /app/ee
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf


@@ -1,4 +1,29 @@
FROM python:3.11.7-slim-bookworm
# Base stage with dependencies
FROM python:3.11.7-slim-bookworm AS base
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
RUN mkdir -p /app/.cache/huggingface
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt && \
rm -rf ~/.cache/uv /tmp/*.txt
# Stage for downloading embedding models
FROM base AS embedding-models
RUN python -c "from huggingface_hub import snapshot_download; \
snapshot_download('nomic-ai/nomic-embed-text-v1');"
# Initialize SentenceTransformer to cache the custom architecture
RUN python -c "from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
# Final stage - combine all downloads
FROM base AS final
LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
@@ -6,44 +31,17 @@ AI models for Onyx. This container and all the code is MIT Licensed and free for
You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
visit https://github.com/onyx-dot-app/onyx."
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Create non-root user for security best practices
RUN mkdir -p /app && \
groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
chown -R onyx:onyx /app && \
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt && \
rm -rf ~/.cache/uv /tmp/*.txt
# Pre-downloading models for setups with limited egress
# Download tokenizers, distilbert for the Onyx model
# Download model weights
# Run Nomic to pull in the custom architecture and have it cached locally
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
snapshot_download(repo_id='onyx-dot-app/information-content-model'); \
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);" && \
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
# it's preserved in order to combine with the user's cache contents
mv /app/.cache/huggingface /app/.cache/temp_huggingface && \
chown -R onyx:onyx /app
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
# it's preserved in order to combine with the user's cache contents
COPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface
WORKDIR /app

View File

@@ -7,8 +7,12 @@ Onyx migrations use a generic single-database configuration with an async dbapi.
## To generate new migrations:
run from onyx/backend:
`alembic revision --autogenerate -m <DESCRIPTION_OF_MIGRATION>`
From onyx/backend, run:
`alembic revision -m <DESCRIPTION_OF_MIGRATION>`
Note: you cannot use the `--autogenerate` flag, as automatic schema parsing does not work.
Manually populate the upgrade and downgrade in your new migration.
More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html
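
For reference, a minimal hand-written migration might look like the following sketch. The revision ids, table, and column names are hypothetical placeholders, not values from this change:

```python
"""add example flag

Revision ID: 0a1b2c3d4e5f
Revises: f0e1d2c3b4a5
"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic (hypothetical values here;
# `alembic revision` generates the real ones).
revision = "0a1b2c3d4e5f"
down_revision = "f0e1d2c3b4a5"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Written by hand, since --autogenerate is unavailable.
    op.add_column(
        "example_table",
        sa.Column("example_flag", sa.Boolean(), nullable=False, server_default="false"),
    )


def downgrade() -> None:
    op.drop_column("example_table", "example_flag")
```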

View File

@@ -39,7 +39,9 @@ config = context.config
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
fileConfig(config.config_file_name)
# disable_existing_loggers=False prevents breaking pytest's caplog fixture
# See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
fileConfig(config.config_file_name, disable_existing_loggers=False)
target_metadata = [Base.metadata, ResultModelBase.metadata]
@@ -460,8 +462,49 @@ def run_migrations_offline() -> None:
def run_migrations_online() -> None:
logger.info("run_migrations_online starting.")
asyncio.run(run_async_migrations())
"""Run migrations in 'online' mode.
Supports pytest-alembic by checking for a pre-configured connection
in context.config.attributes["connection"]. If present, uses that
connection/engine directly instead of creating a new async engine.
"""
# Check if pytest-alembic is providing a connection/engine
connectable = context.config.attributes.get("connection", None)
if connectable is not None:
# pytest-alembic is providing an engine - use it directly
logger.info("run_migrations_online starting (pytest-alembic mode).")
# For pytest-alembic, we use the default schema (public)
schema_name = context.config.attributes.get(
"schema_name", POSTGRES_DEFAULT_SCHEMA
)
# pytest-alembic passes an Engine, we need to get a connection from it
with connectable.connect() as connection:
# Set search path for the schema
connection.execute(text(f'SET search_path TO "{schema_name}"'))
context.configure(
connection=connection,
target_metadata=target_metadata, # type: ignore
include_object=include_object,
version_table_schema=schema_name,
include_schemas=True,
compare_type=True,
compare_server_default=True,
script_location=config.get_main_option("script_location"),
)
with context.begin_transaction():
context.run_migrations()
# Commit the transaction to ensure changes are visible to next migration
connection.commit()
else:
# Normal operation - use async migrations
logger.info("run_migrations_online starting.")
asyncio.run(run_async_migrations())
if context.is_offline_mode():
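
A minimal sketch of how a test harness can hand env.py a ready-made engine through `config.attributes`, which the branch above picks up; the database URL and credentials are hypothetical:

```python
from alembic import command
from alembic.config import Config
from sqlalchemy import create_engine

# Hypothetical sync test-database URL.
engine = create_engine("postgresql+psycopg2://onyx:onyx@localhost:5432/onyx_test")

cfg = Config("alembic.ini")
# run_migrations_online() finds this and skips the async engine path.
cfg.attributes["connection"] = engine
# Optional; falls back to POSTGRES_DEFAULT_SCHEMA when absent.
cfg.attributes["schema_name"] = "public"

command.upgrade(cfg, "head")
```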

View File

@@ -0,0 +1,29 @@
"""add is_clarification to chat_message
Revision ID: 18b5b2524446
Revises: 87c52ec39f84
Create Date: 2025-01-16
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "18b5b2524446"
down_revision = "87c52ec39f84"
branch_labels = None
depends_on = None
def upgrade() -> None:
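# server_default="false" backfills existing rows, so the NOT NULL constraint is safe to apply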
op.add_column(
"chat_message",
sa.Column(
"is_clarification", sa.Boolean(), nullable=False, server_default="false"
),
)
def downgrade() -> None:
op.drop_column("chat_message", "is_clarification")

View File

@@ -0,0 +1,89 @@
"""add internet search and content provider tables
Revision ID: 1f2a3b4c5d6e
Revises: 9drpiiw74ljy
Create Date: 2025-11-10 19:45:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "1f2a3b4c5d6e"
down_revision = "9drpiiw74ljy"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"internet_search_provider",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("name", sa.String(), nullable=False, unique=True),
sa.Column("provider_type", sa.String(), nullable=False),
sa.Column("api_key", sa.LargeBinary(), nullable=True),
sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column(
"is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
)
op.create_index(
"ix_internet_search_provider_is_active",
"internet_search_provider",
["is_active"],
)
op.create_table(
"internet_content_provider",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("name", sa.String(), nullable=False, unique=True),
sa.Column("provider_type", sa.String(), nullable=False),
sa.Column("api_key", sa.LargeBinary(), nullable=True),
sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column(
"is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
)
op.create_index(
"ix_internet_content_provider_is_active",
"internet_content_provider",
["is_active"],
)
def downgrade() -> None:
op.drop_index(
"ix_internet_content_provider_is_active", table_name="internet_content_provider"
)
op.drop_table("internet_content_provider")
op.drop_index(
"ix_internet_search_provider_is_active", table_name="internet_search_provider"
)
op.drop_table("internet_search_provider")

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "23957775e5f5"
down_revision = "bc9771dccadf"
branch_labels = None # type: ignore
depends_on = None # type: ignore
branch_labels = None
depends_on = None
def upgrade() -> None:

View File

@@ -0,0 +1,27 @@
"""add last refreshed at mcp server
Revision ID: 2a391f840e85
Revises: 4cebcbc9b2ae
Create Date: 2025-12-06 15:19:59.766066
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2a391f840e85"
down_revision = "4cebcbc9b2ae"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"mcp_server",
sa.Column("last_refreshed_at", sa.DateTime(timezone=True), nullable=True),
)
def downgrade() -> None:
op.drop_column("mcp_server", "last_refreshed_at")

View File

@@ -0,0 +1,46 @@
"""usage_limits
Revision ID: 2b90f3af54b8
Revises: 9a0296d7421e
Create Date: 2026-01-03 16:55:30.449692
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2b90f3af54b8"
down_revision = "9a0296d7421e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"tenant_usage",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"window_start", sa.DateTime(timezone=True), nullable=False, index=True
),
sa.Column("llm_cost_cents", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("chunks_indexed", sa.Integer(), nullable=False, server_default="0"),
sa.Column("api_calls", sa.Integer(), nullable=False, server_default="0"),
sa.Column(
"non_streaming_api_calls", sa.Integer(), nullable=False, server_default="0"
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=True,
),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("window_start", name="uq_tenant_usage_window"),
)
def downgrade() -> None:
op.drop_index("ix_tenant_usage_window_start", table_name="tenant_usage")
op.drop_table("tenant_usage")

View File

@@ -0,0 +1,89 @@
"""seed_exa_provider_from_env
Revision ID: 3c9a65f1207f
Revises: 1f2a3b4c5d6e
Create Date: 2025-11-20 19:18:00.000000
"""
from __future__ import annotations
import os
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from dotenv import load_dotenv, find_dotenv
from onyx.utils.encryption import encrypt_string_to_bytes
revision = "3c9a65f1207f"
down_revision = "1f2a3b4c5d6e"
branch_labels = None
depends_on = None
EXA_PROVIDER_NAME = "Exa"
def _get_internet_search_table(metadata: sa.MetaData) -> sa.Table:
return sa.Table(
"internet_search_provider",
metadata,
sa.Column("id", sa.Integer, primary_key=True),
sa.Column("name", sa.String),
sa.Column("provider_type", sa.String),
sa.Column("api_key", sa.LargeBinary),
sa.Column("config", postgresql.JSONB),
sa.Column("is_active", sa.Boolean),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
)
def upgrade() -> None:
load_dotenv(find_dotenv())
exa_api_key = os.environ.get("EXA_API_KEY")
if not exa_api_key:
return
bind = op.get_bind()
metadata = sa.MetaData()
table = _get_internet_search_table(metadata)
existing = bind.execute(
sa.select(table.c.id).where(table.c.name == EXA_PROVIDER_NAME)
).first()
if existing:
return
encrypted_key = encrypt_string_to_bytes(exa_api_key)
has_active_provider = bind.execute(
sa.select(table.c.id).where(table.c.is_active.is_(True))
).first()
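# Activate Exa only if no other search provider is currently active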
bind.execute(
table.insert().values(
name=EXA_PROVIDER_NAME,
provider_type="exa",
api_key=encrypted_key,
config=None,
is_active=not bool(has_active_provider),
)
)
def downgrade() -> None:
return

View File

@@ -11,7 +11,7 @@ from pydantic import BaseModel, ConfigDict
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.llm_provider_options import (
from onyx.llm.well_known_providers.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)

View File

@@ -0,0 +1,27 @@
"""add tab_index to tool_call
Revision ID: 4cebcbc9b2ae
Revises: a1b2c3d4e5f6
Create Date: 2025-12-16
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "4cebcbc9b2ae"
down_revision = "a1b2c3d4e5f6"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"tool_call",
sa.Column("tab_index", sa.Integer(), nullable=False, server_default="0"),
)
def downgrade() -> None:
op.drop_column("tool_call", "tab_index")

View File

@@ -62,6 +62,11 @@ def upgrade() -> None:
)
"""
)
# Drop the temporary table to avoid conflicts if migration runs again
# (e.g., during upgrade -> downgrade -> upgrade cycles in tests)
op.execute("DROP TABLE IF EXISTS temp_connector_credential")
# If no exception was raised, alter the column
op.alter_column("credential", "source", nullable=True) # TODO modify
# # ### end Alembic commands ###

View File

@@ -0,0 +1,104 @@
"""add_open_url_tool
Revision ID: 4f8a2b3c1d9e
Revises: a852cbe15577
Create Date: 2025-11-24 12:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "4f8a2b3c1d9e"
down_revision = "a852cbe15577"
branch_labels = None
depends_on = None
OPEN_URL_TOOL = {
"name": "OpenURLTool",
"display_name": "Open URL",
"description": (
"The Open URL Action allows the agent to fetch and read contents of web pages."
),
"in_code_tool_id": "OpenURLTool",
"enabled": True,
}
def upgrade() -> None:
conn = op.get_bind()
# Check if tool already exists
existing = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
{"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]},
).fetchone()
if existing:
tool_id = existing[0]
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
OPEN_URL_TOOL,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
"""
),
OPEN_URL_TOOL,
)
# Get the newly inserted tool's id
result = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
{"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]},
).fetchone()
tool_id = result[0] # type: ignore
# Associate the tool with all existing personas
# Get all persona IDs
persona_ids = conn.execute(sa.text("SELECT id FROM persona")).fetchall()
for (persona_id,) in persona_ids:
# Check if association already exists
exists = conn.execute(
sa.text(
"""
SELECT 1 FROM persona__tool
WHERE persona_id = :persona_id AND tool_id = :tool_id
"""
),
{"persona_id": persona_id, "tool_id": tool_id},
).fetchone()
if not exists:
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (:persona_id, :tool_id)
"""
),
{"persona_id": persona_id, "tool_id": tool_id},
)
def downgrade() -> None:
# We don't remove the tool on downgrade since it's fine to have it around.
# If we upgrade again, it will be a no-op.
pass
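
As a design note, the per-persona loop above could also be expressed as a single set-based statement; a sketch, assuming `persona__tool` enforces uniqueness on `(persona_id, tool_id)` so `ON CONFLICT DO NOTHING` absorbs the already-associated rows:

```python
# Inside the same upgrade(), reusing conn and tool_id from above.
conn.execute(
    sa.text(
        """
        INSERT INTO persona__tool (persona_id, tool_id)
        SELECT p.id, :tool_id FROM persona p
        ON CONFLICT DO NOTHING
        """
    ),
    {"tool_id": tool_id},
)
```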

View File

@@ -0,0 +1,35 @@
"""backend driven notification details
Revision ID: 5c3dca366b35
Revises: 9087b548dd69
Create Date: 2026-01-06 16:03:11.413724
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5c3dca366b35"
down_revision = "9087b548dd69"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"notification",
sa.Column(
"title", sa.String(), nullable=False, server_default="New Notification"
),
)
op.add_column(
"notification",
sa.Column("description", sa.String(), nullable=True, server_default=""),
)
def downgrade() -> None:
op.drop_column("notification", "title")
op.drop_column("notification", "description")

View File

@@ -0,0 +1,55 @@
"""update_default_persona_prompt
Revision ID: 5e6f7a8b9c0d
Revises: 4f8a2b3c1d9e
Create Date: 2025-11-30 12:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5e6f7a8b9c0d"
down_revision = "4f8a2b3c1d9e"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].{citation_reminder_or_empty}
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :system_prompt
WHERE id = :persona_id
"""
),
{"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# We don't revert the system prompt on downgrade since we don't know
# what the previous value was. The new prompt is a reasonable default.
pass

View File

@@ -0,0 +1,44 @@
"""add_created_at_in_project_userfile
Revision ID: 6436661d5b65
Revises: c7e9f4a3b2d1
Create Date: 2025-11-24 11:50:24.536052
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "6436661d5b65"
down_revision = "c7e9f4a3b2d1"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add created_at column to project__user_file table
op.add_column(
"project__user_file",
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
)
# Add composite index on (project_id, created_at DESC)
op.create_index(
"ix_project__user_file_project_id_created_at",
"project__user_file",
["project_id", sa.text("created_at DESC")],
)
def downgrade() -> None:
# Remove composite index on (project_id, created_at)
op.drop_index(
"ix_project__user_file_project_id_created_at", table_name="project__user_file"
)
# Remove created_at column from project__user_file table
op.drop_column("project__user_file", "created_at")

View File

@@ -0,0 +1,75 @@
"""nullify_default_task_prompt
Revision ID: 699221885109
Revises: 7e490836d179
Create Date: 2025-12-30 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "699221885109"
down_revision = "7e490836d179"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
def upgrade() -> None:
# Make task_prompt column nullable
# Note: The model had nullable=True but the DB column was NOT NULL until this point
op.alter_column(
"persona",
"task_prompt",
nullable=True,
)
# Set task_prompt to NULL for the default persona
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = NULL
WHERE id = :persona_id
"""
),
{"persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# Restore task_prompt to empty string for the default persona
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = ''
WHERE id = :persona_id AND task_prompt IS NULL
"""
),
{"persona_id": DEFAULT_PERSONA_ID},
)
# Set any remaining NULL task_prompts to empty string before making non-nullable
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = ''
WHERE task_prompt IS NULL
"""
)
)
# Revert task_prompt column to not nullable
op.alter_column(
"persona",
"task_prompt",
nullable=False,
)

View File

@@ -0,0 +1,54 @@
"""add image generation config table
Revision ID: 7206234e012a
Revises: 699221885109
Create Date: 2025-12-21 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7206234e012a"
down_revision = "699221885109"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"image_generation_config",
sa.Column("image_provider_id", sa.String(), primary_key=True),
sa.Column("model_configuration_id", sa.Integer(), nullable=False),
sa.Column("is_default", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(
["model_configuration_id"],
["model_configuration.id"],
ondelete="CASCADE",
),
)
op.create_index(
"ix_image_generation_config_is_default",
"image_generation_config",
["is_default"],
unique=False,
)
op.create_index(
"ix_image_generation_config_model_configuration_id",
"image_generation_config",
["model_configuration_id"],
unique=False,
)
def downgrade() -> None:
op.drop_index(
"ix_image_generation_config_model_configuration_id",
table_name="image_generation_config",
)
op.drop_index(
"ix_image_generation_config_is_default", table_name="image_generation_config"
)
op.drop_table("image_generation_config")

View File

@@ -10,7 +10,7 @@ from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.llm_provider_options import (
from onyx.llm.well_known_providers.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)

View File

@@ -0,0 +1,27 @@
"""Add display_name to model_configuration
Revision ID: 7bd55f264e1b
Revises: e8f0d2a38171
Create Date: 2025-12-04
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7bd55f264e1b"
down_revision = "e8f0d2a38171"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"model_configuration",
sa.Column("display_name", sa.String(), nullable=True),
)
def downgrade() -> None:
op.drop_column("model_configuration", "display_name")

View File

@@ -0,0 +1,80 @@
"""nullify_default_system_prompt
Revision ID: 7e490836d179
Revises: c1d2e3f4a5b6
Create Date: 2025-12-29 16:54:36.635574
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7e490836d179"
down_revision = "c1d2e3f4a5b6"
branch_labels = None
depends_on = None
# This is the default system prompt from the previous migration (87c52ec39f84)
# ruff: noqa: E501, W605 start
PREVIOUS_DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
# Make system_prompt column nullable (model already has nullable=True but DB doesn't)
op.alter_column(
"persona",
"system_prompt",
nullable=True,
)
# Set system_prompt to NULL where it matches the previous default
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = NULL
WHERE system_prompt = :previous_default
"""
),
{"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
)
def downgrade() -> None:
# Restore the default system prompt for personas that have NULL
# Note: This may restore the prompt to personas that originally had NULL
# before this migration, but there's no way to distinguish them
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :previous_default
WHERE system_prompt IS NULL
"""
),
{"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
)
# Revert system_prompt column to not nullable
op.alter_column(
"persona",
"system_prompt",
nullable=False,
)

View File

@@ -42,13 +42,13 @@ def upgrade() -> None:
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"), # type: ignore
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"), # type: ignore
server_default=sa.text("now()"),
nullable=False,
),
)
@@ -63,13 +63,13 @@ def upgrade() -> None:
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"), # type: ignore
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"), # type: ignore
server_default=sa.text("now()"),
nullable=False,
),
sa.ForeignKeyConstraint(

View File

@@ -0,0 +1,55 @@
"""update_default_system_prompt
Revision ID: 87c52ec39f84
Revises: 7bd55f264e1b
Create Date: 2025-12-05 15:54:06.002452
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "87c52ec39f84"
down_revision = "7bd55f264e1b"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :system_prompt
WHERE id = :persona_id
"""
),
{"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# We don't revert the system prompt on downgrade since we don't know
# what the previous value was. The new prompt is a reasonable default.
pass

View File

@@ -0,0 +1,136 @@
"""seed_default_image_gen_config
Revision ID: 9087b548dd69
Revises: 2b90f3af54b8
Create Date: 2026-01-05 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9087b548dd69"
down_revision = "2b90f3af54b8"
branch_labels = None
depends_on = None
# Constants for default image generation config
# Source: web/src/app/admin/configuration/image-generation/constants.ts
IMAGE_PROVIDER_ID = "openai_gpt_image_1"
MODEL_NAME = "gpt-image-1"
PROVIDER_NAME = "openai"
def upgrade() -> None:
conn = op.get_bind()
# Check if image_generation_config table already has records
existing_configs = (
conn.execute(sa.text("SELECT COUNT(*) FROM image_generation_config")).scalar()
or 0
)
if existing_configs > 0:
# Skip if configs already exist - user may have configured manually
return
# Find the first OpenAI LLM provider
openai_provider = conn.execute(
sa.text(
"""
SELECT id, api_key
FROM llm_provider
WHERE provider = :provider
ORDER BY id
LIMIT 1
"""
),
{"provider": PROVIDER_NAME},
).fetchone()
if not openai_provider:
# No OpenAI provider found - nothing to do
return
source_provider_id, api_key = openai_provider
# Create new LLM provider for image generation (clone only api_key)
result = conn.execute(
sa.text(
"""
INSERT INTO llm_provider (
name, provider, api_key, api_base, api_version,
deployment_name, default_model_name, is_public,
is_default_provider, is_default_vision_provider, is_auto_mode
)
VALUES (
:name, :provider, :api_key, NULL, NULL,
NULL, :default_model_name, :is_public,
NULL, NULL, :is_auto_mode
)
RETURNING id
"""
),
{
"name": f"Image Gen - {IMAGE_PROVIDER_ID}",
"provider": PROVIDER_NAME,
"api_key": api_key,
"default_model_name": MODEL_NAME,
"is_public": True,
"is_auto_mode": False,
},
)
new_provider_id = result.scalar()
# Create model configuration
result = conn.execute(
sa.text(
"""
INSERT INTO model_configuration (
llm_provider_id, name, is_visible, max_input_tokens,
supports_image_input, display_name
)
VALUES (
:llm_provider_id, :name, :is_visible, :max_input_tokens,
:supports_image_input, :display_name
)
RETURNING id
"""
),
{
"llm_provider_id": new_provider_id,
"name": MODEL_NAME,
"is_visible": True,
"max_input_tokens": None,
"supports_image_input": False,
"display_name": None,
},
)
model_config_id = result.scalar()
# Create image generation config
conn.execute(
sa.text(
"""
INSERT INTO image_generation_config (
image_provider_id, model_configuration_id, is_default
)
VALUES (
:image_provider_id, :model_configuration_id, :is_default
)
"""
),
{
"image_provider_id": IMAGE_PROVIDER_ID,
"model_configuration_id": model_config_id,
"is_default": True,
},
)
def downgrade() -> None:
# We don't remove the config on downgrade since it's safe to keep around
# If we upgrade again, it will be a no-op due to the existing records check
pass

View File

@@ -0,0 +1,33 @@
"""add_is_auto_mode_to_llm_provider
Revision ID: 9a0296d7421e
Revises: 7206234e012a
Create Date: 2025-12-17 18:14:29.620981
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9a0296d7421e"
down_revision = "7206234e012a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"llm_provider",
sa.Column(
"is_auto_mode",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
def downgrade() -> None:
op.drop_column("llm_provider", "is_auto_mode")

View File

@@ -234,6 +234,8 @@ def downgrade() -> None:
if "instructions" in columns:
op.drop_column("user_project", "instructions")
op.execute("ALTER TABLE user_project RENAME TO user_folder")
# Update NULL descriptions to empty string before setting NOT NULL constraint
op.execute("UPDATE user_folder SET description = '' WHERE description IS NULL")
op.alter_column("user_folder", "description", nullable=False)
logger.info("Renamed user_project back to user_folder")

View File

@@ -0,0 +1,62 @@
"""update_default_tool_descriptions
Revision ID: a01bf2971c5d
Revises: 18b5b2524446
Create Date: 2025-12-16 15:21:25.656375
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a01bf2971c5d"
down_revision = "18b5b2524446"
branch_labels = None
depends_on = None
# new tool descriptions (12/2025)
TOOL_DESCRIPTIONS = {
"SearchTool": "The Search Action allows the agent to search through connected knowledge to help build an answer.",
"ImageGenerationTool": (
"The Image Generation Action allows the agent to use DALL-E 3 or GPT-IMAGE-1 to generate images. "
"The action will be used when the user asks the agent to generate an image."
),
"WebSearchTool": (
"The Web Search Action allows the agent "
"to perform internet searches for up-to-date information."
),
"KnowledgeGraphTool": (
"The Knowledge Graph Search Action allows the agent to search the "
"Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Agent, "
"and it requires the Knowledge Graph to be enabled."
),
"OktaProfileTool": (
"The Okta Profile Action allows the agent to fetch the current user's information from Okta. "
"This may include the user's name, email, phone number, address, and other details such as their "
"manager and direct reports."
),
}
def upgrade() -> None:
conn = op.get_bind()
conn.execute(sa.text("BEGIN"))
try:
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("COMMIT"))
except Exception as e:
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
pass

View File

@@ -0,0 +1,49 @@
"""add license table
Revision ID: a1b2c3d4e5f6
Revises: a01bf2971c5d
Create Date: 2025-12-04 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f6"
down_revision = "a01bf2971c5d"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"license",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("license_data", sa.Text(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
)
# Singleton pattern - only ever one row in this table
op.create_index(
"idx_license_singleton",
"license",
[sa.text("(true)")],
unique=True,
)
def downgrade() -> None:
op.drop_index("idx_license_singleton", table_name="license")
op.drop_table("license")

View File

@@ -0,0 +1,27 @@
"""Remove fast_default_model_name from llm_provider
Revision ID: a2b3c4d5e6f7
Revises: 2a391f840e85
Create Date: 2024-12-17
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a2b3c4d5e6f7"
down_revision = "2a391f840e85"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.drop_column("llm_provider", "fast_default_model_name")
def downgrade() -> None:
op.add_column(
"llm_provider",
sa.Column("fast_default_model_name", sa.String(), nullable=True),
)

View File

@@ -0,0 +1,39 @@
"""remove userfile related deprecated fields
Revision ID: a3c1a7904cd0
Revises: 5c3dca366b35
Create Date: 2026-01-06 13:00:30.634396
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a3c1a7904cd0"
down_revision = "5c3dca366b35"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_column("user_file", "document_id")
op.drop_column("user_file", "document_id_migrated")
op.drop_column("connector_credential_pair", "is_user_file")
def downgrade() -> None:
op.add_column(
"connector_credential_pair",
sa.Column("is_user_file", sa.Boolean(), nullable=False, server_default="false"),
)
op.add_column(
"user_file",
sa.Column("document_id", sa.String(), nullable=True),
)
op.add_column(
"user_file",
sa.Column(
"document_id_migrated", sa.Boolean(), nullable=False, server_default="true"
),
)

View File

@@ -0,0 +1,425 @@
"""New Chat History
Revision ID: a852cbe15577
Revises: 6436661d5b65
Create Date: 2025-11-08 15:16:37.781308
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a852cbe15577"
down_revision = "6436661d5b65"
branch_labels = None
depends_on = None
def upgrade() -> None:
# 1. Drop old research/agent tables (CASCADE handles dependencies)
op.execute("DROP TABLE IF EXISTS research_agent_iteration_sub_step CASCADE")
op.execute("DROP TABLE IF EXISTS research_agent_iteration CASCADE")
op.execute("DROP TABLE IF EXISTS agent__sub_query__search_doc CASCADE")
op.execute("DROP TABLE IF EXISTS agent__sub_query CASCADE")
op.execute("DROP TABLE IF EXISTS agent__sub_question CASCADE")
# 2. ChatMessage table changes
# Rename columns and add FKs
op.alter_column(
"chat_message", "parent_message", new_column_name="parent_message_id"
)
op.create_foreign_key(
"fk_chat_message_parent_message_id",
"chat_message",
"chat_message",
["parent_message_id"],
["id"],
)
op.alter_column(
"chat_message",
"latest_child_message",
new_column_name="latest_child_message_id",
)
op.create_foreign_key(
"fk_chat_message_latest_child_message_id",
"chat_message",
"chat_message",
["latest_child_message_id"],
["id"],
)
# Add new column
op.add_column(
"chat_message", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
)
# Drop old columns
op.drop_column("chat_message", "rephrased_query")
op.drop_column("chat_message", "alternate_assistant_id")
op.drop_column("chat_message", "overridden_model")
op.drop_column("chat_message", "is_agentic")
op.drop_column("chat_message", "refined_answer_improvement")
op.drop_column("chat_message", "research_type")
op.drop_column("chat_message", "research_plan")
op.drop_column("chat_message", "research_answer_purpose")
# 3. ToolCall table changes
# Drop the unique constraint first
op.drop_constraint("uq_tool_call_message_id", "tool_call", type_="unique")
# Delete orphaned tool_call rows (those without valid chat_message)
op.execute(
"DELETE FROM tool_call WHERE message_id NOT IN (SELECT id FROM chat_message)"
)
# Add chat_session_id as nullable first, populate, then make NOT NULL
op.add_column(
"tool_call",
sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=True),
)
# Populate chat_session_id from the related chat_message
op.execute(
"""
UPDATE tool_call
SET chat_session_id = chat_message.chat_session_id
FROM chat_message
WHERE tool_call.message_id = chat_message.id
"""
)
# Now make it NOT NULL and add FK
op.alter_column("tool_call", "chat_session_id", nullable=False)
op.create_foreign_key(
"fk_tool_call_chat_session_id",
"tool_call",
"chat_session",
["chat_session_id"],
["id"],
ondelete="CASCADE",
)
# Rename message_id and make nullable, recreate FK with CASCADE
op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
op.alter_column(
"tool_call",
"message_id",
new_column_name="parent_chat_message_id",
nullable=True,
)
op.create_foreign_key(
"fk_tool_call_parent_chat_message_id",
"tool_call",
"chat_message",
["parent_chat_message_id"],
["id"],
ondelete="CASCADE",
)
# Add parent_tool_call_id with FK
op.add_column(
"tool_call", sa.Column("parent_tool_call_id", sa.Integer(), nullable=True)
)
op.create_foreign_key(
"fk_tool_call_parent_tool_call_id",
"tool_call",
"tool_call",
["parent_tool_call_id"],
["id"],
ondelete="CASCADE",
)
# Add other new columns
op.add_column(
"tool_call",
sa.Column("turn_number", sa.Integer(), nullable=False, server_default="0"),
)
op.add_column(
"tool_call",
sa.Column("tool_call_id", sa.String(), nullable=False, server_default=""),
)
op.add_column("tool_call", sa.Column("reasoning_tokens", sa.Text(), nullable=True))
op.add_column(
"tool_call",
sa.Column("tool_call_tokens", sa.Integer(), nullable=False, server_default="0"),
)
op.add_column(
"tool_call",
sa.Column("generated_images", postgresql.JSONB(), nullable=True),
)
# Rename columns
op.alter_column(
"tool_call", "tool_arguments", new_column_name="tool_call_arguments"
)
op.alter_column("tool_call", "tool_result", new_column_name="tool_call_response")
# Change tool_call_response type from JSONB to Text
op.execute(
"""
ALTER TABLE tool_call
ALTER COLUMN tool_call_response TYPE TEXT
USING tool_call_response::text
"""
)
# Drop old columns
op.drop_column("tool_call", "tool_name")
# 4. Create new association table
op.create_table(
"tool_call__search_doc",
sa.Column("tool_call_id", sa.Integer(), nullable=False),
sa.Column("search_doc_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(["tool_call_id"], ["tool_call.id"], ondelete="CASCADE"),
sa.ForeignKeyConstraint(
["search_doc_id"], ["search_doc.id"], ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("tool_call_id", "search_doc_id"),
)
# 5. Persona table change
op.add_column(
"persona",
sa.Column(
"replace_base_system_prompt",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
def downgrade() -> None:
# Reverse persona changes
op.drop_column("persona", "replace_base_system_prompt")
# Drop new association table
op.drop_table("tool_call__search_doc")
# Reverse ToolCall changes
op.add_column(
"tool_call",
sa.Column("tool_name", sa.String(), nullable=False, server_default=""),
)
# Change tool_call_response back to JSONB
op.execute(
"""
ALTER TABLE tool_call
ALTER COLUMN tool_call_response TYPE JSONB
USING tool_call_response::jsonb
"""
)
op.alter_column("tool_call", "tool_call_response", new_column_name="tool_result")
op.alter_column(
"tool_call", "tool_call_arguments", new_column_name="tool_arguments"
)
op.drop_column("tool_call", "generated_images")
op.drop_column("tool_call", "tool_call_tokens")
op.drop_column("tool_call", "reasoning_tokens")
op.drop_column("tool_call", "tool_call_id")
op.drop_column("tool_call", "turn_number")
op.drop_constraint(
"fk_tool_call_parent_tool_call_id", "tool_call", type_="foreignkey"
)
op.drop_column("tool_call", "parent_tool_call_id")
op.drop_constraint(
"fk_tool_call_parent_chat_message_id", "tool_call", type_="foreignkey"
)
op.alter_column(
"tool_call",
"parent_chat_message_id",
new_column_name="message_id",
nullable=False,
)
op.create_foreign_key(
"tool_call_message_id_fkey",
"tool_call",
"chat_message",
["message_id"],
["id"],
)
op.drop_constraint("fk_tool_call_chat_session_id", "tool_call", type_="foreignkey")
op.drop_column("tool_call", "chat_session_id")
op.create_unique_constraint("uq_tool_call_message_id", "tool_call", ["message_id"])
# Reverse ChatMessage changes
# Note: research_answer_purpose and research_type were originally String columns,
# not Enum types (see migrations 5ae8240accb3 and f8a9b2c3d4e5)
op.add_column(
"chat_message",
sa.Column("research_answer_purpose", sa.String(), nullable=True),
)
op.add_column(
"chat_message", sa.Column("research_plan", postgresql.JSONB(), nullable=True)
)
op.add_column(
"chat_message",
sa.Column("research_type", sa.String(), nullable=True),
)
op.add_column(
"chat_message",
sa.Column("refined_answer_improvement", sa.Boolean(), nullable=True),
)
op.add_column(
"chat_message",
sa.Column("is_agentic", sa.Boolean(), nullable=False, server_default="false"),
)
op.add_column(
"chat_message", sa.Column("overridden_model", sa.String(), nullable=True)
)
op.add_column(
"chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True)
)
# Recreate the FK constraint that was implicitly dropped when the column was dropped
op.create_foreign_key(
"fk_chat_message_persona",
"chat_message",
"persona",
["alternate_assistant_id"],
["id"],
)
op.add_column(
"chat_message", sa.Column("rephrased_query", sa.Text(), nullable=True)
)
op.drop_column("chat_message", "reasoning_tokens")
op.drop_constraint(
"fk_chat_message_latest_child_message_id", "chat_message", type_="foreignkey"
)
op.alter_column(
"chat_message",
"latest_child_message_id",
new_column_name="latest_child_message",
)
op.drop_constraint(
"fk_chat_message_parent_message_id", "chat_message", type_="foreignkey"
)
op.alter_column(
"chat_message", "parent_message_id", new_column_name="parent_message"
)
# Recreate agent sub question and sub query tables
op.create_table(
"agent__sub_question",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("primary_question_id", sa.Integer(), nullable=False),
sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("sub_question", sa.Text(), nullable=False),
sa.Column("level", sa.Integer(), nullable=False),
sa.Column("level_question_num", sa.Integer(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("sub_answer", sa.Text(), nullable=False),
sa.Column("sub_question_doc_results", postgresql.JSONB(), nullable=False),
sa.ForeignKeyConstraint(
["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"agent__sub_query",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("parent_question_id", sa.Integer(), nullable=False),
sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("sub_query", sa.Text(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.ForeignKeyConstraint(
["parent_question_id"], ["agent__sub_question.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"agent__sub_query__search_doc",
sa.Column("sub_query_id", sa.Integer(), nullable=False),
sa.Column("search_doc_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["sub_query_id"], ["agent__sub_query.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["search_doc_id"], ["search_doc.id"]),
sa.PrimaryKeyConstraint("sub_query_id", "search_doc_id"),
)
# Recreate research agent tables
op.create_table(
"research_agent_iteration",
sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
sa.Column("primary_question_id", sa.Integer(), nullable=False),
sa.Column("iteration_nr", sa.Integer(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("purpose", sa.String(), nullable=True),
sa.Column("reasoning", sa.String(), nullable=True),
sa.ForeignKeyConstraint(
["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint(
"primary_question_id",
"iteration_nr",
name="_research_agent_iteration_unique_constraint",
),
)
op.create_table(
"research_agent_iteration_sub_step",
sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
sa.Column("primary_question_id", sa.Integer(), nullable=False),
sa.Column("iteration_nr", sa.Integer(), nullable=False),
sa.Column("iteration_sub_step_nr", sa.Integer(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("sub_step_instructions", sa.String(), nullable=True),
sa.Column("sub_step_tool_id", sa.Integer(), nullable=True),
sa.Column("reasoning", sa.String(), nullable=True),
sa.Column("sub_answer", sa.String(), nullable=True),
sa.Column("cited_doc_results", postgresql.JSONB(), nullable=False),
sa.Column("claims", postgresql.JSONB(), nullable=True),
sa.Column("is_web_fetch", sa.Boolean(), nullable=True),
sa.Column("queries", postgresql.JSONB(), nullable=True),
sa.Column("generated_images", postgresql.JSONB(), nullable=True),
sa.Column("additional_data", postgresql.JSONB(), nullable=True),
sa.Column("file_ids", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(
["primary_question_id", "iteration_nr"],
[
"research_agent_iteration.primary_question_id",
"research_agent_iteration.iteration_nr",
],
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(["sub_step_tool_id"], ["tool.id"], ondelete="SET NULL"),
sa.PrimaryKeyConstraint("id"),
)

View File

@@ -0,0 +1,46 @@
"""Drop milestone table
Revision ID: b8c9d0e1f2a3
Revises: a2b3c4d5e6f7
Create Date: 2025-12-18
"""
from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "b8c9d0e1f2a3"
down_revision = "a2b3c4d5e6f7"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_table("milestone")
def downgrade() -> None:
op.create_table(
"milestone",
sa.Column("id", sa.UUID(), nullable=False),
sa.Column("tenant_id", sa.String(), nullable=True),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=True,
),
sa.Column("event_type", sa.String(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("event_tracker", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("event_type", name="uq_milestone_event_type"),
)

View File

@@ -0,0 +1,52 @@
"""add_deep_research_tool
Revision ID: c1d2e3f4a5b6
Revises: b8c9d0e1f2a3
Create Date: 2025-12-18 16:00:00.000000
"""
from alembic import op
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "c1d2e3f4a5b6"
down_revision = "b8c9d0e1f2a3"
branch_labels = None
depends_on = None
DEEP_RESEARCH_TOOL = {
"name": RESEARCH_AGENT_DB_NAME,
"display_name": "Research Agent",
"description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
"in_code_tool_id": "ResearchAgent",
}
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
VALUES (:name, :display_name, :description, :in_code_tool_id, false)
"""
),
DEEP_RESEARCH_TOOL,
)
def downgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
DELETE FROM tool
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{"in_code_tool_id": DEEP_RESEARCH_TOOL["in_code_tool_id"]},
)

View File

@@ -0,0 +1,73 @@
"""add_python_tool
Revision ID: c7e9f4a3b2d1
Revises: 3c9a65f1207f
Create Date: 2025-11-08 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "c7e9f4a3b2d1"
down_revision = "3c9a65f1207f"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Add PythonTool to built-in tools"""
conn = op.get_bind()
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
"""
),
{
"name": "PythonTool",
# in the UI, call it `Code Interpreter` since this is a well-known term for this tool
"display_name": "Code Interpreter",
"description": (
"The Code Interpreter Action allows the assistant to execute "
"Python code in a secure, isolated environment for data analysis, "
"computation, visualization, and file processing."
),
"in_code_tool_id": "PythonTool",
"enabled": True,
},
)
# needed to store files generated by the python tool
op.add_column(
"research_agent_iteration_sub_step",
sa.Column(
"file_ids",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)
def downgrade() -> None:
"""Remove PythonTool from built-in tools"""
conn = op.get_bind()
conn.execute(
sa.text(
"""
DELETE FROM tool
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"in_code_tool_id": "PythonTool",
},
)
op.drop_column("research_agent_iteration_sub_step", "file_ids")

View File

@@ -257,8 +257,8 @@ def _migrate_files_to_external_storage() -> None:
print(f"File {file_id} not found in PostgreSQL storage.")
continue
lobj_id = cast(int, file_record.lobj_oid) # type: ignore
file_metadata = cast(Any, file_record.file_metadata) # type: ignore
lobj_id = cast(int, file_record.lobj_oid)
file_metadata = cast(Any, file_record.file_metadata)
# Read file content from PostgreSQL
try:
@@ -280,7 +280,7 @@ def _migrate_files_to_external_storage() -> None:
else:
# Convert other types to dict if possible, otherwise None
try:
file_metadata = dict(file_record.file_metadata) # type: ignore
file_metadata = dict(file_record.file_metadata)
except (TypeError, ValueError):
file_metadata = None

View File

@@ -11,8 +11,8 @@ import sqlalchemy as sa
revision = "e209dc5a8156"
down_revision = "48d14957fe80"
branch_labels = None # type: ignore
depends_on = None # type: ignore
branch_labels = None
depends_on = None
def upgrade() -> None:

View File

@@ -0,0 +1,115 @@
"""add status to mcp server and make auth fields nullable
Revision ID: e8f0d2a38171
Revises: ed9e44312505
Create Date: 2025-11-28 11:15:37.667340
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import (
MCPTransport,
MCPAuthenticationType,
MCPAuthenticationPerformer,
MCPServerStatus,
)
# revision identifiers, used by Alembic.
revision = "e8f0d2a38171"
down_revision = "ed9e44312505"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Make auth fields nullable
op.alter_column(
"mcp_server",
"transport",
existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
nullable=True,
)
op.alter_column(
"mcp_server",
"auth_type",
existing_type=sa.Enum(
MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
),
nullable=True,
)
op.alter_column(
"mcp_server",
"auth_performer",
existing_type=sa.Enum(
MCPAuthenticationPerformer,
name="mcp_authentication_performer",
native_enum=False,
),
nullable=True,
)
# Add status column with default
op.add_column(
"mcp_server",
sa.Column(
"status",
sa.Enum(MCPServerStatus, name="mcp_server_status", native_enum=False),
nullable=False,
server_default="CREATED",
),
)
# For existing records, mark status as CONNECTED
bind = op.get_bind()
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET status = 'CONNECTED'
WHERE status != 'CONNECTED'
AND admin_connection_config_id IS NOT NULL
"""
)
)
def downgrade() -> None:
# Remove status column
op.drop_column("mcp_server", "status")
# Make auth fields non-nullable (set defaults first)
op.execute(
"UPDATE mcp_server SET transport = 'STREAMABLE_HTTP' WHERE transport IS NULL"
)
op.execute("UPDATE mcp_server SET auth_type = 'NONE' WHERE auth_type IS NULL")
op.execute(
"UPDATE mcp_server SET auth_performer = 'ADMIN' WHERE auth_performer IS NULL"
)
op.alter_column(
"mcp_server",
"transport",
existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
nullable=False,
)
op.alter_column(
"mcp_server",
"auth_type",
existing_type=sa.Enum(
MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
),
nullable=False,
)
op.alter_column(
"mcp_server",
"auth_performer",
existing_type=sa.Enum(
MCPAuthenticationPerformer,
name="mcp_authentication_performer",
native_enum=False,
),
nullable=False,
)

View File

@@ -0,0 +1,34 @@
"""Add icon_name field
Revision ID: ed9e44312505
Revises: 5e6f7a8b9c0d
Create Date: 2025-12-03 16:35:07.828393
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "ed9e44312505"
down_revision = "5e6f7a8b9c0d"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add icon_name column
op.add_column("persona", sa.Column("icon_name", sa.String(), nullable=True))
# Remove old icon columns
op.drop_column("persona", "icon_shape")
op.drop_column("persona", "icon_color")
def downgrade() -> None:
# Re-add old icon columns
op.add_column("persona", sa.Column("icon_color", sa.String(), nullable=True))
op.add_column("persona", sa.Column("icon_shape", sa.Integer(), nullable=True))
# Remove icon_name column
op.drop_column("persona", "icon_name")

View File

@@ -20,7 +20,9 @@ config = context.config
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
fileConfig(config.config_file_name)
# disable_existing_loggers=False prevents breaking pytest's caplog fixture
# See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
fileConfig(config.config_file_name, disable_existing_loggers=False)
# add your model's MetaData object here
# for 'autogenerate' support
@@ -82,9 +84,9 @@ def run_migrations_offline() -> None:
def do_run_migrations(connection: Connection) -> None:
context.configure(
connection=connection,
target_metadata=target_metadata, # type: ignore
target_metadata=target_metadata, # type: ignore[arg-type]
include_object=include_object,
) # type: ignore
)
with context.begin_transaction():
context.run_migrations()
@@ -108,9 +110,24 @@ async def run_async_migrations() -> None:
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
"""Run migrations in 'online' mode.
asyncio.run(run_async_migrations())
Supports pytest-alembic by checking for a pre-configured connection
in context.config.attributes["connection"]. If present, uses that
connection/engine directly instead of creating a new async engine.
"""
# Check if pytest-alembic is providing a connection/engine
connectable = context.config.attributes.get("connection", None)
if connectable is not None:
# pytest-alembic is providing an engine - use it directly
with connectable.connect() as connection:
do_run_migrations(connection)
# Commit to ensure changes are visible to next migration
connection.commit()
else:
# Normal operation - use async migrations
asyncio.run(run_async_migrations())
if context.is_offline_mode():

View File

@@ -1,27 +0,0 @@
variable "REPOSITORY" {
default = "onyxdotapp/onyx-integration"
}
variable "TAG" {
default = "latest"
}
target "backend" {
context = "."
dockerfile = "Dockerfile"
}
target "integration" {
context = "."
dockerfile = "tests/integration/Dockerfile"
// Provide the base image via build context from the backend target
contexts = {
base = "target:backend"
}
cache-from = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache"]
cache-to = ["type=registry,ref=${REPOSITORY}:integration-test-backend-cache,mode=max"]
tags = ["${REPOSITORY}:${TAG}"]
}

View File

@@ -41,6 +41,10 @@ CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (
JIRA_PERMISSION_DOC_SYNC_FREQUENCY = int(
os.environ.get("JIRA_PERMISSION_DOC_SYNC_FREQUENCY") or 30 * 60
)
# In seconds, default is 30 minutes
JIRA_PERMISSION_GROUP_SYNC_FREQUENCY = int(
os.environ.get("JIRA_PERMISSION_GROUP_SYNC_FREQUENCY") or 30 * 60
)
#####
@@ -107,10 +111,6 @@ CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS = float(
STRIPE_SECRET_KEY = os.environ.get("STRIPE_SECRET_KEY")
STRIPE_PRICE_ID = os.environ.get("STRIPE_PRICE")
-OPENAI_DEFAULT_API_KEY = os.environ.get("OPENAI_DEFAULT_API_KEY")
-ANTHROPIC_DEFAULT_API_KEY = os.environ.get("ANTHROPIC_DEFAULT_API_KEY")
-COHERE_DEFAULT_API_KEY = os.environ.get("COHERE_DEFAULT_API_KEY")
# JWT Public Key URL
JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)
@@ -124,6 +124,8 @@ SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")
HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")
GATED_TENANTS_KEY = "gated_tenants"

View File

@@ -199,10 +199,7 @@ def fetch_persona_message_analytics(
ChatMessage.chat_session_id == ChatSession.id,
)
.where(
-or_(
-    ChatMessage.alternate_assistant_id == persona_id,
-    ChatSession.persona_id == persona_id,
-),
+ChatSession.persona_id == persona_id,
ChatMessage.time_sent >= start,
ChatMessage.time_sent <= end,
ChatMessage.message_type == MessageType.ASSISTANT,
@@ -231,10 +228,7 @@ def fetch_persona_unique_users(
ChatMessage.chat_session_id == ChatSession.id,
)
.where(
-or_(
-    ChatMessage.alternate_assistant_id == persona_id,
-    ChatSession.persona_id == persona_id,
-),
+ChatSession.persona_id == persona_id,
ChatMessage.time_sent >= start,
ChatMessage.time_sent <= end,
ChatMessage.message_type == MessageType.ASSISTANT,
@@ -265,10 +259,7 @@ def fetch_assistant_message_analytics(
ChatMessage.chat_session_id == ChatSession.id,
)
.where(
-or_(
-    ChatMessage.alternate_assistant_id == assistant_id,
-    ChatSession.persona_id == assistant_id,
-),
+ChatSession.persona_id == assistant_id,
ChatMessage.time_sent >= start,
ChatMessage.time_sent <= end,
ChatMessage.message_type == MessageType.ASSISTANT,
@@ -299,10 +290,7 @@ def fetch_assistant_unique_users(
ChatMessage.chat_session_id == ChatSession.id,
)
.where(
-or_(
-    ChatMessage.alternate_assistant_id == assistant_id,
-    ChatSession.persona_id == assistant_id,
-),
+ChatSession.persona_id == assistant_id,
ChatMessage.time_sent >= start,
ChatMessage.time_sent <= end,
ChatMessage.message_type == MessageType.ASSISTANT,
@@ -332,10 +320,7 @@ def fetch_assistant_unique_users_total(
ChatMessage.chat_session_id == ChatSession.id,
)
.where(
-or_(
-    ChatMessage.alternate_assistant_id == assistant_id,
-    ChatSession.persona_id == assistant_id,
-),
+ChatSession.persona_id == assistant_id,
ChatMessage.time_sent >= start,
ChatMessage.time_sent <= end,
ChatMessage.message_type == MessageType.ASSISTANT,

View File

@@ -118,6 +118,6 @@ def fetch_document_sets(
.all()
)
-document_set_with_cc_pairs.append((document_set, cc_pairs))  # type: ignore
+document_set_with_cc_pairs.append((document_set, cc_pairs))
return document_set_with_cc_pairs

View File

@@ -0,0 +1,278 @@
"""Database and cache operations for the license table."""
from datetime import datetime
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session
from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.db.models import License
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
LICENSE_METADATA_KEY = "license:metadata"
LICENSE_CACHE_TTL_SECONDS = 86400 # 24 hours
# -----------------------------------------------------------------------------
# Database CRUD Operations
# -----------------------------------------------------------------------------
def get_license(db_session: Session) -> License | None:
"""
Get the current license (singleton pattern - only one row).
Args:
db_session: Database session
Returns:
License object if exists, None otherwise
"""
return db_session.execute(select(License)).scalars().first()
def upsert_license(db_session: Session, license_data: str) -> License:
"""
Insert or update the license (singleton pattern).
Args:
db_session: Database session
license_data: Base64-encoded signed license blob
Returns:
The created or updated License object
"""
existing = get_license(db_session)
if existing:
existing.license_data = license_data
db_session.commit()
db_session.refresh(existing)
logger.info("License updated")
return existing
new_license = License(license_data=license_data)
db_session.add(new_license)
db_session.commit()
db_session.refresh(new_license)
logger.info("License created")
return new_license
def delete_license(db_session: Session) -> bool:
"""
Delete the current license.
Args:
db_session: Database session
Returns:
True if deleted, False if no license existed
"""
existing = get_license(db_session)
if existing:
db_session.delete(existing)
db_session.commit()
logger.info("License deleted")
return True
return False
# -----------------------------------------------------------------------------
# Seat Counting
# -----------------------------------------------------------------------------
def get_used_seats(tenant_id: str | None = None) -> int:
"""
Get current seat usage.
For multi-tenant: counts users in UserTenantMapping for this tenant.
For self-hosted: counts all active users (includes both Onyx UI users
and Slack users who have been converted to Onyx users).
"""
if MULTI_TENANT:
from ee.onyx.server.tenants.user_mapping import get_tenant_count
return get_tenant_count(tenant_id or get_current_tenant_id())
else:
# Self-hosted: count all active users (Onyx + converted Slack users)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
with get_session_with_current_tenant() as db_session:
result = db_session.execute(
select(func.count()).select_from(User).where(User.is_active) # type: ignore
)
return result.scalar() or 0
# -----------------------------------------------------------------------------
# Redis Cache Operations
# -----------------------------------------------------------------------------
def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:
"""
Get license metadata from Redis cache.
Args:
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
LicenseMetadata if cached, None otherwise
"""
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_replica_client(tenant_id=tenant)
cached = redis_client.get(LICENSE_METADATA_KEY)
if cached:
try:
cached_str: str
if isinstance(cached, bytes):
cached_str = cached.decode("utf-8")
else:
cached_str = str(cached)
return LicenseMetadata.model_validate_json(cached_str)
except Exception as e:
logger.warning(f"Failed to parse cached license metadata: {e}")
return None
return None
def invalidate_license_cache(tenant_id: str | None = None) -> None:
"""
Invalidate the license metadata cache (not the license itself).
This deletes the cached LicenseMetadata from Redis. The actual license
in the database is not affected. Redis delete is idempotent - if the
key doesn't exist, this is a no-op.
Args:
tenant_id: Tenant ID (for multi-tenant deployments)
"""
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_client(tenant_id=tenant)
redis_client.delete(LICENSE_METADATA_KEY)
logger.info("License cache invalidated")
def update_license_cache(
payload: LicensePayload,
source: LicenseSource | None = None,
grace_period_end: datetime | None = None,
tenant_id: str | None = None,
) -> LicenseMetadata:
"""
Update the Redis cache with license metadata.
We cache all license statuses (ACTIVE, GRACE_PERIOD, GATED_ACCESS) because:
1. Frontend needs status to show appropriate UI/banners
2. Caching avoids repeated DB + crypto verification on every request
3. Status enforcement happens at the feature level, not here
Args:
payload: Verified license payload
source: How the license was obtained
grace_period_end: Optional grace period end time
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
The cached LicenseMetadata
"""
from ee.onyx.utils.license import get_license_status
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_client(tenant_id=tenant)
used_seats = get_used_seats(tenant)
status = get_license_status(payload, grace_period_end)
metadata = LicenseMetadata(
tenant_id=payload.tenant_id,
organization_name=payload.organization_name,
seats=payload.seats,
used_seats=used_seats,
plan_type=payload.plan_type,
issued_at=payload.issued_at,
expires_at=payload.expires_at,
grace_period_end=grace_period_end,
status=status,
source=source,
stripe_subscription_id=payload.stripe_subscription_id,
)
redis_client.setex(
LICENSE_METADATA_KEY,
LICENSE_CACHE_TTL_SECONDS,
metadata.model_dump_json(),
)
logger.info(f"License cache updated: {metadata.seats} seats, status={status.value}")
return metadata
def refresh_license_cache(
db_session: Session,
tenant_id: str | None = None,
) -> LicenseMetadata | None:
"""
Refresh the license cache from the database.
Args:
db_session: Database session
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
LicenseMetadata if license exists, None otherwise
"""
from ee.onyx.utils.license import verify_license_signature
license_record = get_license(db_session)
if not license_record:
invalidate_license_cache(tenant_id)
return None
try:
payload = verify_license_signature(license_record.license_data)
return update_license_cache(
payload,
source=LicenseSource.AUTO_FETCH,
tenant_id=tenant_id,
)
except ValueError as e:
logger.error(f"Failed to verify license during cache refresh: {e}")
invalidate_license_cache(tenant_id)
return None
def get_license_metadata(
db_session: Session,
tenant_id: str | None = None,
) -> LicenseMetadata | None:
"""
Get license metadata, using cache if available.
Args:
db_session: Database session
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
LicenseMetadata if license exists, None otherwise
"""
# Try cache first
cached = get_cached_license_metadata(tenant_id)
if cached:
return cached
# Refresh from database
return refresh_license_cache(db_session, tenant_id)
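A hypothetical end-to-end usage sketch of the helpers above (the wrapper function is illustrative; get_session_with_current_tenant is the project's own session factory):

def print_license_summary() -> None:
    from onyx.db.engine.sql_engine import get_session_with_current_tenant

    with get_session_with_current_tenant() as db_session:
        metadata = get_license_metadata(db_session)  # cache-first, DB fallback
        if metadata is None:
            print("No license installed")
            return
        print(
            f"{metadata.organization_name}: "
            f"{metadata.used_seats}/{metadata.seats} seats, "
            f"status={metadata.status.value}"
        )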

View File

@@ -34,6 +34,7 @@ def make_persona_private(
create_notification(
user_id=user_id,
notif_type=NotificationType.PERSONA_SHARED,
title="A new agent was shared with you!",
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,

View File

@@ -55,18 +55,7 @@ def get_empty_chat_messages_entries__paginated(
# Get assistant name (from session persona, or alternate if specified)
assistant_name = None
-if message.alternate_assistant_id:
-    # If there's an alternate assistant, we need to fetch it
-    from onyx.db.models import Persona
-    alternate_persona = (
-        db_session.query(Persona)
-        .filter(Persona.id == message.alternate_assistant_id)
-        .first()
-    )
-    if alternate_persona:
-        assistant_name = alternate_persona.name
-elif chat_session.persona:
+if chat_session.persona:
assistant_name = chat_session.persona.name
message_skeletons.append(

View File

@@ -8,6 +8,7 @@ from sqlalchemy import func
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session
from ee.onyx.server.user_group.models import SetCuratorRequest
@@ -362,14 +363,29 @@ def _check_user_group_is_modifiable(user_group: UserGroup) -> None:
def _add_user__user_group_relationships__no_commit(
db_session: Session, user_group_id: int, user_ids: list[UUID]
-) -> list[User__UserGroup]:
-    """NOTE: does not commit the transaction."""
-    relationships = [
-        User__UserGroup(user_id=user_id, user_group_id=user_group_id)
-        for user_id in user_ids
-    ]
-    db_session.add_all(relationships)
-    return relationships
+) -> None:
+    """NOTE: does not commit the transaction.
+
+    This function is idempotent - it will skip users who are already in the group
+    to avoid duplicate key violations during concurrent operations or re-syncs.
+    Uses ON CONFLICT DO NOTHING to keep inserts atomic under concurrency.
+    """
+    if not user_ids:
+        return
+    insert_stmt = (
+        insert(User__UserGroup)
+        .values(
+            [
+                {"user_id": user_id, "user_group_id": user_group_id}
+                for user_id in user_ids
+            ]
+        )
+        .on_conflict_do_nothing(
+            index_elements=[User__UserGroup.user_group_id, User__UserGroup.user_id]
+        )
+    )
+    db_session.execute(insert_stmt)
def _add_user_group__cc_pair_relationships__no_commit(
@@ -581,6 +597,48 @@ def update_user_curator_relationship(
db_session.commit()
def add_users_to_user_group(
db_session: Session,
user: User | None,
user_group_id: int,
user_ids: list[UUID],
) -> UserGroup:
db_user_group = fetch_user_group(db_session=db_session, user_group_id=user_group_id)
if db_user_group is None:
raise ValueError(f"UserGroup with id '{user_group_id}' not found")
missing_users = [
user_id for user_id in user_ids if fetch_user_by_id(db_session, user_id) is None
]
if missing_users:
raise ValueError(
f"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}"
)
_check_user_group_is_modifiable(db_user_group)
current_user_ids = [user.id for user in db_user_group.users]
current_user_ids_set = set(current_user_ids)
new_user_ids = [
user_id for user_id in user_ids if user_id not in current_user_ids_set
]
if not new_user_ids:
return db_user_group
user_group_update = UserGroupUpdate(
user_ids=current_user_ids + new_user_ids,
cc_pair_ids=[cc_pair.id for cc_pair in db_user_group.cc_pairs],
)
return update_user_group(
db_session=db_session,
user=user,
user_group_id=user_group_id,
user_group_update=user_group_update,
)
def update_user_group(
db_session: Session,
user: User | None,
@@ -603,6 +661,17 @@ def update_user_group(
added_user_ids = list(updated_user_ids - current_user_ids)
removed_user_ids = list(current_user_ids - updated_user_ids)
if added_user_ids:
missing_users = [
user_id
for user_id in added_user_ids
if fetch_user_by_id(db_session, user_id) is None
]
if missing_users:
raise ValueError(
f"User(s) not found: {', '.join(str(user_id) for user_id in missing_users)}"
)
# LEAVING THIS HERE FOR NOW FOR GIVING DIFFERENT ROLES
# ACCESS TO DIFFERENT PERMISSIONS
# if (removed_user_ids or added_user_ids) and (

View File

@@ -3,12 +3,15 @@ from collections.abc import Generator
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from onyx.background.error_logging import emit_background_error
from onyx.configs.app_configs import CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ConnectorCredentialPair
from onyx.db.users import get_all_users
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -19,7 +22,7 @@ def _build_group_member_email_map(
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
for user in confluence_client.paginated_cql_user_retrieval():
logger.debug(f"Processing groups for user: {user}")
logger.info(f"Processing groups for user: {user}")
email = user.email
if not email:
@@ -31,6 +34,8 @@ def _build_group_member_email_map(
confluence_client=confluence_client,
user_name=user_name,
)
else:
logger.error(f"user result missing username field: {user}")
if not email:
# If we still don't have an email, skip this user
@@ -64,6 +69,92 @@ def _build_group_member_email_map(
return group_member_emails
def _build_group_member_email_map_from_onyx_users(
confluence_client: OnyxConfluence,
) -> dict[str, set[str]]:
"""Hacky, but it's the only way to do this as long as the
Confluence APIs are broken.
This is fixed in Confluence Data Center 10.1.0, so the first choice
is to tell users to upgrade to 10.1.0.
https://jira.atlassian.com/browse/CONFSERVER-95999
"""
with get_session_with_current_tenant() as db_session:
# don't include external since they are handled by the "through confluence"
# user fetching mechanism
user_emails = [
user.email for user in get_all_users(db_session, include_external=False)
]
def _infer_username_from_email(email: str) -> str:
return email.split("@")[0]
group_member_emails: dict[str, set[str]] = {}
for email in user_emails:
logger.info(f"Processing groups for user with email: {email}")
try:
user_name = _infer_username_from_email(email)
response = confluence_client.get_user_details_by_username(user_name)
user_key = response.get("userKey")
if not user_key:
logger.error(f"User key not found for user with email {email}")
continue
all_users_groups: set[str] = set()
for group in confluence_client.paginated_groups_by_user_retrieval(user_key):
# group name uniqueness is enforced by Confluence, so we can use it as a group ID
group_id = group["name"]
group_member_emails.setdefault(group_id, set()).add(email)
all_users_groups.add(group_id)
if not all_users_groups:
msg = f"No groups found for user with email: {email}"
logger.error(msg)
else:
logger.info(
f"Found groups {all_users_groups} for user with email {email}"
)
except Exception:
logger.exception(f"Error getting user details for user with email {email}")
return group_member_emails
def _build_final_group_to_member_email_map(
confluence_client: OnyxConfluence,
cc_pair_id: int,
# if set, will infer confluence usernames from onyx users in addition to using the
# confluence users API. This is a hacky workaround for the fact that the Confluence
# users API is broken before Confluence Data Center 10.1.0.
use_onyx_users: bool = CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC,
) -> dict[str, set[str]]:
group_to_member_email_map = _build_group_member_email_map(
confluence_client=confluence_client,
cc_pair_id=cc_pair_id,
)
group_to_member_email_map_from_onyx_users = (
(
_build_group_member_email_map_from_onyx_users(
confluence_client=confluence_client,
)
)
if use_onyx_users
else {}
)
all_group_ids = set(group_to_member_email_map.keys()) | set(
group_to_member_email_map_from_onyx_users.keys()
)
final_group_to_member_email_map = {}
for group_id in all_group_ids:
group_member_emails = group_to_member_email_map.get(
group_id, set()
) | group_to_member_email_map_from_onyx_users.get(group_id, set())
final_group_to_member_email_map[group_id] = group_member_emails
return final_group_to_member_email_map
def confluence_group_sync(
tenant_id: str,
cc_pair: ConnectorCredentialPair,
@@ -87,13 +178,12 @@ def confluence_group_sync(
confluence_client._probe_connection(**probe_kwargs)
confluence_client._initialize_connection(**final_kwargs)
-group_member_email_map = _build_group_member_email_map(
-    confluence_client=confluence_client,
-    cc_pair_id=cc_pair.id,
-)
+group_to_member_email_map = _build_final_group_to_member_email_map(
+    confluence_client, cc_pair.id
+)
 all_found_emails = set()
-for group_id, group_member_emails in group_member_email_map.items():
+for group_id, group_member_emails in group_to_member_email_map.items():
yield (
ExternalUserGroup(
id=group_id,
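The merge performed by _build_final_group_to_member_email_map above is a plain per-group set union; a tiny standalone illustration with toy data:

api_map = {"eng": {"a@x.com"}, "ops": {"b@x.com"}}  # from the Confluence API
onyx_map = {"eng": {"c@x.com"}}  # inferred from Onyx users

merged = {
    group_id: api_map.get(group_id, set()) | onyx_map.get(group_id, set())
    for group_id in set(api_map) | set(onyx_map)
}
assert merged == {"eng": {"a@x.com", "c@x.com"}, "ops": {"b@x.com"}}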

View File

@@ -0,0 +1,136 @@
from collections.abc import Generator
from jira import JIRA
from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.connectors.jira.utils import build_jira_client
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger
logger = setup_logger()
def _get_jira_group_members_email(
jira_client: JIRA,
group_name: str,
) -> list[str]:
"""Get all member emails for a Jira group.
Filters out app accounts (bots, integrations) and only returns real user emails.
"""
emails: list[str] = []
try:
# group_members returns an OrderedDict of account_id -> member_info
members = jira_client.group_members(group=group_name)
if not members:
logger.warning(f"No members found for group {group_name}")
return emails
for account_id, member_info in members.items():
# member_info is a dict with keys like 'fullname', 'email', 'active'
email = member_info.get("email")
# Skip "hidden" emails - these are typically app accounts
if email and email != "hidden":
emails.append(email)
else:
# For cloud, we might need to fetch user details separately
try:
user = jira_client.user(id=account_id)
# Skip app accounts (bots, integrations, etc.)
if hasattr(user, "accountType") and user.accountType == "app":
logger.info(
f"Skipping app account {account_id} for group {group_name}"
)
continue
if hasattr(user, "emailAddress") and user.emailAddress:
emails.append(user.emailAddress)
else:
logger.warning(f"User {account_id} has no email address")
except Exception as e:
logger.warning(
f"Could not fetch email for user {account_id} in group {group_name}: {e}"
)
except Exception as e:
logger.error(f"Error fetching members for group {group_name}: {e}")
return emails
def _build_group_member_email_map(
jira_client: JIRA,
) -> dict[str, set[str]]:
"""Build a map of group names to member emails."""
group_member_emails: dict[str, set[str]] = {}
try:
# Get all groups from Jira - returns a list of group name strings
group_names = jira_client.groups()
if not group_names:
logger.warning("No groups found in Jira")
return group_member_emails
logger.info(f"Found {len(group_names)} groups in Jira")
for group_name in group_names:
if not group_name:
continue
member_emails = _get_jira_group_members_email(
jira_client=jira_client,
group_name=group_name,
)
if member_emails:
group_member_emails[group_name] = set(member_emails)
logger.debug(
f"Found {len(member_emails)} members for group {group_name}"
)
else:
logger.debug(f"No members found for group {group_name}")
except Exception as e:
logger.error(f"Error building group member email map: {e}")
return group_member_emails
def jira_group_sync(
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""
Sync Jira groups and their members.
This function fetches all groups from Jira and yields ExternalUserGroup
objects containing the group ID and member emails.
"""
jira_base_url = cc_pair.connector.connector_specific_config.get("jira_base_url", "")
scoped_token = cc_pair.connector.connector_specific_config.get(
"scoped_token", False
)
if not jira_base_url:
raise ValueError("No jira_base_url found in connector config")
jira_client = build_jira_client(
credentials=cc_pair.credential.credential_json,
jira_base=jira_base_url,
scoped_token=scoped_token,
)
group_member_email_map = _build_group_member_email_map(jira_client=jira_client)
if not group_member_email_map:
raise ValueError(f"No groups with members found for cc_pair_id={cc_pair.id}")
for group_id, group_member_emails in group_member_email_map.items():
yield ExternalUserGroup(
id=group_id,
user_emails=list(group_member_emails),
)
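A hypothetical driver for the generator above (the cc_pair would come from the project's DB layer; the logging is illustrative only):

def log_jira_groups(tenant_id: str, cc_pair: ConnectorCredentialPair) -> None:
    for group in jira_group_sync(tenant_id, cc_pair):
        logger.info(f"group={group.id} members={len(group.user_emails)}")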

View File

@@ -16,6 +16,10 @@ HolderMap = dict[str, list[Holder]]
logger = setup_logger()
def _get_role_id(holder: Holder) -> str | None:
return holder.get("value") or holder.get("parameter")
def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
"""
A "Holder" in JIRA is a person / entity who "holds" the corresponding permission.
@@ -110,80 +114,137 @@ def _get_user_emails(user_holders: list[Holder]) -> list[str]:
return emails
-def _get_user_emails_from_project_roles(
+def _get_user_emails_and_groups_from_project_roles(
     jira_client: JIRA,
     jira_project: str,
     project_role_holders: list[Holder],
-) -> list[str]:
-    # NOTE (@raunakab) a `parallel_yield` may be helpful here...?
+) -> tuple[list[str], list[str]]:
+    """
+    Get user emails and group names from project roles.
+    Returns a tuple of (emails, group_names).
+    """
+    # Get role IDs - Cloud uses "value", Data Center uses "parameter"
+    role_ids = []
+    for holder in project_role_holders:
+        role_id = _get_role_id(holder)
+        if role_id:
+            role_ids.append(role_id)
+        else:
+            logger.warning(f"No value or parameter in projectRole holder: {holder}")
     roles = [
-        jira_client.project_role(project=jira_project, id=project_role_holder["value"])
-        for project_role_holder in project_role_holders
-        if "value" in project_role_holder
+        jira_client.project_role(project=jira_project, id=role_id)
+        for role_id in role_ids
     ]
     emails = []
+    groups = []
     for role in roles:
         if not hasattr(role, "actors"):
             logger.warning(f"Project role {role} has no actors attribute")
             continue
         for actor in role.actors:
-            if not hasattr(actor, "actorUser") or not hasattr(
-                actor.actorUser, "accountId"
-            ):
-                continue
-            user = jira_client.user(id=actor.actorUser.accountId)
-            if not hasattr(user, "accountType") or user.accountType != "atlassian":
-                continue
-            if not hasattr(user, "emailAddress"):
-                msg = f"User's email address was not able to be retrieved; {actor.actorUser.accountId=}"
-                if hasattr(user, "displayName"):
-                    msg += f" {actor.displayName=}"
-                logger.warn(msg)
-                continue
-            emails.append(user.emailAddress)
-    return emails
+            # Handle group actors
+            if hasattr(actor, "actorGroup"):
+                group_name = getattr(actor.actorGroup, "name", None) or getattr(
+                    actor.actorGroup, "displayName", None
+                )
+                if group_name:
+                    groups.append(group_name)
+                continue
+            # Handle user actors
+            if hasattr(actor, "actorUser"):
+                account_id = getattr(actor.actorUser, "accountId", None)
+                if not account_id:
+                    logger.error(f"No accountId in actorUser: {actor.actorUser}")
+                    continue
+                user = jira_client.user(id=account_id)
+                if not hasattr(user, "accountType") or user.accountType != "atlassian":
+                    logger.info(
+                        f"Skipping user {account_id} because it is not an atlassian user"
+                    )
+                    continue
+                if not hasattr(user, "emailAddress"):
+                    msg = f"User's email address was not able to be retrieved; {account_id=}"
+                    if hasattr(user, "displayName"):
+                        msg += f" {user.displayName=}"
+                    logger.warning(msg)
+                    continue
+                emails.append(user.emailAddress)
+                continue
+            logger.debug(f"Skipping actor type: {actor}")
+    return emails, groups
def _build_external_access_from_holder_map(
jira_client: JIRA, jira_project: str, holder_map: HolderMap
) -> ExternalAccess:
"""
-# Note:
-If the `holder_map` contains an instance of "anyone", then this is a public JIRA project.
-Otherwise, we fetch the "projectRole"s (i.e., the user-groups in JIRA speak), and the user emails.
-"""
+Build ExternalAccess from the holder map.
+
+Holder types handled:
+- "anyone": Public project, anyone can access
+- "applicationRole": All users with a Jira license can access (treated as public)
+- "user": Specific users with access
+- "projectRole": Project roles containing users and/or groups
+- "group": Groups directly assigned in the permission scheme
+"""
# Public access - anyone can view
if "anyone" in holder_map:
return ExternalAccess(
external_user_emails=set(), external_user_group_ids=set(), is_public=True
)
# applicationRole means all users with a Jira license can access - treat as public
if "applicationRole" in holder_map:
return ExternalAccess(
external_user_emails=set(), external_user_group_ids=set(), is_public=True
)
# Get emails from explicit user holders
user_emails = (
_get_user_emails(user_holders=holder_map["user"])
if "user" in holder_map
else []
)
-project_role_user_emails = (
-    _get_user_emails_from_project_roles(
-        jira_client=jira_client,
-        jira_project=jira_project,
-        project_role_holders=holder_map["projectRole"],
-    )
-    if "projectRole" in holder_map
-    else []
-)
+# Get emails and groups from project roles
+project_role_user_emails: list[str] = []
+project_role_groups: list[str] = []
+if "projectRole" in holder_map:
+    project_role_user_emails, project_role_groups = (
+        _get_user_emails_and_groups_from_project_roles(
+            jira_client=jira_client,
+            jira_project=jira_project,
+            project_role_holders=holder_map["projectRole"],
+        )
+    )
# Get groups directly assigned in permission scheme (common in Data Center)
# Format: {'type': 'group', 'parameter': 'group-name', 'expand': 'group'}
direct_groups: list[str] = []
if "group" in holder_map:
for group_holder in holder_map["group"]:
group_name = _get_role_id(group_holder)
if group_name:
direct_groups.append(group_name)
else:
logger.error(f"No parameter/value in group holder: {group_holder}")
external_user_emails = set(user_emails + project_role_user_emails)
external_user_group_ids = set(project_role_groups + direct_groups)
return ExternalAccess(
external_user_emails=external_user_emails,
-external_user_group_ids=set(),
+external_user_group_ids=external_user_group_ids,
is_public=False,
)
@@ -197,9 +258,11 @@ def get_project_permissions(
)
if not hasattr(project_permissions, "permissions"):
logger.error(f"Project {jira_project} has no permissions attribute")
return None
if not isinstance(project_permissions.permissions, list):
logger.error(f"Project {jira_project} permissions is not a list")
return None
holder_map = _build_holder_map(permissions=project_permissions.permissions)
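The Cloud/Data Center split handled by _get_role_id above can be seen with toy holders (values illustrative):

assert _get_role_id({"type": "projectRole", "value": "10002"}) == "10002"  # Cloud
assert _get_role_id({"type": "group", "parameter": "jira-devs"}) == "jira-devs"  # Data Center
assert _get_role_id({"type": "anyone"}) is None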

View File

@@ -15,6 +15,7 @@ from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP_REVERSE
from onyx.connectors.sharepoint.connector import sleep_and_retry
from onyx.utils.logger import setup_logger
@@ -511,8 +512,8 @@ def get_external_access_from_sharepoint(
f"Failed to get SharePoint list item ID for item {drive_item.id}"
)
-if drive_name == "Shared Documents":
-    drive_name = "Documents"
+if drive_name in SHARED_DOCUMENTS_MAP_REVERSE:
+    drive_name = SHARED_DOCUMENTS_MAP_REVERSE[drive_name]
item = client_context.web.lists.get_by_title(drive_name).items.get_by_id(
item_id
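A tiny illustration of the guarded lookup above, using a toy stand-in for the map (the real constant lives in onyx.connectors.sharepoint.connector):

SHARED_DOCUMENTS_MAP_REVERSE = {"Shared Documents": "Documents"}  # toy stand-in

drive_name = "Shared Documents"
if drive_name in SHARED_DOCUMENTS_MAP_REVERSE:
    drive_name = SHARED_DOCUMENTS_MAP_REVERSE[drive_name]
assert drive_name == "Documents"
# Unknown drive names fall through unchanged instead of being force-renamed.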

View File

@@ -11,6 +11,7 @@ from ee.onyx.configs.app_configs import GITHUB_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import JIRA_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import JIRA_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SLACK_PERMISSION_DOC_SYNC_FREQUENCY
@@ -23,6 +24,7 @@ from ee.onyx.external_permissions.gmail.doc_sync import gmail_doc_sync
from ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync
from ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync
from ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync
from ee.onyx.external_permissions.jira.group_sync import jira_group_sync
from ee.onyx.external_permissions.perm_sync_types import CensoringFuncType
from ee.onyx.external_permissions.perm_sync_types import DocSyncFuncType
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
@@ -110,6 +112,11 @@ _SOURCE_TO_SYNC_CONFIG: dict[DocumentSource, SyncConfig] = {
doc_sync_func=jira_doc_sync,
initial_index_should_sync=True,
),
group_sync_config=GroupSyncConfig(
group_sync_frequency=JIRA_PERMISSION_GROUP_SYNC_FREQUENCY,
group_sync_func=jira_group_sync,
group_sync_is_cc_pair_agnostic=True,
),
),
# Groups are not needed for Slack.
# All channel access is done at the individual user level.

View File

@@ -14,6 +14,7 @@ from ee.onyx.server.enterprise_settings.api import (
basic_router as enterprise_settings_router,
)
from ee.onyx.server.evals.api import router as evals_router
from ee.onyx.server.license.api import router as license_router
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
from ee.onyx.server.middleware.tenant_tracking import (
add_api_server_tenant_id_middleware,
@@ -23,7 +24,7 @@ from ee.onyx.server.query_and_chat.chat_backend import (
router as chat_router,
)
from ee.onyx.server.query_and_chat.query_backend import (
-basic_router as query_router,
+basic_router as ee_query_router,
)
from ee.onyx.server.query_history.api import router as query_history_router
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
@@ -48,6 +49,9 @@ from onyx.main import include_auth_router_with_prefix
from onyx.main import include_router_with_global_prefix_prepended
from onyx.main import lifespan as lifespan_base
from onyx.main import use_route_function_names_as_operation_ids
from onyx.server.query_and_chat.query_backend import (
basic_router as query_router,
)
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import MULTI_TENANT
@@ -119,6 +123,7 @@ def get_application() -> FastAPI:
include_router_with_global_prefix_prepended(application, query_history_router)
# EE only backend APIs
include_router_with_global_prefix_prepended(application, query_router)
include_router_with_global_prefix_prepended(application, ee_query_router)
include_router_with_global_prefix_prepended(application, chat_router)
include_router_with_global_prefix_prepended(application, standard_answer_router)
include_router_with_global_prefix_prepended(application, ee_oauth_router)
@@ -135,6 +140,8 @@ def get_application() -> FastAPI:
)
include_router_with_global_prefix_prepended(application, enterprise_settings_router)
include_router_with_global_prefix_prepended(application, usage_export_router)
# License management
include_router_with_global_prefix_prepended(application, license_router)
if MULTI_TENANT:
# Tenant management

View File

@@ -21,8 +21,9 @@ from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.utils import PUBLIC_API_TAGS
router = APIRouter(prefix="/analytics")
router = APIRouter(prefix="/analytics", tags=PUBLIC_API_TAGS)
_DEFAULT_LOOKBACK_DAYS = 30

Some files were not shown because too many files have changed in this diff.