Compare commits

..

6 Commits

Author SHA1 Message Date
Raunak Bhagat
9e9c3ec0b9 Remove unused imports 2025-11-18 13:51:10 -08:00
Raunak Bhagat
1457ca2a20 Make share button instantaneous 2025-11-18 13:50:37 -08:00
Raunak Bhagat
edc390edc6 Implement AppPage wrapper for all other pages inside of /chat 2025-11-18 13:34:38 -08:00
Raunak Bhagat
022624cb5a Maintain consistent heights 2025-11-18 13:20:09 -08:00
Raunak Bhagat
f301257130 Make chatSession info and settings info be passed in as server-side data 2025-11-18 13:07:52 -08:00
Raunak Bhagat
9eecc71cda Fix flashing 2025-11-18 11:43:49 -08:00
2459 changed files with 101492 additions and 269986 deletions

View File

@@ -1,8 +0,0 @@
# Exclude these commits from git blame (e.g. mass reformatting).
# These are ignored by GitHub automatically.
# To enable this locally, run:
#
# git config blame.ignoreRevsFile .git-blame-ignore-revs
3134e5f840c12c8f32613ce520101a047c89dcc2 # refactor(whitespace): rm temporary react fragments (#7161)
ed3f72bc75f3e3a9ae9e4d8cd38278f9c97e78b4 # refactor(whitespace): rm react fragment #7190

7
.github/CODEOWNERS vendored
View File

@@ -1,10 +1,3 @@
* @onyx-dot-app/onyx-core-team
# Helm charts Owners
/helm/ @justin-tahara
# Web standards updates
/web/STANDARDS.md @raunakab @Weves
# Agent context files
/CLAUDE.md.template @Weves
/AGENTS.md.template @Weves

View File

@@ -17,7 +17,6 @@ self-hosted-runner:
- runner=16cpu-linux-x64
- ubuntu-slim # Currently in public preview
- volume=40gb
- volume=50gb
# Configuration variables in array of strings defined in your repository or
# organization. `null` means disabling configuration variables check.

View File

@@ -0,0 +1,135 @@
name: 'Build and Push Docker Image with Retry'
description: 'Attempts to build and push a Docker image, with a retry on failure'
inputs:
context:
description: 'Build context'
required: true
file:
description: 'Dockerfile location'
required: true
platforms:
description: 'Target platforms'
required: true
pull:
description: 'Always attempt to pull a newer version of the image'
required: false
default: 'true'
push:
description: 'Push the image to registry'
required: false
default: 'true'
load:
description: 'Load the image into Docker daemon'
required: false
default: 'true'
tags:
description: 'Image tags'
required: true
no-cache:
description: 'Read from cache'
required: false
default: 'false'
cache-from:
description: 'Cache sources'
required: false
cache-to:
description: 'Cache destinations'
required: false
outputs:
description: 'Output destinations'
required: false
provenance:
description: 'Generate provenance attestation'
required: false
default: 'false'
build-args:
description: 'Build arguments'
required: false
retry-wait-time:
description: 'Time to wait before attempt 2 in seconds'
required: false
default: '60'
retry-wait-time-2:
description: 'Time to wait before attempt 3 in seconds'
required: false
default: '120'
runs:
using: "composite"
steps:
- name: Build and push Docker image (Attempt 1 of 3)
id: buildx1
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
continue-on-error: true
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
outputs: ${{ inputs.outputs }}
provenance: ${{ inputs.provenance }}
build-args: ${{ inputs.build-args }}
- name: Wait before attempt 2
if: steps.buildx1.outcome != 'success'
run: |
echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..."
sleep ${{ inputs.retry-wait-time }}
shell: bash
- name: Build and push Docker image (Attempt 2 of 3)
id: buildx2
if: steps.buildx1.outcome != 'success'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
outputs: ${{ inputs.outputs }}
provenance: ${{ inputs.provenance }}
build-args: ${{ inputs.build-args }}
- name: Wait before attempt 3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
run: |
echo "Second attempt failed. Waiting ${{ inputs.retry-wait-time-2 }} seconds before retry..."
sleep ${{ inputs.retry-wait-time-2 }}
shell: bash
- name: Build and push Docker image (Attempt 3 of 3)
id: buildx3
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # ratchet:docker/build-push-action@v6
with:
context: ${{ inputs.context }}
file: ${{ inputs.file }}
platforms: ${{ inputs.platforms }}
pull: ${{ inputs.pull }}
push: ${{ inputs.push }}
load: ${{ inputs.load }}
tags: ${{ inputs.tags }}
no-cache: ${{ inputs.no-cache }}
cache-from: ${{ inputs.cache-from }}
cache-to: ${{ inputs.cache-to }}
outputs: ${{ inputs.outputs }}
provenance: ${{ inputs.provenance }}
build-args: ${{ inputs.build-args }}
- name: Report failure
if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
run: |
echo "All attempts failed. Possible transient infrastucture issues? Try again later or inspect logs for details."
shell: bash

View File

@@ -0,0 +1,42 @@
name: "Prepare Build (OpenAPI generation)"
description: "Sets up Python with uv, installs deps, generates OpenAPI schema and Python client, uploads artifact"
inputs:
docker-username:
required: true
docker-password:
required: true
runs:
using: "composite"
steps:
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
- name: Generate OpenAPI schema
shell: bash
working-directory: backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ inputs['docker-username'] }}
password: ${{ inputs['docker-password'] }}
- name: Generate OpenAPI Python client
shell: bash
run: |
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"

View File

@@ -7,9 +7,9 @@ runs:
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-${{ runner.arch }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
key: ${{ runner.os }}-playwright-${{ hashFiles('backend/requirements/default.txt') }}
restore-keys: |
${{ runner.os }}-${{ runner.arch }}-playwright-
${{ runner.os }}-playwright-
- name: Install playwright
shell: bash

View File

@@ -1,45 +1,22 @@
name: "Setup Python and Install Dependencies"
description: "Sets up Python with uv and installs deps"
inputs:
requirements:
description: "Newline-separated list of requirement files to install (relative to repo root)"
required: true
runs:
using: "composite"
steps:
- name: Compute requirements hash
id: req-hash
shell: bash
env:
REQUIREMENTS: ${{ inputs.requirements }}
run: |
# Hash the contents of the specified requirement files
hash=""
while IFS= read -r req; do
if [ -n "$req" ] && [ -f "$req" ]; then
hash="$hash$(sha256sum "$req")"
fi
done <<< "$REQUIREMENTS"
echo "hash=$(echo "$hash" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
- name: Setup uv
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
# NOTE: This comes before Setup uv since clean-ups run in reverse chronological order
# such that Setup uv's prune-cache is able to prune the cache before we upload.
- name: Cache uv cache directory
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ steps.req-hash.outputs.hash }}
key: ${{ runner.os }}-uv-${{ hashFiles('backend/requirements/*.txt', 'backend/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-uv-
- name: Setup uv
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:
@@ -47,30 +24,15 @@ runs:
- name: Create virtual environment
shell: bash
env:
VENV_DIR: ${{ runner.temp }}/venv
run: | # zizmor: ignore[github-env]
uv venv "$VENV_DIR"
# Validate path before adding to GITHUB_PATH to prevent code injection
if [ -d "$VENV_DIR/bin" ]; then
realpath "$VENV_DIR/bin" >> "$GITHUB_PATH"
else
echo "Error: $VENV_DIR/bin does not exist"
exit 1
fi
run: |
uv venv ${{ runner.temp }}/venv
echo "VENV_PATH=${{ runner.temp }}/venv" >> $GITHUB_ENV
echo "${{ runner.temp }}/venv/bin" >> $GITHUB_PATH
- name: Install Python dependencies with uv
shell: bash
env:
REQUIREMENTS: ${{ inputs.requirements }}
run: |
# Build the uv pip install command with each requirement file as array elements
cmd=("uv" "pip" "install")
while IFS= read -r req; do
# Skip empty lines
if [ -n "$req" ]; then
cmd+=("-r" "$req")
fi
done <<< "$REQUIREMENTS"
echo "Running: ${cmd[*]}"
"${cmd[@]}"
uv pip install \
-r backend/requirements/default.txt \
-r backend/requirements/dev.txt \
-r backend/requirements/model_server.txt

View File

@@ -21,27 +21,26 @@ runs:
shell: bash
env:
SLACK_WEBHOOK_URL: ${{ inputs.webhook-url }}
FAILED_JOBS: ${{ inputs.failed-jobs }}
TITLE: ${{ inputs.title }}
REF_NAME: ${{ inputs.ref-name }}
REPO: ${{ github.repository }}
WORKFLOW: ${{ github.workflow }}
RUN_NUMBER: ${{ github.run_number }}
RUN_ID: ${{ github.run_id }}
SERVER_URL: ${{ github.server_url }}
GITHUB_REF_NAME: ${{ github.ref_name }}
run: |
if [ -z "$SLACK_WEBHOOK_URL" ]; then
echo "webhook-url input or SLACK_WEBHOOK_URL env var is not set, skipping notification"
exit 0
fi
# Build workflow URL
# Get inputs with defaults
FAILED_JOBS="${{ inputs.failed-jobs }}"
TITLE="${{ inputs.title }}"
REF_NAME="${{ inputs.ref-name }}"
REPO="${{ github.repository }}"
WORKFLOW="${{ github.workflow }}"
RUN_NUMBER="${{ github.run_number }}"
RUN_ID="${{ github.run_id }}"
SERVER_URL="${{ github.server_url }}"
WORKFLOW_URL="${SERVER_URL}/${REPO}/actions/runs/${RUN_ID}"
# Use ref_name from input or fall back to github.ref_name
if [ -z "$REF_NAME" ]; then
REF_NAME="$GITHUB_REF_NAME"
REF_NAME="${{ github.ref_name }}"
fi
# Escape JSON special characters

View File

@@ -4,8 +4,6 @@ updates:
directory: "/"
schedule:
interval: "weekly"
cooldown:
default-days: 7
open-pull-requests-limit: 3
assignees:
- "jmelahman"
@@ -15,8 +13,6 @@ updates:
directory: "/backend"
schedule:
interval: "weekly"
cooldown:
default-days: 7
open-pull-requests-limit: 3
assignees:
- "jmelahman"

View File

@@ -1,10 +1,10 @@
## Description
<!--- Provide a brief description of the changes in this PR --->
[Provide a brief description of the changes in this PR]
## How Has This Been Tested?
<!--- Describe the tests you ran to verify your changes --->
[Describe the tests you ran to verify your changes]
## Additional Options

View File

@@ -0,0 +1,27 @@
name: Check Lazy Imports
concurrency:
group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- 'release/**'
jobs:
check-lazy-imports:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
- name: Check lazy imports
run: python3 backend/scripts/check_lazy_imports.py

File diff suppressed because it is too large Load Diff

View File

@@ -10,18 +10,14 @@ on:
description: "The version (ie v1.0.0-beta.0) to tag as beta"
required: true
permissions:
contents: read
jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -33,19 +29,13 @@ jobs:
run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
- name: Pull, Tag and Push Web Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${VERSION}
docker buildx imagetools create -t onyxdotapp/onyx-web-server:beta onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}
- name: Pull, Tag and Push API Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${VERSION}
docker buildx imagetools create -t onyxdotapp/onyx-backend:beta onyxdotapp/onyx-backend:${{ github.event.inputs.version }}
- name: Pull, Tag and Push Model Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${VERSION}
docker buildx imagetools create -t onyxdotapp/onyx-model-server:beta onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}

View File

@@ -10,18 +10,14 @@ on:
description: "The version (ie v0.0.1) to tag as latest"
required: true
permissions:
contents: read
jobs:
tag:
# See https://runs-on.com/runners/linux/
# use a lower powered instance since this just does i/o to docker hub
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-tag"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -33,19 +29,13 @@ jobs:
run: echo "DOCKER_CLI_EXPERIMENTAL=enabled" >> $GITHUB_ENV
- name: Pull, Tag and Push Web Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${VERSION}
docker buildx imagetools create -t onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:${{ github.event.inputs.version }}
- name: Pull, Tag and Push API Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${VERSION}
docker buildx imagetools create -t onyxdotapp/onyx-backend:latest onyxdotapp/onyx-backend:${{ github.event.inputs.version }}
- name: Pull, Tag and Push Model Server Image
env:
VERSION: ${{ github.event.inputs.version }}
run: |
docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${VERSION}
docker buildx imagetools create -t onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:${{ github.event.inputs.version }}

View File

@@ -12,13 +12,11 @@ jobs:
permissions:
contents: write
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Install Helm CLI
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4
@@ -29,11 +27,9 @@ jobs:
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Build chart dependencies

View File

@@ -1,31 +0,0 @@
name: Merge Group-Specific
on:
merge_group:
permissions:
contents: read
jobs:
# This job immediately succeeds to satisfy branch protection rules on merge_group events.
# There is a similarly named "required" job in pr-integration-tests.yml which runs the actual
# integration tests. That job runs on both pull_request and merge_group events, and this job
# exists solely to provide a fast-passing check with the same name for branch protection.
# The actual tests remain enforced on presubmit (pull_request events).
required:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Success
run: echo "Success"
# This job immediately succeeds to satisfy branch protection rules on merge_group events.
# There is a similarly named "playwright-required" job in pr-playwright-tests.yml which runs
# the actual playwright tests. That job runs on both pull_request and merge_group events, and
# this job exists solely to provide a fast-passing check with the same name for branch protection.
# The actual tests remain enforced on presubmit (pull_request events).
playwright-required:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Success
run: echo "Success"

View File

@@ -11,9 +11,8 @@ permissions:
jobs:
stale:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # ratchet:actions/stale@v10
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # ratchet:actions/stale@v9
with:
stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'

View File

@@ -15,25 +15,19 @@ on:
permissions:
actions: read
contents: read
security-events: write
jobs:
scan-licenses:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-licenses"]
timeout-minutes: 45
permissions:
actions: read
contents: read
security-events: write
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'
@@ -60,9 +54,7 @@ jobs:
- name: Print report
if: always()
env:
REPORT: ${{ steps.license_check_report.outputs.report }}
run: echo "$REPORT"
run: echo "${{ steps.license_check_report.outputs.report }}"
- name: Install npm dependencies
working-directory: ./web
@@ -90,11 +82,10 @@ jobs:
scan-trivy:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}-scan-trivy"]
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -1,62 +0,0 @@
name: Database Tests
concurrency:
group: Database-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
database-tests:
runs-on:
- runs-on
- runner=2cpu-linux-arm64
- "run-id=${{ github.run_id }}-database-tests"
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Generate OpenAPI schema and Python client
shell: bash
run: |
ods openapi all
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers
working-directory: ./deployment/docker_compose
run: |
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d \
relational_db
- name: Run Database Tests
working-directory: ./backend
run: pytest -m alembic tests/integration/tests/migrations/

View File

@@ -7,22 +7,14 @@ on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# AWS credentials for S3-specific test
S3_AWS_ACCESS_KEY_ID_FOR_TEST: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
S3_AWS_SECRET_ACCESS_KEY_FOR_TEST: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
# AWS
S3_AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
# MinIO
S3_ENDPOINT_URL: "http://localhost:9004"
S3_AWS_ACCESS_KEY_ID: "minioadmin"
S3_AWS_SECRET_ACCESS_KEY: "minioadmin"
# Confluence
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
@@ -32,34 +24,19 @@ env:
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# Jira
JIRA_ADMIN_API_TOKEN: ${{ secrets.JIRA_ADMIN_API_TOKEN }}
# LLMs
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
VERTEX_LOCATION: ${{ vars.VERTEX_LOCATION }}
# Code Interpreter
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
# OpenSearch
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Discover test directories
id: set-matrix
@@ -76,7 +53,6 @@ jobs:
- runner=2cpu-linux-arm64
- ${{ format('run-id={0}-external-dependency-unit-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=s3-cache
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
@@ -85,23 +61,15 @@ jobs:
env:
PYTHONPATH: ./backend
MODEL_SERVER_HOST: "disabled"
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/ee.txt
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
@@ -115,27 +83,10 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Create .env file for Docker Compose
run: |
cat <<EOF > deployment/docker_compose/.env
CODE_INTERPRETER_BETA_ENABLED=true
DISABLE_TELEMETRY=true
EOF
- name: Set up Standard Dependencies
run: |
cd deployment/docker_compose
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker-compose.opensearch.yml \
up -d \
minio \
relational_db \
cache \
index \
opensearch \
code-interpreter
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index
- name: Run migrations
run: |
@@ -146,39 +97,10 @@ jobs:
- name: Run Tests for ${{ matrix.test-dir }}
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
env:
TEST_DIR: ${{ matrix.test-dir }}
run: |
py.test \
--durations=8 \
-o junit_family=xunit2 \
-xv \
--ff \
backend/tests/external_dependency_unit/${TEST_DIR}
- name: Collect Docker logs on failure
if: failure()
run: |
mkdir -p docker-logs
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
# Collect logs from each container
for container in $containers; do
container_name=$(docker inspect --format='{{.Name}}' $container | sed 's/^\///')
echo "Collecting logs from $container_name..."
docker logs $container > ../../docker-logs/${container_name}.log 2>&1
done
cd ../..
echo "Docker logs collected in docker-logs directory"
- name: Upload Docker logs
if: failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: docker-logs-${{ matrix.test-dir }}
path: docker-logs/
retention-days: 7
backend/tests/external_dependency_unit/${{ matrix.test-dir }}

View File

@@ -6,253 +6,226 @@ concurrency:
on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
workflow_dispatch: # Allows manual triggering
permissions:
contents: read
branches: [ main ]
workflow_dispatch: # Allows manual triggering
jobs:
helm-chart-check:
# See https://runs-on.com/runners/linux/
runs-on:
[
runs-on,
runner=8cpu-linux-x64,
hdd=256,
"run-id=${{ github.run_id }}-helm-chart-check",
]
timeout-minutes: 45
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
with:
version: v3.19.0
- name: Set up Helm
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
with:
version: v3.19.0
- name: Set up chart-testing
# NOTE: This is Jamison's patch from https://github.com/helm/chart-testing-action/pull/194
uses: helm/chart-testing-action@8958a6ac472cbd8ee9a8fbb6f1acbc1b0e966e44 # zizmor: ignore[impostor-commit]
with:
uv_version: "0.9.9"
- name: Set up chart-testing
uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
- name: Run chart-testing (list-changed)
id: list-changed
env:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
echo "default_branch: ${DEFAULT_BRANCH}"
changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
echo "list-changed output: $changed"
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
- name: Run chart-testing (list-changed)
id: list-changed
run: |
echo "default_branch: ${{ github.event.repository.default_branch }}"
changed=$(ct list-changed --remote origin --target-branch ${{ github.event.repository.default_branch }} --chart-dirs deployment/helm/charts)
echo "list-changed output: $changed"
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
run: ct lint --config ct.yaml --all
# the following would lint only changed charts, but linting isn't expensive
# run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
run: ct lint --config ct.yaml --all
# the following would lint only changed charts, but linting isn't expensive
# run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-install Cluster Status ==="
kubectl get nodes -o wide
kubectl get pods --all-namespaces
kubectl get storageclass
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
- name: Add Helm repositories and update
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-install Cluster Status ==="
kubectl get nodes -o wide
kubectl get pods --all-namespaces
kubectl get storageclass
- name: Install Redis operator
if: steps.list-changed.outputs.changed == 'true'
shell: bash
run: |
echo "=== Installing redis-operator CRDs ==="
helm upgrade --install redis-operator ot-container-kit/redis-operator \
--namespace redis-operator --create-namespace --wait --timeout 300s
- name: Add Helm repositories and update
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo update
- name: Pre-pull required images
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-pulling required images to avoid timeout ==="
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
echo "Kind cluster: $KIND_CLUSTER"
- name: Install Redis operator
if: steps.list-changed.outputs.changed == 'true'
shell: bash
run: |
echo "=== Installing redis-operator CRDs ==="
helm upgrade --install redis-operator ot-container-kit/redis-operator \
--namespace redis-operator --create-namespace --wait --timeout 300s
IMAGES=(
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
"quay.io/opstree/redis:v7.0.15"
"docker.io/onyxdotapp/onyx-web-server:latest"
)
- name: Pre-pull required images
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-pulling required images to avoid timeout ==="
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
echo "Kind cluster: $KIND_CLUSTER"
for image in "${IMAGES[@]}"; do
echo "Pre-pulling $image"
if docker pull "$image"; then
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
else
echo "Failed to pull $image"
fi
done
IMAGES=(
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
"quay.io/opstree/redis:v7.0.15"
"docker.io/onyxdotapp/onyx-web-server:latest"
)
echo "=== Images loaded into Kind cluster ==="
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
- name: Validate chart dependencies
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Validating chart dependencies ==="
cd deployment/helm/charts/onyx
helm dependency update
helm lint .
- name: Run chart-testing (install) with enhanced monitoring
timeout-minutes: 25
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Starting chart installation with monitoring ==="
# Function to monitor cluster state
monitor_cluster() {
while true; do
echo "=== Cluster Status Check at $(date) ==="
# Only show non-running pods to reduce noise
NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
if [ "$NON_RUNNING_PODS" -gt 0 ]; then
echo "Non-running pods:"
kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
else
echo "All pods running successfully"
fi
# Only show recent events if there are issues
RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
if [ -n "$RECENT_EVENTS" ]; then
echo "Recent warnings/errors:"
echo "$RECENT_EVENTS"
fi
sleep 60
done
}
# Start monitoring in background
monitor_cluster &
MONITOR_PID=$!
# Set up cleanup
cleanup() {
echo "=== Cleaning up monitoring process ==="
kill $MONITOR_PID 2>/dev/null || true
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
# Trap cleanup on exit
trap cleanup EXIT
# Run the actual installation with detailed logging
# Note that opensearch.enabled is true whereas others in this install
# are false. There is some work that needs to be done to get this
# entire step working in CI, enabling opensearch here is a small step
# in that direction. If this is causing issues, disabling it in this
# step should be ok in the short term.
echo "=== Starting ct install ==="
set +e
ct install --all \
--helm-extra-set-args="\
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=opensearch.enabled=true \
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
--set=webserver.replicaCount=1 \
--set=api.replicaCount=0 \
--set=inferenceCapability.replicaCount=0 \
--set=indexCapability.replicaCount=0 \
--set=celery_beat.replicaCount=0 \
--set=celery_worker_heavy.replicaCount=0 \
--set=celery_worker_docfetching.replicaCount=0 \
--set=celery_worker_docprocessing.replicaCount=0 \
--set=celery_worker_light.replicaCount=0 \
--set=celery_worker_monitoring.replicaCount=0 \
--set=celery_worker_primary.replicaCount=0 \
--set=celery_worker_user_file_processing.replicaCount=0 \
--set=celery_worker_user_files_indexing.replicaCount=0" \
--helm-extra-args="--timeout 900s --debug" \
--debug --config ct.yaml
CT_EXIT=$?
set -e
if [[ $CT_EXIT -ne 0 ]]; then
echo "ct install failed with exit code $CT_EXIT"
exit $CT_EXIT
for image in "${IMAGES[@]}"; do
echo "Pre-pulling $image"
if docker pull "$image"; then
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
else
echo "=== Installation completed successfully ==="
echo "Failed to pull $image"
fi
done
kubectl get pods --all-namespaces
echo "=== Images loaded into Kind cluster ==="
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
- name: Post-install verification
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Post-install verification ==="
kubectl get pods --all-namespaces
kubectl get services --all-namespaces
# Only show issues if they exist
kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
- name: Validate chart dependencies
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Validating chart dependencies ==="
cd deployment/helm/charts/onyx
helm dependency update
helm lint .
- name: Cleanup on failure
if: failure() && steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Cleanup on failure ==="
- name: Run chart-testing (install) with enhanced monitoring
timeout-minutes: 25
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Starting chart installation with monitoring ==="
# Function to monitor cluster state
monitor_cluster() {
while true; do
echo "=== Cluster Status Check at $(date) ==="
# Only show non-running pods to reduce noise
NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
if [ "$NON_RUNNING_PODS" -gt 0 ]; then
echo "Non-running pods:"
kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
else
echo "All pods running successfully"
fi
# Only show recent events if there are issues
RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
if [ -n "$RECENT_EVENTS" ]; then
echo "Recent warnings/errors:"
echo "$RECENT_EVENTS"
fi
sleep 60
done
}
# Start monitoring in background
monitor_cluster &
MONITOR_PID=$!
# Set up cleanup
cleanup() {
echo "=== Cleaning up monitoring process ==="
kill $MONITOR_PID 2>/dev/null || true
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
echo "=== Pod descriptions for debugging ==="
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
# Trap cleanup on exit
trap cleanup EXIT
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
# Run the actual installation with detailed logging
echo "=== Starting ct install ==="
set +e
ct install --all \
--helm-extra-set-args="\
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
--set=webserver.replicaCount=1 \
--set=api.replicaCount=0 \
--set=inferenceCapability.replicaCount=0 \
--set=indexCapability.replicaCount=0 \
--set=celery_beat.replicaCount=0 \
--set=celery_worker_heavy.replicaCount=0 \
--set=celery_worker_docfetching.replicaCount=0 \
--set=celery_worker_docprocessing.replicaCount=0 \
--set=celery_worker_light.replicaCount=0 \
--set=celery_worker_monitoring.replicaCount=0 \
--set=celery_worker_primary.replicaCount=0 \
--set=celery_worker_user_file_processing.replicaCount=0 \
--set=celery_worker_user_files_indexing.replicaCount=0" \
--helm-extra-args="--timeout 900s --debug" \
--debug --config ct.yaml
CT_EXIT=$?
set -e
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}
if [[ $CT_EXIT -ne 0 ]]; then
echo "ct install failed with exit code $CT_EXIT"
exit $CT_EXIT
else
echo "=== Installation completed successfully ==="
fi
kubectl get pods --all-namespaces
- name: Post-install verification
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Post-install verification ==="
kubectl get pods --all-namespaces
kubectl get services --all-namespaces
# Only show issues if they exist
kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
- name: Cleanup on failure
if: failure() && steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Cleanup on failure ==="
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
echo "=== Pod descriptions for debugging ==="
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}

View File

@@ -9,12 +9,6 @@ on:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# Test Environment Variables
@@ -33,30 +27,22 @@ env:
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN }}
GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC }}
GITHUB_ADMIN_EMAIL: ${{ secrets.ONYX_GITHUB_ADMIN_EMAIL }}
GITHUB_TEST_USER_1_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_1_EMAIL }}
GITHUB_TEST_USER_2_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_2_EMAIL }}
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Discover test directories
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
@@ -72,38 +58,16 @@ jobs:
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
build-backend-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -121,49 +85,20 @@ jobs:
file: ./backend/Dockerfile
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -181,34 +116,19 @@ jobs:
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
build-integration-image:
runs-on:
[
runs-on,
runner=2cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-integration-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
@@ -219,42 +139,11 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Build and push integration test image with Docker Bake
env:
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
run: cd backend && docker buildx bake --push integration
integration-tests:
needs:
@@ -269,7 +158,6 @@ jobs:
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
timeout-minutes: 45
strategy:
fail-fast: false
@@ -279,9 +167,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -294,30 +180,19 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Create .env file for Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
MCP_SERVER_ENABLED=true
USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
@@ -330,45 +205,39 @@ jobs:
-d
id: start_docker
- name: Wait for services to be ready
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
wait_for_service() {
local url=$1
local label=$2
local timeout=${3:-300} # default 5 minutes
local start_time
start_time=$(date +%s)
docker logs -f onyx-api_server-1 &
while true; do
local current_time
current_time=$(date +%s)
local elapsed_time=$((current_time - start_time))
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. ${label} did not become ready in $timeout seconds."
exit 1
fi
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
local response
response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
if [ "$response" = "200" ]; then
echo "${label} is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
else
echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
sleep 5
done
}
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
wait_for_service "http://localhost:8080/health" "API server"
echo "Finished waiting for services."
sleep 5
done
echo "Finished waiting for service."
- name: Start Mock Services
run: |
@@ -398,7 +267,6 @@ jobs:
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
@@ -412,11 +280,6 @@ jobs:
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
-e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
-e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN} \
-e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC} \
-e GITHUB_ADMIN_EMAIL=${GITHUB_ADMIN_EMAIL} \
-e GITHUB_TEST_USER_1_EMAIL=${GITHUB_TEST_USER_1_EMAIL} \
-e GITHUB_TEST_USER_2_EMAIL=${GITHUB_TEST_USER_2_EMAIL} \
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
@@ -439,30 +302,26 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
multitenant-tests:
needs:
[build-backend-image, build-model-server-image, build-integration-image]
runs-on:
[
runs-on,
runner=8cpu-linux-arm64,
"run-id=${{ github.run_id }}-multitenant-tests",
"extras=ecr-cache",
build-backend-image,
build-model-server-image,
build-integration-image,
]
timeout-minutes: 45
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -471,9 +330,6 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers for multi-tenant tests
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
@@ -481,9 +337,8 @@ jobs:
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
OPENAI_DEFAULT_API_KEY=${OPENAI_API_KEY} \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
DEV_MODE=true \
docker compose -f docker-compose.multitenant-dev.yml up \
relational_db \
@@ -524,9 +379,6 @@ jobs:
echo "Finished waiting for service."
- name: Run Multi-Tenant Integration Tests
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
echo "Running multi-tenant integration tests..."
docker run --rm --network onyx_default \
@@ -542,7 +394,6 @@ jobs:
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
-e AUTH_TYPE=cloud \
@@ -551,7 +402,7 @@ jobs:
-e REQUIRE_EMAIL_VERIFICATION=false \
-e DISABLE_TELEMETRY=true \
-e DEV_MODE=true \
${ECR_CACHE}:integration-test-${RUN_ID} \
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
/app/tests/integration/multitenant_tests
- name: Dump API server logs (multi-tenant)
@@ -568,7 +419,7 @@ jobs:
- name: Upload logs (multi-tenant)
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-multitenant
path: ${{ github.workspace }}/docker-compose-multitenant.log
@@ -582,10 +433,16 @@ jobs:
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests, multitenant-tests]
if: ${{ always() }}
steps:
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
with:
script: |
const needs = ${{ toJSON(needs) }};
const failed = Object.values(needs).some(n => n.result !== 'success');
if (failed) {
core.setFailed('One or more upstream jobs failed or were cancelled.');
} else {
core.notice('All required jobs succeeded.');
}

View File

@@ -3,35 +3,21 @@ concurrency:
group: Run-Jest-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
on: push
jobs:
jest-tests:
name: Jest Tests
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
@@ -44,7 +30,7 @@ jobs:
- name: Upload coverage reports
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: jest-coverage-${{ github.run_id }}
path: ./web/coverage

View File

@@ -1,7 +1,7 @@
name: PR Labeler
on:
pull_request:
pull_request_target:
branches:
- main
types:
@@ -12,11 +12,11 @@ on:
permissions:
contents: read
pull-requests: write
jobs:
validate_pr_title:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Check PR title for Conventional Commits
env:

View File

@@ -7,13 +7,9 @@ on:
pull_request:
types: [opened, edited, reopened, synchronize]
permissions:
contents: read
jobs:
linear-check:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Check PR body for Linear link or override
env:

View File

@@ -6,18 +6,11 @@ concurrency:
on:
merge_group:
types: [checks_requested]
push:
tags:
- "v*.*.*"
permissions:
contents: read
env:
# Test Environment Variables
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
@@ -35,20 +28,17 @@ jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
outputs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Discover test directories
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
@@ -65,37 +55,14 @@ jobs:
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
build-backend-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -113,49 +80,19 @@ jobs:
file: ./backend/Dockerfile
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -173,48 +110,18 @@ jobs:
file: ./backend/Dockerfile.model_server
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-cache,mode=max
build-integration-image:
runs-on:
[
runs-on,
runner=2cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-integration-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
@@ -227,26 +134,9 @@ jobs:
- name: Build and push integration test image with Docker Bake
env:
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
run: cd backend && docker buildx bake --push integration
integration-tests-mit:
needs:
@@ -261,7 +151,6 @@ jobs:
- runner=4cpu-linux-arm64
- ${{ format('run-id={0}-integration-tests-mit-job-{1}', github.run_id, strategy['job-index']) }}
- extras=ecr-cache
timeout-minutes: 45
strategy:
fail-fast: false
@@ -271,9 +160,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -286,27 +173,17 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Create .env file for Docker Compose
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }} \
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }} \
INTEGRATION_TESTS_MODE=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
@@ -319,45 +196,39 @@ jobs:
-d
id: start_docker
- name: Wait for services to be ready
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
wait_for_service() {
local url=$1
local label=$2
local timeout=${3:-300} # default 5 minutes
local start_time
start_time=$(date +%s)
docker logs -f onyx-api_server-1 &
while true; do
local current_time
current_time=$(date +%s)
local elapsed_time=$((current_time - start_time))
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. ${label} did not become ready in $timeout seconds."
exit 1
fi
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
local response
response=$(curl -s -o /dev/null -w "%{http_code}" "$url" || echo "curl_error")
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
if [ "$response" = "200" ]; then
echo "${label} is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error while checking ${label}. Retrying in 5 seconds..."
else
echo "${label} not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health || echo "curl_error")
sleep 5
done
}
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
wait_for_service "http://localhost:8080/health" "API server"
echo "Finished waiting for services."
sleep 5
done
echo "Finished waiting for service."
- name: Start Mock Services
run: |
@@ -388,7 +259,6 @@ jobs:
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
@@ -424,19 +294,26 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [integration-tests-mit]
if: ${{ always() }}
steps:
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1
- uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # ratchet:actions/github-script@v8
with:
script: |
const needs = ${{ toJSON(needs) }};
const failed = Object.values(needs).some(n => n.result !== 'success');
if (failed) {
core.setFailed('One or more upstream jobs failed or were cancelled.');
} else {
core.notice('All required jobs succeeded.');
}

View File

@@ -3,18 +3,7 @@ concurrency:
group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
on: push
env:
# Test Environment Variables
@@ -35,13 +24,6 @@ env:
MCP_OAUTH_USERNAME: ${{ vars.MCP_OAUTH_USERNAME }}
MCP_OAUTH_PASSWORD: ${{ secrets.MCP_OAUTH_PASSWORD }}
# for MCP API Key tests
MCP_API_KEY: test-api-key-12345
MCP_API_KEY_TEST_PORT: 8005
MCP_API_KEY_TEST_URL: http://host.docker.internal:8005/mcp
MCP_API_KEY_SERVER_HOST: 0.0.0.0
MCP_API_KEY_SERVER_PUBLIC_HOST: host.docker.internal
MOCK_LLM_RESPONSE: true
MCP_TEST_SERVER_PORT: 8004
MCP_TEST_SERVER_URL: http://host.docker.internal:8004/mcp
@@ -54,38 +36,15 @@ env:
jobs:
build-web-image:
runs-on:
[
runs-on,
runner=4cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-web-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -103,50 +62,20 @@ jobs:
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache
type=registry,ref=onyxdotapp/onyx-web-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-backend-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -164,50 +93,20 @@ jobs:
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
timeout-minutes: 45
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -225,27 +124,14 @@ jobs:
platforms: linux/arm64
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
cache-from: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
playwright-tests:
needs: [build-web-image, build-backend-image, build-model-server-image]
name: Playwright Tests (${{ matrix.project }})
runs-on:
- runs-on
- runner=8cpu-linux-arm64
- "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}"
- "extras=ecr-cache"
- volume=50gb
timeout-minutes: 45
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-playwright-tests-${{ matrix.project }}", "extras=ecr-cache"]
strategy:
fail-fast: false
matrix:
@@ -254,15 +140,15 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
persist-credentials: false
fetch-depth: 0
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
@@ -282,26 +168,18 @@ jobs:
run: npx playwright install --with-deps
- name: Create .env file for Docker Compose
env:
OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
EXA_API_KEY_VALUE: ${{ env.EXA_API_KEY }}
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
EXA_API_KEY=${EXA_API_KEY_VALUE}
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }}
EXA_API_KEY=${{ env.EXA_API_KEY }}
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:playwright-test-model-server-${RUN_ID}
ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
ONYX_BACKEND_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
ONYX_MODEL_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
ONYX_WEB_SERVER_IMAGE=${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
EOF
if [ "${{ matrix.project }}" = "no-auth" ]; then
echo "PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true" >> deployment/docker_compose/.env
fi
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -315,7 +193,7 @@ jobs:
- name: Start Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml -f docker-compose.mcp-api-key-test.yml up -d
docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.mcp-oauth-test.yml up -d
id: start_docker
- name: Wait for service to be ready
@@ -375,67 +253,14 @@ jobs:
sleep 3
done
- name: Wait for MCP API Key mock server
run: |
echo "Waiting for MCP API Key mock server on port ${MCP_API_KEY_TEST_PORT:-8005}..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. MCP API Key mock server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:${MCP_API_KEY_TEST_PORT:-8005}/healthz" > /dev/null; then
echo "MCP API Key mock server is ready!"
break
fi
sleep 3
done
- name: Wait for web server to be ready
run: |
echo "Waiting for web server on port 3000..."
start_time=$(date +%s)
timeout=120
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Web server did not become ready in ${timeout}s."
exit 1
fi
if curl -sf "http://localhost:3000/api/health" > /dev/null 2>&1 || \
curl -sf "http://localhost:3000/" > /dev/null 2>&1; then
echo "Web server is ready!"
break
fi
echo "Web server not ready yet. Retrying in 3 seconds..."
sleep 3
done
- name: Run Playwright tests
working-directory: ./web
env:
PROJECT: ${{ matrix.project }}
run: |
# Create test-results directory to ensure it exists for artifact upload
mkdir -p test-results
if [ "${PROJECT}" = "no-auth" ]; then
export PLAYWRIGHT_FORCE_EMPTY_LLM_PROVIDERS=true
fi
npx playwright test --project ${PROJECT}
npx playwright test --project ${{ matrix.project }}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
if: always()
with:
# Includes test results and trace.zip files
@@ -446,30 +271,18 @@ jobs:
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
env:
WORKSPACE: ${{ github.workspace }}
run: |
cd deployment/docker_compose
docker compose logs > docker-compose.log
mv docker-compose.log ${WORKSPACE}/docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
playwright-required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 45
needs: [playwright-tests]
if: ${{ always() }}
steps:
- name: Check job status
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
@@ -488,12 +301,12 @@ jobs:
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
# uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
# with:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
# uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
# with:
# node-version: 22

View File

@@ -9,12 +9,6 @@ on:
branches:
- main
- 'release/**'
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
mypy-check:
@@ -22,28 +16,26 @@ jobs:
# Note: Mypy seems quite optimized for x64 compared to arm64.
# Similarly, mypy is single-threaded and incremental, so 2cpu is sufficient.
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-mypy-check", "extras=s3-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
backend/requirements/ee.txt
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Generate OpenAPI schema and Python client
shell: bash
run: |
ods openapi all
- name: Prepare build
uses: ./.github/actions/prepare-build
with:
docker-username: ${{ secrets.DOCKER_USERNAME }}
docker-password: ${{ secrets.DOCKER_TOKEN }}
- name: Cache mypy cache
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
@@ -61,8 +53,11 @@ jobs:
TERM: xterm-256color
run: mypy .
- name: Run MyPy (tools/)
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy tools/
- name: Check import order with reorder-python-imports
working-directory: ./backend
run: |
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Check code formatting with Black
working-directory: ./backend
run: black --check .

View File

@@ -7,16 +7,10 @@ on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
schedule:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
permissions:
contents: read
env:
# AWS
AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS: ${{ secrets.AWS_ACCESS_KEY_ID_DAILY_CONNECTOR_TESTS }}
@@ -129,26 +123,18 @@ jobs:
connectors-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-connectors-check", "extras=s3-cache"]
timeout-minutes: 45
env:
PYTHONPATH: ./backend
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Setup Playwright
uses: ./.github/actions/setup-playwright
@@ -161,20 +147,16 @@ jobs:
hubspot:
- 'backend/onyx/connectors/hubspot/**'
- 'backend/tests/daily/connectors/hubspot/**'
- 'uv.lock'
salesforce:
- 'backend/onyx/connectors/salesforce/**'
- 'backend/tests/daily/connectors/salesforce/**'
- 'uv.lock'
github:
- 'backend/onyx/connectors/github/**'
- 'backend/tests/daily/connectors/github/**'
- 'uv.lock'
file_processing:
- 'backend/onyx/file_processing/**'
- 'uv.lock'
- name: Run Tests (excluding HubSpot, Salesforce, GitHub, and Coda)
- name: Run Tests (excluding HubSpot, Salesforce, and GitHub)
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
@@ -187,8 +169,7 @@ jobs:
backend/tests/daily/connectors \
--ignore backend/tests/daily/connectors/hubspot \
--ignore backend/tests/daily/connectors/salesforce \
--ignore backend/tests/daily/connectors/github \
--ignore backend/tests/daily/connectors/coda
--ignore backend/tests/daily/connectors/github
- name: Run HubSpot Connector Tests
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}
@@ -233,10 +214,8 @@ jobs:
if: failure() && github.event_name == 'schedule'
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data "{\"text\":\"Scheduled Connector Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
--data '{"text":"Scheduled Connector Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
$SLACK_WEBHOOK

View File

@@ -5,9 +5,11 @@ on:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
workflow_dispatch:
permissions:
contents: read
inputs:
branch:
description: 'Branch to run the workflow on'
required: false
default: 'main'
env:
# Bedrock
@@ -26,103 +28,115 @@ env:
jobs:
model-check:
# See https://runs-on.com/runners/linux/
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- "run-id=${{ github.run_id }}-model-check"
- "extras=ecr-cache"
timeout-minutes: 45
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
env:
PYTHONPATH: ./backend
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
- name: Build and load
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6
env:
TAG: model-server-${{ github.run_id }}
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# successfully.
- name: Pull Model Server Docker image
run: |
docker pull onyxdotapp/onyx-model-server:latest
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
load: true
targets: model-server
set: |
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
model-server.cache-from=type=registry,ref=onyxdotapp/onyx-model-server:latest
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Start Docker containers
id: start_docker
env:
IMAGE_TAG: model-server-${{ github.run_id }}
run: |
cd deployment/docker_compose
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
up -d --wait \
inference_model_server
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.SLACK_WEBHOOK }}
failed-jobs: model-check
title: "🚨 Scheduled Model Tests failed!"
ref-name: ${{ github.ref_name }}
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log

View File

@@ -9,41 +9,28 @@ on:
branches:
- main
- 'release/**'
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
backend-check:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-backend-check"]
timeout-minutes: 45
env:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
DISABLE_TELEMETRY: "true"
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
backend/requirements/model_server.txt
backend/requirements/ee.txt
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"

View File

@@ -6,44 +6,21 @@ concurrency:
on:
merge_group:
pull_request: null
push:
branches:
- main
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
quality-checks:
runs-on: ubuntu-latest
timeout-minutes: 45
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-quality-checks"]
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6
with: # zizmor: ignore[cache-poisoning]
node-version: 22
cache: "npm"
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: j178/prek-action@91fd7d7cf70ae1dee9f4f44e7dfa5d1073fe6623 # ratchet:j178/prek-action@v1
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
with:
prek-version: '0.2.21'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
- name: Check Actions
uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1
with:
check_permissions: false
check_versions: false
extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}

View File

@@ -1,41 +0,0 @@
name: Release Devtools
on:
push:
tags:
- "ods/v*.*.*"
jobs:
pypi:
runs-on: ubuntu-latest
environment:
name: release-devtools
permissions:
id-token: write
timeout-minutes: 10
strategy:
matrix:
os-arch:
- { goos: "linux", goarch: "amd64" }
- { goos: "linux", goarch: "arm64" }
- { goos: "windows", goarch: "amd64" }
- { goos: "windows", goarch: "arm64" }
- { goos: "darwin", goarch: "amd64" }
- { goos: "darwin", goarch: "arm64" }
- { goos: "", goarch: "" }
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- run: |
GOOS="${{ matrix.os-arch.goos }}" \
GOARCH="${{ matrix.os-arch.goarch }}" \
uv build --wheel
working-directory: tools/ods
- run: uv publish
working-directory: tools/ods

View File

@@ -9,15 +9,13 @@ on:
jobs:
sync-foss:
runs-on: ubuntu-latest
timeout-minutes: 45
permissions:
contents: read
steps:
- name: Checkout main Onyx repo
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Install git-filter-repo
run: |

View File

@@ -3,30 +3,30 @@ name: Nightly Tag Push
on:
schedule:
- cron: "0 10 * * *" # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC
workflow_dispatch:
permissions:
contents: write # Allows pushing tags to the repository
jobs:
create-and-push-tag:
runs-on: ubuntu-slim
timeout-minutes: 45
runs-on: [runs-on, runner=2cpu-linux-x64, "run-id=${{ github.run_id }}-create-and-push-tag"]
steps:
# actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
# implement here which needs an actual user's deploy key
# Additional NOTE: even though this is named "rkuo", the actual key is tied to the onyx repo
# and not rkuo's personal account. It is fine to leave this key as is!
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # ratchet:actions/checkout@v4
with:
ssh-key: "${{ secrets.DEPLOY_KEY }}"
persist-credentials: true
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
- name: Set up Git user
run: |
git config user.name "Onyx Bot [bot]"
git config user.email "onyx-bot[bot]@onyx.app"
git config user.name "Richard Kuo [bot]"
git config user.email "rkuo[bot]@onyx.app"
- name: Check for existing nightly tag
id: check_tag
@@ -54,12 +54,3 @@ jobs:
run: |
TAG_NAME="nightly-latest-$(date +'%Y%m%d')"
git push origin $TAG_NAME
- name: Send Slack notification
if: failure()
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
title: "🚨 Nightly Tag Push Failed"
ref-name: ${{ github.ref_name }}
failed-jobs: "create-and-push-tag"

View File

@@ -1,50 +0,0 @@
name: Run Zizmor
on:
push:
branches: ["main"]
pull_request:
branches: ["**"]
permissions: {}
jobs:
zizmor:
name: zizmor
runs-on: ubuntu-slim
timeout-minutes: 45
permissions:
security-events: write # needed for SARIF uploads
steps:
- name: Checkout repository
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
with:
persist-credentials: false
- name: Detect changes
id: filter
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
with:
filters: |
zizmor:
- '.github/**'
- name: Install the latest version of uv
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- name: Run zizmor
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5
with:
sarif_file: results.sarif
category: zizmor

12
.gitignore vendored
View File

@@ -1,10 +1,6 @@
# editors
.vscode
!/.vscode/env_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.cursor
# macos
.DS_store
@@ -24,7 +20,6 @@ backend/tests/regression/search_quality/*.json
backend/onyx/evals/data/
backend/onyx/evals/one_off/*.json
*.log
*.csv
# secret files
.env
@@ -33,8 +28,6 @@ settings.json
# others
/deployment/data/nginx/app.conf
/deployment/data/nginx/mcp.conf.inc
/deployment/data/nginx/mcp_upstream.conf.inc
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
*.egg-info
@@ -53,10 +46,5 @@ CLAUDE.md
# Local .terraform.lock.hcl file
.terraform.lock.hcl
node_modules
# MCP configs
.playwright-mcp
# plans
plans/

8
.mcp.json.template Normal file
View File

@@ -0,0 +1,8 @@
{
"mcpServers": {
"onyx-mcp": {
"type": "http",
"url": "http://localhost:8000/mcp"
}
}
}

View File

@@ -1,149 +1,61 @@
default_install_hook_types:
- pre-commit
- post-checkout
- post-merge
- post-rewrite
repos:
- repo: https://github.com/astral-sh/uv-pre-commit
# From: https://github.com/astral-sh/uv-pre-commit/pull/53/commits/d30b4298e4fb63ce8609e29acdbcf4c9018a483c
rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
hooks:
- id: uv-sync
args: ["--locked", "--all-extras"]
- id: uv-lock
- id: uv-export
name: uv-export default.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"backend",
"-o",
"backend/requirements/default.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export dev.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"dev",
"-o",
"backend/requirements/dev.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export ee.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"ee",
"-o",
"backend/requirements/ee.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export model_server.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"model_server",
"-o",
"backend/requirements/model_server.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-run
name: Check lazy imports
args: ["--active", "--with=onyx-devtools", "ods", "check-lazy-imports"]
pass_filenames: true
files: ^backend/(?!\.venv/|scripts/).*\.py$
# NOTE: This takes ~6s on a single, large module which is prohibitively slow.
# - id: uv-run
# name: mypy
# args: ["--all-extras", "mypy"]
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
rev: v4.6.0
hooks:
- id: check-added-large-files
name: Check for added large files
args: ["--maxkb=1500"]
- id: check-yaml
files: ^.github/
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
rev: v1.7.8
hooks:
- id: actionlint
- repo: https://github.com/psf/black
rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
rev: 25.1.0
hooks:
- id: black
language_version: python3.11
- id: black
language_version: python3.11
# this is a fork which keeps compatibility with black
- repo: https://github.com/wimglenn/reorder-python-imports-black
rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d # frozen: v3.14.0
rev: v3.14.0
hooks:
- id: reorder-python-imports
args: ["--py311-plus", "--application-directories=backend/"]
# need to ignore alembic files, since reorder-python-imports gets confused
# and thinks that alembic is a local package since there is a folder
# in the backend directory called `alembic`
exclude: ^backend/alembic/
- id: reorder-python-imports
args: ['--py311-plus', '--application-directories=backend/']
# need to ignore alembic files, since reorder-python-imports gets confused
# and thinks that alembic is a local package since there is a folder
# in the backend directory called `alembic`
exclude: ^backend/alembic/
# These settings will remove unused imports with side effects
# Note: The repo currently does not and should not have imports with side effects
- repo: https://github.com/PyCQA/autoflake
rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1 # frozen: v2.3.1
rev: v2.3.1
hooks:
- id: autoflake
args:
[
"--remove-all-unused-imports",
"--remove-unused-variables",
"--in-place",
"--recursive",
]
- repo: https://github.com/golangci/golangci-lint
rev: 9f61b0f53f80672872fced07b6874397c3ed197b # frozen: v2.7.2
hooks:
- id: golangci-lint
entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47 # frozen: v0.11.4
rev: v0.11.4
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier
rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4 # frozen: v3.1.0
rev: v3.1.0
hooks:
- id: prettier
types_or: [html, css, javascript, ts, tsx]
language_version: system
- id: prettier
types_or: [html, css, javascript, ts, tsx]
language_version: system
- repo: https://github.com/sirwart/ripsecrets
rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86 # frozen: v0.1.11
rev: v0.1.11
hooks:
- id: ripsecrets
args:
- --additional-pattern
- ^sk-[A-Za-z0-9_\-]{20,}$
- --additional-pattern
- ^sk-[A-Za-z0-9_\-]{20,}$
- repo: local
hooks:
@@ -154,29 +66,36 @@ repos:
pass_filenames: false
files: \.tf$
- id: npm-install
name: npm install
description: "Automatically run 'npm install' after a checkout, pull or rebase"
- id: check-lazy-imports
name: Check lazy imports
entry: python3 backend/scripts/check_lazy_imports.py
language: system
entry: bash -c 'cd web && npm install --no-save'
pass_filenames: false
files: ^web/package(-lock)?\.json$
stages: [post-checkout, post-merge, post-rewrite]
- id: npm-install-check
name: npm install --package-lock-only
description: "Check the 'web/package-lock.json' is updated"
language: system
entry: bash -c 'cd web && npm install --package-lock-only'
pass_filenames: false
files: ^web/package(-lock)?\.json$
files: ^backend/(?!\.venv/).*\.py$
# Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
# This is a preview package - if it breaks:
# 1. Try updating: cd web && npm update @typescript/native-preview
# 2. Or fallback to tsc: replace 'tsgo' with 'tsc' below
- id: typescript-check
name: TypeScript type check
entry: bash -c 'cd web && npx tsgo --noEmit --project tsconfig.types.json'
language: system
pass_filenames: false
files: ^web/.*\.(ts|tsx)$
# We would like to have a mypy pre-commit hook, but due to the fact that
# pre-commit runs in it's own isolated environment, we would need to install
# and keep in sync all dependencies so mypy has access to the appropriate type
# stubs. This does not seem worth it at the moment, so for now we will stick to
# having mypy run via Github Actions / manually by contributors
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.1.1
# hooks:
# - id: mypy
# exclude: ^tests/
# # below are needed for type stubs since pre-commit runs in it's own
# # isolated environment. Unfortunately, this needs to be kept in sync
# # with requirements/dev.txt + requirements/default.txt
# additional_dependencies: [
# alembic==1.10.4,
# types-beautifulsoup4==4.12.0.3,
# types-html5lib==1.1.11.13,
# types-oauthlib==3.2.0.9,
# types-psycopg2==2.9.21.10,
# types-python-dateutil==2.8.19.13,
# types-regex==2023.3.23.1,
# types-requests==2.28.11.17,
# types-retry==0.9.9.3,
# types-urllib3==1.26.25.11
# ]
# # TODO: add back once errors are addressed
# # args: [--strict]

View File

@@ -1,53 +1,66 @@
# Copy this file to .env in the .vscode folder.
# Fill in the <REPLACE THIS> values as needed; it is recommended to set the
# GEN_AI_API_KEY value to avoid having to set up an LLM in the UI.
# Also check out onyx/backend/scripts/restart_containers.sh for a script to
# restart the containers which Onyx relies on outside of VSCode/Cursor
# processes.
# Copy this file to .env in the .vscode folder
# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
# Also check out onyx/backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes
# For local dev, often user Authentication is not needed.
# For local dev, often user Authentication is not needed
AUTH_TYPE=disabled
# Skip warm up for dev
SKIP_WARM_UP=True
# Always keep these on for Dev.
# Logs model prompts, reasoning, and answer to stdout.
# Always keep these on for Dev
# Logs all model prompts to stdout
LOG_ONYX_MODEL_INTERACTIONS=True
# More verbose logging
LOG_LEVEL=debug
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).
# This passes top N results to LLM an additional time for reranking prior to answer generation
# This step is quite heavy on token usage so we disable it for dev generally
DISABLE_LLM_DOC_RELEVANCE=False
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
OAUTH_CLIENT_ID=<REPLACE THIS>
OAUTH_CLIENT_SECRET=<REPLACE THIS>
OPENID_CONFIG_URL=<REPLACE THIS>
SAML_CONF_DIR=/<ABSOLUTE PATH TO ONYX>/onyx/backend/ee/onyx/configs/saml_config
# Generally not useful for dev, we don't generally want to set up an SMTP server
# for dev.
# Generally not useful for dev, we don't generally want to set up an SMTP server for dev
REQUIRE_EMAIL_VERIFICATION=False
# Set these so if you wipe the DB, you don't end up having to go through the UI
# every time.
# Set these so if you wipe the DB, you don't end up having to go through the UI every time
GEN_AI_API_KEY=<REPLACE THIS>
OPENAI_API_KEY=<REPLACE THIS>
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o
# For Onyx Slack Bot, overrides the UI values so no need to set this up via UI every time
# Only needed if using OnyxBot
#ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
#ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
# Python stuff
PYTHONPATH=../backend
PYTHONUNBUFFERED=1
# Enable the full set of Danswer Enterprise Edition features.
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
# are using this for local testing/development).
# Internet Search
EXA_API_KEY=<REPLACE THIS>
# Enable the full set of Danswer Enterprise Edition features
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
# Agent Search configs # TODO: Remove give proper namings
AGENT_RETRIEVAL_STATS=False # Note: This setting will incur substantial re-ranking effort
AGENT_RERANKING_STATS=True
AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20
# S3 File Store Configuration (MinIO for local development)
S3_ENDPOINT_URL=http://localhost:9004
@@ -55,24 +68,16 @@ S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
S3_AWS_ACCESS_KEY_ID=minioadmin
S3_AWS_SECRET_ACCESS_KEY=minioadmin
# Show extra/uncommon connectors.
# Show extra/uncommon connectors
SHOW_EXTRA_CONNECTORS=True
# Local langsmith tracing
LANGSMITH_TRACING="true"
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY=<REPLACE_THIS>
LANGSMITH_PROJECT=<REPLACE_THIS>
# Local Confluence OAuth testing
# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
# NEXT_PUBLIC_TEST_ENV=True
# OpenSearch
# Arbitrary password is fine for local development.
OPENSEARCH_INITIAL_ADMIN_PASSWORD=<REPLACE THIS>
# NEXT_PUBLIC_TEST_ENV=True

View File

@@ -1,3 +1,5 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
@@ -18,11 +20,10 @@
"Web Server",
"Model Server",
"API Server",
"MCP Server",
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery background",
"Celery docfetching",
"Celery docprocessing",
"Celery beat"
@@ -131,6 +132,8 @@
},
"consoleTitle": "API Server Console"
},
// For the listener to access the Slack API,
// ONYX_BOT_SLACK_APP_TOKEN & ONYX_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Slack Bot",
"consoleName": "Slack Bot",
@@ -149,52 +152,6 @@
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Discord Bot",
"consoleName": "Discord Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/discord/client.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Discord Bot Console"
},
{
"name": "MCP Server",
"consoleName": "MCP Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"MCP_SERVER_ENABLED": "true",
"MCP_SERVER_PORT": "8090",
"MCP_SERVER_CORS_ORIGINS": "http://localhost:*",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
"args": [
"onyx.mcp_server.api:mcp_app",
"--reload",
"--port",
"8090",
"--timeout-graceful-shutdown",
"0"
],
"presentation": {
"group": "2"
},
"consoleTitle": "MCP Server Console"
},
{
"name": "Celery primary",
"type": "debugpy",
@@ -415,6 +372,7 @@
"onyx.background.celery.versioned_apps.docfetching",
"worker",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docfetching@%n",
@@ -445,6 +403,7 @@
"onyx.background.celery.versioned_apps.docprocessing",
"worker",
"--pool=threads",
"--concurrency=6",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docprocessing@%n",
@@ -522,21 +481,7 @@
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
"name": "Clear and Restart OpenSearch Container",
// Generic debugger type, required arg but has no bearing on bash.
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true,
"presentation": {
"group": "3"
}
@@ -582,10 +527,10 @@
"name": "Install Python Requirements",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeExecutable": "bash",
"runtimeArgs": [
"sync",
"--all-extras"
"-c",
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
@@ -593,150 +538,19 @@
"group": "3"
}
},
{
"name": "Build Sandbox Templates",
"type": "debugpy",
"request": "launch",
"module": "onyx.server.features.build.sandbox.build_templates",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"console": "integratedTerminal",
"presentation": {
"group": "3"
},
"consoleTitle": "Build Sandbox Templates"
},
{
// Dummy entry used to label the group
"name": "--- Database ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "4",
"order": 0
}
},
{
"name": "Restore seeded database dump",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore seeded database dump (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--clean",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Create database snapshot",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"dump",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore database snapshot (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--clean",
"--yes",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Upgrade database to head revision",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"upgrade"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",
"type": "debugpy",
"request": "launch",
"program": "backend/scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}",
"program": "scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "backend"
"PYTHONPATH": "."
},
"args": ["--filename", "backend/generated/openapi.json", "--generate-python-client"]
"args": ["--filename", "generated/openapi.json"]
},
{
// script to debug multi tenant db issues

View File

@@ -1,13 +1,13 @@
# AGENTS.md
This file provides guidance to AI agents when working with code in this repository.
This file provides guidance to Codex when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
@@ -181,286 +181,6 @@ web/
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate usage to paddings instead of margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `web/tailwind-themes/tailwind.config.js` / `web/src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn`s are easier to read. They also allow for more complex types (i.e., string-arrays) to get formatted properly (it flattens each element in that string array down). As a result, it can allow things such as conditionals (i.e., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) to get filtered out.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
{/* The `text03` flag makes the text it renders to be coloured the 3rd-scale grey */}
text03
{/* The `mainAction` flag makes the text it renders to be "main-action" font + line-height + weightage, as described in the Figma */}
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
## Database & Migrations
### Running Migrations
@@ -575,6 +295,14 @@ will be tailing their logs to this file.
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:

View File

@@ -4,10 +4,10 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
@@ -184,286 +184,6 @@ web/
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate usage to paddings instead of margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `tailwind-themes/tailwind.config.js` / `src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn`s are easier to read. They also allow for more complex types (i.e., string-arrays) to get formatted properly (it flattens each element in that string array down). As a result, it can allow things such as conditionals (i.e., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) to get filtered out.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
{/* The `text03` flag makes the text it renders to be coloured the 3rd-scale grey */}
text03
{/* The `mainAction` flag makes the text it renders to be "main-action" font + line-height + weightage, as described in the Figma */}
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
## Database & Migrations
### Running Migrations
@@ -580,6 +300,14 @@ will be tailing their logs to this file.
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:

View File

@@ -1,31 +1,286 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
# Contributing to Onyx
Hey there! We are so excited that you're interested in Onyx.
As an open source project in a rapidly changing space, we welcome all contributions.
## Contribution Opportunities
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to look for and share contribution ideas.
## 💃 Guidelines
If you have your own feature that you would like to build please create an issue and community members can provide feedback and
thumb it up if they feel a common need.
### Contribution Opportunities
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
## Contributing Code
Please reference the documents in contributing_guides folder to ensure that the code base is kept to a high standard.
1. dev_setup.md (start here): gives you a guide to setting up a local development environment.
2. contribution_process.md: how to ensure you are building valuable features that will get reviewed and merged.
3. best_practices.md: before asking for reviews, ensure your changes meet the repo code quality standards.
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
via [Discord](https://discord.gg/4NA5SbzrWb) or [email](mailto:hello@onyx.app).
To contribute, please follow the
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
will be marked with the `approved by maintainers` label.
Issues marked `good first issue` are an especially great place to start.
**Connectors** to other tools are another great place to contribute. For details on how, refer to this
[README.md](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md).
If you have a new/different contribution in mind, we'd love to hear about it!
Your input is vital to making sure that Onyx moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
[Discord](https://discord.gg/4NA5SbzrWb) directly about anything at all.
### Contributing Code
To contribute to this project, please follow the
["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
When opening a pull request, mention related issues and feel free to tag relevant maintainers.
Before creating a pull request please make sure that the new changes conform to the formatting and linting requirements.
See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.
### Getting Help 🙋
Our goal is to make contributing as easy as possible. If you run into any issues please don't hesitate to reach out.
That way we can help future contributors and users can avoid the same issue.
We also have support channels and generally interesting discussions on our
[Discord](https://discord.gg/4NA5SbzrWb).
We would love to see you there!
## Get Started 🚀
Onyx being a fully functional app, relies on some external software, specifically:
- [Postgres](https://www.postgresql.org/) (Relational DB)
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
- [Redis](https://redis.io/) (Cache)
- [MinIO](https://min.io/) (File Store)
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
> **Note:**
> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software. We believe this combination is easier for
> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Onyx stack within Docker below.
### Local Set Up
Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.
If using a lower version, modifications will have to be made to the code.
If using a higher version, sometimes some libraries will not be available (i.e. we had problems with Tensorflow in the past with higher versions of python).
#### Backend: Python requirements
Currently, we use pip and recommend creating a virtual environment.
For convenience here's a command for it:
```bash
python -m venv .venv
source .venv/bin/activate
```
_For Windows, activate the virtual environment using Command Prompt:_
```bash
.venv\Scripts\activate
```
If using PowerShell, the command slightly differs:
```powershell
.venv\Scripts\Activate.ps1
```
Install the required python dependencies:
```bash
pip install -r backend/requirements/combined.txt
```
or
```bash
pip install -r backend/requirements/default.txt
pip install -r backend/requirements/dev.txt
pip install -r backend/requirements/ee.txt
pip install -r backend/requirements/model_server.txt
```
Fix vscode/cursor auto-imports:
```bash
pip install -e .
```
Install Playwright for Python (headless browser required by the Web Connector)
In the activated Python virtualenv, install Playwright for Python by running:
```bash
playwright install
```
You may have to deactivate and reactivate your virtualenv for `playwright` to appear on your path.
#### Frontend: Node dependencies
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
to manage your Node installations. Once installed, you can run
```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```
Navigate to `onyx/web` and run:
```bash
npm i
```
## Formatting and Linting
### Backend
For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
With the virtual environment active, install the pre-commit library with:
```bash
pip install pre-commit
```
Then, from the `onyx/backend` directory, run:
```bash
pre-commit install
```
Additionally, we use `mypy` for static type checking.
Onyx is fully type-annotated, and we want to keep it that way!
To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend` directory.
### Web
We use `prettier` for formatting. The desired version will be installed via a `npm i` from the `onyx/web` directory.
To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
Pre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail.
Re-stage your changes and commit again.
# Running the application for development
## Developing using VSCode Debugger (recommended)
**We highly recommend using VSCode debugger for development.**
See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
Otherwise, you can follow the instructions below to run the application for development.
## Manually running the application for development
### Docker containers for external software
You will need Docker installed to run these containers.
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
```bash
docker compose up -d index relational_db cache minio
```
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
### Running Onyx locally
To start the frontend, navigate to `onyx/web` and run:
```bash
npm run dev
```
Next, start the model server which runs the local NLP models.
Navigate to `onyx/backend` and run:
```bash
uvicorn model_server.main:app --reload --port 9000
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "uvicorn model_server.main:app --reload --port 9000"
```
The first time running Onyx, you will need to run the DB migrations for Postgres.
After the first time, this is no longer required unless the DB models change.
Navigate to `onyx/backend` and with the venv active, run:
```bash
alembic upgrade head
```
Next, start the task queue which orchestrates the background jobs.
Jobs that take more time are run async from the API server.
Still in `onyx/backend`, run:
```bash
python ./scripts/dev_run_background_jobs.py
```
To run the backend API server, navigate back to `onyx/backend` and run:
```bash
AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "
$env:AUTH_TYPE='disabled'
uvicorn onyx.main:app --reload --port 8080
"
```
> **Note:**
> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
#### Wrapping up
You should now have 4 servers running:
- Web server
- Backend API
- Model server
- Background jobs
Now, visit `http://localhost:3000` in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.
You've successfully set up a local Onyx instance! 🏁
#### Running the Onyx application in a container
You can run the full Onyx application stack from pre-built images including all external software dependencies.
Navigate to `onyx/deployment/docker_compose` and run:
```bash
docker compose up -d
```
After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
```bash
docker compose up -d --build
```
## Getting Help 🙋
We have support channels and generally interesting discussions on our [Discord](https://discord.gg/4NA5SbzrWb).
### Release Process
See you there!
## Release Process
Onyx loosely follows the SemVer versioning standard.
Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
A set of Docker containers will be pushed automatically to DockerHub with every tag.

View File

@@ -7,6 +7,8 @@ This guide explains how to set up and use VSCode's debugging capabilities with t
1. **Environment Setup**:
- Copy `.vscode/env_template.txt` to `.vscode/.env`
- Fill in the necessary environment variables in `.vscode/.env`
2. **launch.json**:
- Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
## Using the Debugger

View File

@@ -15,9 +15,3 @@ build/
dist/
.coverage
htmlcov/
model_server/legacy/
# Craft: demo_data directory should be unzipped at container startup, not copied
**/demo_data/
# Craft: templates/outputs/venv is created at container startup
**/templates/outputs/venv

View File

@@ -37,6 +37,10 @@ CVE-2023-50868
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193

View File

@@ -7,22 +7,11 @@ have a contract or agreement with DanswerAI, you are not permitted to use the En
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
# Build argument for Craft support (disabled by default)
# Use --build-arg ENABLE_CRAFT=true to include Node.js and opencode CLI
ARG ENABLE_CRAFT=false
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
PLAYWRIGHT_BROWSERS_PATH="/app/.cache/ms-playwright"
# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Install system dependencies
@@ -50,23 +39,7 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
# Conditionally install Node.js 20 for Craft (required for Next.js)
# Only installed when ENABLE_CRAFT=true
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
echo "Installing Node.js 20 for Craft support..." && \
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y nodejs && \
rm -rf /var/lib/apt/lists/*; \
fi
# Conditionally install opencode CLI for Craft agent functionality
# Only installed when ENABLE_CRAFT=true
# TODO: download a specific, versioned release of the opencode CLI
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
echo "Installing opencode CLI for Craft support..." && \
curl -fsSL https://opencode.ai/install | bash; \
fi
ENV PATH="/root/.opencode/bin:${PATH}"
# Install Python dependencies
# Remove py which is pulled in by retry, py is not needed and is a CVE
@@ -78,7 +51,6 @@ RUN uv pip install --system --no-cache-dir --upgrade \
pip uninstall -y py && \
playwright install chromium && \
playwright install-deps chromium && \
chown -R onyx:onyx /app && \
ln -s /usr/local/bin/supervisord /usr/bin/supervisord && \
# Cleanup for CVEs and size reduction
# https://github.com/tornadoweb/tornado/issues/3107
@@ -111,8 +83,8 @@ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
@@ -122,6 +94,13 @@ tiktoken.get_encoding('cl100k_base')"
# Set up application files
WORKDIR /app
# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
# Enterprise Version Files
COPY --chown=onyx:onyx ./ee /app/ee
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
@@ -139,8 +118,7 @@ COPY --chown=onyx:onyx ./static /app/static
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
COPY --chown=onyx:onyx ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
COPY --chown=onyx:onyx ./scripts/supervisord_entrypoint.sh /app/scripts/supervisord_entrypoint.sh
COPY --chown=onyx:onyx ./scripts/setup_craft_templates.sh /app/scripts/setup_craft_templates.sh
RUN chmod +x /app/scripts/supervisord_entrypoint.sh /app/scripts/setup_craft_templates.sh
RUN chmod +x /app/scripts/supervisord_entrypoint.sh
# Put logo in assets
COPY --chown=onyx:onyx ./assets /app/assets

View File

@@ -1,29 +1,4 @@
# Base stage with dependencies
FROM python:3.11.7-slim-bookworm AS base
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
RUN mkdir -p /app/.cache/huggingface
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt && \
rm -rf ~/.cache/uv /tmp/*.txt
# Stage for downloading embedding models
FROM base AS embedding-models
RUN python -c "from huggingface_hub import snapshot_download; \
snapshot_download('nomic-ai/nomic-embed-text-v1');"
# Initialize SentenceTransformer to cache the custom architecture
RUN python -c "from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
# Final stage - combine all downloads
FROM base AS final
FROM python:3.11.7-slim-bookworm
LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is for the Onyx model server which runs all of the \
@@ -31,17 +6,44 @@ AI models for Onyx. This container and all the code is MIT Licensed and free for
You can find it at https://hub.docker.com/r/onyx/onyx-model-server. For more details, \
visit https://github.com/onyx-dot-app/onyx."
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface
COPY --from=ghcr.io/astral-sh/uv:0.9.9 /uv /uvx /bin/
# Create non-root user for security best practices
RUN groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
RUN mkdir -p /app && \
groupadd -g 1001 onyx && \
useradd -u 1001 -g onyx -m -s /bin/bash onyx && \
chown -R onyx:onyx /app && \
mkdir -p /var/log/onyx && \
chmod 755 /var/log/onyx && \
chown onyx:onyx /var/log/onyx
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
# it's preserved in order to combine with the user's cache contents
COPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface
COPY ./requirements/model_server.txt /tmp/requirements.txt
RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt && \
rm -rf ~/.cache/uv /tmp/*.txt
# Pre-downloading models for setups with limited egress
# Download tokenizers, distilbert for the Onyx model
# Download model weights
# Run Nomic to pull in the custom architecture and have it cached locally
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
snapshot_download(repo_id='onyx-dot-app/information-content-model'); \
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);" && \
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
# it's preserved in order to combine with the user's cache contents
mv /app/.cache/huggingface /app/.cache/temp_huggingface && \
chown -R onyx:onyx /app
WORKDIR /app

View File

@@ -7,12 +7,8 @@ Onyx migrations use a generic single-database configuration with an async dbapi.
## To generate new migrations:
From onyx/backend, run:
`alembic revision -m <DESCRIPTION_OF_MIGRATION>`
Note: you cannot use the `--autogenerate` flag as the automatic schema parsing does not work.
Manually populate the upgrade and downgrade in your new migration.
run from onyx/backend:
`alembic revision --autogenerate -m <DESCRIPTION_OF_MIGRATION>`
More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html

View File

@@ -39,9 +39,7 @@ config = context.config
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
# disable_existing_loggers=False prevents breaking pytest's caplog fixture
# See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
fileConfig(config.config_file_name, disable_existing_loggers=False)
fileConfig(config.config_file_name)
target_metadata = [Base.metadata, ResultModelBase.metadata]
@@ -225,6 +223,7 @@ def do_run_migrations(
) -> None:
if create_schema:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
connection.execute(text(f'SET search_path TO "{schema_name}"'))
@@ -308,7 +307,6 @@ async def run_async_migrations() -> None:
schema_name=schema,
create_schema=create_schema,
)
await connection.commit()
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
@@ -346,7 +344,6 @@ async def run_async_migrations() -> None:
schema_name=schema,
create_schema=create_schema,
)
await connection.commit()
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
@@ -463,49 +460,8 @@ def run_migrations_offline() -> None:
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
Supports pytest-alembic by checking for a pre-configured connection
in context.config.attributes["connection"]. If present, uses that
connection/engine directly instead of creating a new async engine.
"""
# Check if pytest-alembic is providing a connection/engine
connectable = context.config.attributes.get("connection", None)
if connectable is not None:
# pytest-alembic is providing an engine - use it directly
logger.info("run_migrations_online starting (pytest-alembic mode).")
# For pytest-alembic, we use the default schema (public)
schema_name = context.config.attributes.get(
"schema_name", POSTGRES_DEFAULT_SCHEMA
)
# pytest-alembic passes an Engine, we need to get a connection from it
with connectable.connect() as connection:
# Set search path for the schema
connection.execute(text(f'SET search_path TO "{schema_name}"'))
context.configure(
connection=connection,
target_metadata=target_metadata, # type: ignore
include_object=include_object,
version_table_schema=schema_name,
include_schemas=True,
compare_type=True,
compare_server_default=True,
script_location=config.get_main_option("script_location"),
)
with context.begin_transaction():
context.run_migrations()
# Commit the transaction to ensure changes are visible to next migration
connection.commit()
else:
# Normal operation - use async migrations
logger.info("run_migrations_online starting.")
asyncio.run(run_async_migrations())
logger.info("run_migrations_online starting.")
asyncio.run(run_async_migrations())
if context.is_offline_mode():

View File

@@ -1,29 +0,0 @@
"""add is_clarification to chat_message
Revision ID: 18b5b2524446
Revises: 87c52ec39f84
Create Date: 2025-01-16
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "18b5b2524446"
down_revision = "87c52ec39f84"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"chat_message",
sa.Column(
"is_clarification", sa.Boolean(), nullable=False, server_default="false"
),
)
def downgrade() -> None:
op.drop_column("chat_message", "is_clarification")

View File

@@ -1,89 +0,0 @@
"""add internet search and content provider tables
Revision ID: 1f2a3b4c5d6e
Revises: 9drpiiw74ljy
Create Date: 2025-11-10 19:45:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "1f2a3b4c5d6e"
down_revision = "9drpiiw74ljy"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"internet_search_provider",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("name", sa.String(), nullable=False, unique=True),
sa.Column("provider_type", sa.String(), nullable=False),
sa.Column("api_key", sa.LargeBinary(), nullable=True),
sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column(
"is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
)
op.create_index(
"ix_internet_search_provider_is_active",
"internet_search_provider",
["is_active"],
)
op.create_table(
"internet_content_provider",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("name", sa.String(), nullable=False, unique=True),
sa.Column("provider_type", sa.String(), nullable=False),
sa.Column("api_key", sa.LargeBinary(), nullable=True),
sa.Column("config", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column(
"is_active", sa.Boolean(), nullable=False, server_default=sa.text("false")
),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
)
op.create_index(
"ix_internet_content_provider_is_active",
"internet_content_provider",
["is_active"],
)
def downgrade() -> None:
op.drop_index(
"ix_internet_content_provider_is_active", table_name="internet_content_provider"
)
op.drop_table("internet_content_provider")
op.drop_index(
"ix_internet_search_provider_is_active", table_name="internet_search_provider"
)
op.drop_table("internet_search_provider")

View File

@@ -1,351 +0,0 @@
"""single onyx craft migration
Consolidates all buildmode/onyx craft tables into a single migration.
Tables created:
- build_session: User build sessions with status tracking
- sandbox: User-owned containerized environments (one per user)
- artifact: Build output files (web apps, documents, images)
- snapshot: Sandbox filesystem snapshots
- build_message: Conversation messages for build sessions
Existing table modified:
- connector_credential_pair: Added processing_mode column
Revision ID: 2020d417ec84
Revises: 41fa44bef321
Create Date: 2026-01-26 14:43:54.641405
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "2020d417ec84"
down_revision = "41fa44bef321"
branch_labels = None
depends_on = None
def upgrade() -> None:
# ==========================================================================
# ENUMS
# ==========================================================================
# Build session status enum
build_session_status_enum = sa.Enum(
"active",
"idle",
name="buildsessionstatus",
native_enum=False,
)
# Sandbox status enum
sandbox_status_enum = sa.Enum(
"provisioning",
"running",
"idle",
"sleeping",
"terminated",
"failed",
name="sandboxstatus",
native_enum=False,
)
# Artifact type enum
artifact_type_enum = sa.Enum(
"web_app",
"pptx",
"docx",
"markdown",
"excel",
"image",
name="artifacttype",
native_enum=False,
)
# ==========================================================================
# BUILD_SESSION TABLE
# ==========================================================================
op.create_table(
"build_session",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id", ondelete="CASCADE"),
nullable=True,
),
sa.Column("name", sa.String(), nullable=True),
sa.Column(
"status",
build_session_status_enum,
nullable=False,
server_default="active",
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"last_activity_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("nextjs_port", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_build_session_user_created",
"build_session",
["user_id", sa.text("created_at DESC")],
unique=False,
)
op.create_index(
"ix_build_session_status",
"build_session",
["status"],
unique=False,
)
# ==========================================================================
# SANDBOX TABLE (user-owned, one per user)
# ==========================================================================
op.create_table(
"sandbox",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("container_id", sa.String(), nullable=True),
sa.Column(
"status",
sandbox_status_enum,
nullable=False,
server_default="provisioning",
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("last_heartbeat", sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("user_id", name="sandbox_user_id_key"),
)
op.create_index(
"ix_sandbox_status",
"sandbox",
["status"],
unique=False,
)
op.create_index(
"ix_sandbox_container_id",
"sandbox",
["container_id"],
unique=False,
)
# ==========================================================================
# ARTIFACT TABLE
# ==========================================================================
op.create_table(
"artifact",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("type", artifact_type_enum, nullable=False),
sa.Column("path", sa.String(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_artifact_session_created",
"artifact",
["session_id", sa.text("created_at DESC")],
unique=False,
)
op.create_index(
"ix_artifact_type",
"artifact",
["type"],
unique=False,
)
# ==========================================================================
# SNAPSHOT TABLE
# ==========================================================================
op.create_table(
"snapshot",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("storage_path", sa.String(), nullable=False),
sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_snapshot_session_created",
"snapshot",
["session_id", sa.text("created_at DESC")],
unique=False,
)
# ==========================================================================
# BUILD_MESSAGE TABLE
# ==========================================================================
op.create_table(
"build_message",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column(
"turn_index",
sa.Integer(),
nullable=False,
),
sa.Column(
"type",
sa.Enum(
"SYSTEM",
"USER",
"ASSISTANT",
"DANSWER",
name="messagetype",
create_type=False,
native_enum=False,
),
nullable=False,
),
sa.Column(
"message_metadata",
postgresql.JSONB(),
nullable=False,
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_build_message_session_turn",
"build_message",
["session_id", "turn_index", sa.text("created_at ASC")],
unique=False,
)
# ==========================================================================
# CONNECTOR_CREDENTIAL_PAIR MODIFICATION
# ==========================================================================
op.add_column(
"connector_credential_pair",
sa.Column(
"processing_mode",
sa.String(),
nullable=False,
server_default="regular",
),
)
def downgrade() -> None:
# ==========================================================================
# CONNECTOR_CREDENTIAL_PAIR MODIFICATION
# ==========================================================================
op.drop_column("connector_credential_pair", "processing_mode")
# ==========================================================================
# BUILD_MESSAGE TABLE
# ==========================================================================
op.drop_index("ix_build_message_session_turn", table_name="build_message")
op.drop_table("build_message")
# ==========================================================================
# SNAPSHOT TABLE
# ==========================================================================
op.drop_index("ix_snapshot_session_created", table_name="snapshot")
op.drop_table("snapshot")
# ==========================================================================
# ARTIFACT TABLE
# ==========================================================================
op.drop_index("ix_artifact_type", table_name="artifact")
op.drop_index("ix_artifact_session_created", table_name="artifact")
op.drop_table("artifact")
sa.Enum(name="artifacttype").drop(op.get_bind(), checkfirst=True)
# ==========================================================================
# SANDBOX TABLE
# ==========================================================================
op.drop_index("ix_sandbox_container_id", table_name="sandbox")
op.drop_index("ix_sandbox_status", table_name="sandbox")
op.drop_table("sandbox")
sa.Enum(name="sandboxstatus").drop(op.get_bind(), checkfirst=True)
# ==========================================================================
# BUILD_SESSION TABLE
# ==========================================================================
op.drop_index("ix_build_session_status", table_name="build_session")
op.drop_index("ix_build_session_user_created", table_name="build_session")
op.drop_table("build_session")
sa.Enum(name="buildsessionstatus").drop(op.get_bind(), checkfirst=True)

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "23957775e5f5"
down_revision = "bc9771dccadf"
branch_labels = None
depends_on = None
branch_labels = None # type: ignore
depends_on = None # type: ignore
def upgrade() -> None:

View File

@@ -1,27 +0,0 @@
"""add last refreshed at mcp server
Revision ID: 2a391f840e85
Revises: 4cebcbc9b2ae
Create Date: 2025-12-06 15:19:59.766066
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembi.
revision = "2a391f840e85"
down_revision = "4cebcbc9b2ae"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"mcp_server",
sa.Column("last_refreshed_at", sa.DateTime(timezone=True), nullable=True),
)
def downgrade() -> None:
op.drop_column("mcp_server", "last_refreshed_at")

View File

@@ -1,46 +0,0 @@
"""usage_limits
Revision ID: 2b90f3af54b8
Revises: 9a0296d7421e
Create Date: 2026-01-03 16:55:30.449692
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2b90f3af54b8"
down_revision = "9a0296d7421e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"tenant_usage",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"window_start", sa.DateTime(timezone=True), nullable=False, index=True
),
sa.Column("llm_cost_cents", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("chunks_indexed", sa.Integer(), nullable=False, server_default="0"),
sa.Column("api_calls", sa.Integer(), nullable=False, server_default="0"),
sa.Column(
"non_streaming_api_calls", sa.Integer(), nullable=False, server_default="0"
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=True,
),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("window_start", name="uq_tenant_usage_window"),
)
def downgrade() -> None:
op.drop_index("ix_tenant_usage_window_start", table_name="tenant_usage")
op.drop_table("tenant_usage")

View File

@@ -1,42 +0,0 @@
"""add_unique_constraint_to_inputprompt_prompt_user_id
Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique constraint on (prompt, user_id) for user-owned prompts
# This ensures each user can only have one shortcut with a given name
op.create_unique_constraint(
"uq_inputprompt_prompt_user_id",
"inputprompt",
["prompt", "user_id"],
)
# Create partial unique index for public prompts (where user_id IS NULL)
# PostgreSQL unique constraints don't enforce uniqueness for NULL values,
# so we need a partial index to ensure public prompt names are also unique
op.execute(
"""
CREATE UNIQUE INDEX uq_inputprompt_prompt_public
ON inputprompt (prompt)
WHERE user_id IS NULL
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
op.drop_constraint("uq_inputprompt_prompt_user_id", "inputprompt", type_="unique")

View File

@@ -1,89 +0,0 @@
"""seed_exa_provider_from_env
Revision ID: 3c9a65f1207f
Revises: 1f2a3b4c5d6e
Create Date: 2025-11-20 19:18:00.000000
"""
from __future__ import annotations
import os
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from dotenv import load_dotenv, find_dotenv
from onyx.utils.encryption import encrypt_string_to_bytes
revision = "3c9a65f1207f"
down_revision = "1f2a3b4c5d6e"
branch_labels = None
depends_on = None
EXA_PROVIDER_NAME = "Exa"
def _get_internet_search_table(metadata: sa.MetaData) -> sa.Table:
return sa.Table(
"internet_search_provider",
metadata,
sa.Column("id", sa.Integer, primary_key=True),
sa.Column("name", sa.String),
sa.Column("provider_type", sa.String),
sa.Column("api_key", sa.LargeBinary),
sa.Column("config", postgresql.JSONB),
sa.Column("is_active", sa.Boolean),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text("now()"),
),
)
def upgrade() -> None:
load_dotenv(find_dotenv())
exa_api_key = os.environ.get("EXA_API_KEY")
if not exa_api_key:
return
bind = op.get_bind()
metadata = sa.MetaData()
table = _get_internet_search_table(metadata)
existing = bind.execute(
sa.select(table.c.id).where(table.c.name == EXA_PROVIDER_NAME)
).first()
if existing:
return
encrypted_key = encrypt_string_to_bytes(exa_api_key)
has_active_provider = bind.execute(
sa.select(table.c.id).where(table.c.is_active.is_(True))
).first()
bind.execute(
table.insert().values(
name=EXA_PROVIDER_NAME,
provider_type="exa",
api_key=encrypted_key,
config=None,
is_active=not bool(has_active_provider),
)
)
def downgrade() -> None:
return

View File

@@ -1,29 +0,0 @@
"""remove default prompt shortcuts
Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Delete any user associations for the default prompts first (foreign key constraint)
op.execute(
"DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)"
)
# Delete the pre-seeded default prompt shortcuts (they have negative IDs)
op.execute("DELETE FROM inputprompt WHERE id < 0")
def downgrade() -> None:
# We don't restore the default prompts on downgrade
pass

View File

@@ -11,7 +11,7 @@ from pydantic import BaseModel, ConfigDict
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.well_known_providers.llm_provider_options import (
from onyx.llm.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)

View File

@@ -1,27 +0,0 @@
"""add tab_index to tool_call
Revision ID: 4cebcbc9b2ae
Revises: a1b2c3d4e5f6
Create Date: 2025-12-16
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "4cebcbc9b2ae"
down_revision = "a1b2c3d4e5f6"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"tool_call",
sa.Column("tab_index", sa.Integer(), nullable=False, server_default="0"),
)
def downgrade() -> None:
op.drop_column("tool_call", "tab_index")

View File

@@ -62,11 +62,6 @@ def upgrade() -> None:
)
"""
)
# Drop the temporary table to avoid conflicts if migration runs again
# (e.g., during upgrade -> downgrade -> upgrade cycles in tests)
op.execute("DROP TABLE IF EXISTS temp_connector_credential")
# If no exception was raised, alter the column
op.alter_column("credential", "source", nullable=True) # TODO modify
# # ### end Alembic commands ###

View File

@@ -1,104 +0,0 @@
"""add_open_url_tool
Revision ID: 4f8a2b3c1d9e
Revises: a852cbe15577
Create Date: 2025-11-24 12:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "4f8a2b3c1d9e"
down_revision = "a852cbe15577"
branch_labels = None
depends_on = None
OPEN_URL_TOOL = {
"name": "OpenURLTool",
"display_name": "Open URL",
"description": (
"The Open URL Action allows the agent to fetch and read contents of web pages."
),
"in_code_tool_id": "OpenURLTool",
"enabled": True,
}
def upgrade() -> None:
conn = op.get_bind()
# Check if tool already exists
existing = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
{"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]},
).fetchone()
if existing:
tool_id = existing[0]
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
OPEN_URL_TOOL,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
"""
),
OPEN_URL_TOOL,
)
# Get the newly inserted tool's id
result = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
{"in_code_tool_id": OPEN_URL_TOOL["in_code_tool_id"]},
).fetchone()
tool_id = result[0] # type: ignore
# Associate the tool with all existing personas
# Get all persona IDs
persona_ids = conn.execute(sa.text("SELECT id FROM persona")).fetchall()
for (persona_id,) in persona_ids:
# Check if association already exists
exists = conn.execute(
sa.text(
"""
SELECT 1 FROM persona__tool
WHERE persona_id = :persona_id AND tool_id = :tool_id
"""
),
{"persona_id": persona_id, "tool_id": tool_id},
).fetchone()
if not exists:
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (:persona_id, :tool_id)
"""
),
{"persona_id": persona_id, "tool_id": tool_id},
)
def downgrade() -> None:
# We don't remove the tool on downgrade since it's fine to have it around.
# If we upgrade again, it will be a no-op.
pass

View File

@@ -85,122 +85,103 @@ class UserRow(NamedTuple):
def upgrade() -> None:
conn = op.get_bind()
# Step 1: Create or update the unified assistant (ID 0)
search_assistant = conn.execute(
sa.text("SELECT * FROM persona WHERE id = 0")
).fetchone()
# Start transaction
conn.execute(sa.text("BEGIN"))
if search_assistant:
# Update existing Search assistant to be the unified assistant
try:
# Step 1: Create or update the unified assistant (ID 0)
search_assistant = conn.execute(
sa.text("SELECT * FROM persona WHERE id = 0")
).fetchone()
if search_assistant:
# Update existing Search assistant to be the unified assistant
conn.execute(
sa.text(
"""
UPDATE persona
SET name = :name,
description = :description,
system_prompt = :system_prompt,
num_chunks = :num_chunks,
is_default_persona = true,
is_visible = true,
deleted = false,
display_priority = :display_priority,
llm_filter_extraction = :llm_filter_extraction,
llm_relevance_filter = :llm_relevance_filter,
recency_bias = :recency_bias,
chunks_above = :chunks_above,
chunks_below = :chunks_below,
datetime_aware = :datetime_aware,
starter_messages = null
WHERE id = 0
"""
),
INSERT_DICT,
)
else:
# Create new unified assistant with ID 0
conn.execute(
sa.text(
"""
INSERT INTO persona (
id, name, description, system_prompt, num_chunks,
is_default_persona, is_visible, deleted, display_priority,
llm_filter_extraction, llm_relevance_filter, recency_bias,
chunks_above, chunks_below, datetime_aware, starter_messages,
builtin_persona
) VALUES (
0, :name, :description, :system_prompt, :num_chunks,
true, true, false, :display_priority, :llm_filter_extraction,
:llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
:datetime_aware, null, true
)
"""
),
INSERT_DICT,
)
# Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
conn.execute(
sa.text(
"""
UPDATE persona
SET name = :name,
description = :description,
system_prompt = :system_prompt,
num_chunks = :num_chunks,
is_default_persona = true,
is_visible = true,
deleted = false,
display_priority = :display_priority,
llm_filter_extraction = :llm_filter_extraction,
llm_relevance_filter = :llm_relevance_filter,
recency_bias = :recency_bias,
chunks_above = :chunks_above,
chunks_below = :chunks_below,
datetime_aware = :datetime_aware,
starter_messages = null
WHERE id = 0
SET deleted = true, is_visible = false, is_default_persona = false
WHERE builtin_persona = true AND id != 0
"""
),
INSERT_DICT,
)
else:
# Create new unified assistant with ID 0
conn.execute(
sa.text(
"""
INSERT INTO persona (
id, name, description, system_prompt, num_chunks,
is_default_persona, is_visible, deleted, display_priority,
llm_filter_extraction, llm_relevance_filter, recency_bias,
chunks_above, chunks_below, datetime_aware, starter_messages,
builtin_persona
) VALUES (
0, :name, :description, :system_prompt, :num_chunks,
true, true, false, :display_priority, :llm_filter_extraction,
:llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
:datetime_aware, null, true
)
"""
),
INSERT_DICT,
)
)
# Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = true, is_visible = false, is_default_persona = false
WHERE builtin_persona = true AND id != 0
"""
)
)
# Step 3: Add all built-in tools to the unified assistant
# First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
).fetchone()
# Step 3: Add all built-in tools to the unified assistant
# First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
).fetchone()
if not search_tool:
raise ValueError(
"SearchTool not found in database. Ensure tools migration has run first."
)
if not search_tool:
raise ValueError(
"SearchTool not found in database. Ensure tools migration has run first."
)
image_gen_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
).fetchone()
image_gen_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
).fetchone()
if not image_gen_tool:
raise ValueError(
"ImageGenerationTool not found in database. Ensure tools migration has run first."
)
if not image_gen_tool:
raise ValueError(
"ImageGenerationTool not found in database. Ensure tools migration has run first."
)
# WebSearchTool is optional - may not be configured
web_search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
).fetchone()
# WebSearchTool is optional - may not be configured
web_search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
).fetchone()
# Clear existing tool associations for persona 0
conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
# Clear existing tool associations for persona 0
conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
# Add tools to the unified assistant
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": search_tool[0]},
)
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": image_gen_tool[0]},
)
if web_search_tool:
# Add tools to the unified assistant
conn.execute(
sa.text(
"""
@@ -209,148 +190,191 @@ def upgrade() -> None:
ON CONFLICT DO NOTHING
"""
),
{"tool_id": web_search_tool[0]},
{"tool_id": search_tool[0]},
)
# Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
conn.execute(
sa.text(
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
UPDATE chat_session
SET persona_id = 0
WHERE persona_id IN (
SELECT id FROM persona WHERE builtin_persona = true AND id != 0
)
"""
),
{"tool_id": image_gen_tool[0]},
)
)
# Step 5: Migrate user preferences - remove references to all builtin assistants
# First, get all builtin assistant IDs (except 0)
builtin_assistants_result = conn.execute(
sa.text(
"""
SELECT id FROM persona
WHERE builtin_persona = true AND id != 0
"""
)
).fetchall()
builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
# Get all users with preferences
users_result = conn.execute(
sa.text(
"""
SELECT id, chosen_assistants, visible_assistants,
hidden_assistants, pinned_assistants
FROM "user"
"""
)
).fetchall()
for user_row in users_result:
user = UserRow(*user_row)
user_id: UUID = user.id
updates: dict[str, Any] = {}
# Remove all builtin assistants from chosen_assistants
if user.chosen_assistants:
new_chosen: list[int] = [
assistant_id
for assistant_id in user.chosen_assistants
if assistant_id not in builtin_assistant_ids
]
if new_chosen != user.chosen_assistants:
updates["chosen_assistants"] = json.dumps(new_chosen)
# Remove all builtin assistants from visible_assistants
if user.visible_assistants:
new_visible: list[int] = [
assistant_id
for assistant_id in user.visible_assistants
if assistant_id not in builtin_assistant_ids
]
if new_visible != user.visible_assistants:
updates["visible_assistants"] = json.dumps(new_visible)
# Add all builtin assistants to hidden_assistants
if user.hidden_assistants:
new_hidden: list[int] = list(user.hidden_assistants)
for old_id in builtin_assistant_ids:
if old_id not in new_hidden:
new_hidden.append(old_id)
if new_hidden != user.hidden_assistants:
updates["hidden_assistants"] = json.dumps(new_hidden)
else:
updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
# Remove all builtin assistants from pinned_assistants
if user.pinned_assistants:
new_pinned: list[int] = [
assistant_id
for assistant_id in user.pinned_assistants
if assistant_id not in builtin_assistant_ids
]
if new_pinned != user.pinned_assistants:
updates["pinned_assistants"] = json.dumps(new_pinned)
# Apply updates if any
if updates:
set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
updates["user_id"] = str(user_id) # Convert UUID to string for SQL
if web_search_tool:
conn.execute(
sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
updates,
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": web_search_tool[0]},
)
# Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
conn.execute(
sa.text(
"""
UPDATE chat_session
SET persona_id = 0
WHERE persona_id IN (
SELECT id FROM persona WHERE builtin_persona = true AND id != 0
)
"""
)
)
# Step 5: Migrate user preferences - remove references to all builtin assistants
# First, get all builtin assistant IDs (except 0)
builtin_assistants_result = conn.execute(
sa.text(
"""
SELECT id FROM persona
WHERE builtin_persona = true AND id != 0
"""
)
).fetchall()
builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
# Get all users with preferences
users_result = conn.execute(
sa.text(
"""
SELECT id, chosen_assistants, visible_assistants,
hidden_assistants, pinned_assistants
FROM "user"
"""
)
).fetchall()
for user_row in users_result:
user = UserRow(*user_row)
user_id: UUID = user.id
updates: dict[str, Any] = {}
# Remove all builtin assistants from chosen_assistants
if user.chosen_assistants:
new_chosen: list[int] = [
assistant_id
for assistant_id in user.chosen_assistants
if assistant_id not in builtin_assistant_ids
]
if new_chosen != user.chosen_assistants:
updates["chosen_assistants"] = json.dumps(new_chosen)
# Remove all builtin assistants from visible_assistants
if user.visible_assistants:
new_visible: list[int] = [
assistant_id
for assistant_id in user.visible_assistants
if assistant_id not in builtin_assistant_ids
]
if new_visible != user.visible_assistants:
updates["visible_assistants"] = json.dumps(new_visible)
# Add all builtin assistants to hidden_assistants
if user.hidden_assistants:
new_hidden: list[int] = list(user.hidden_assistants)
for old_id in builtin_assistant_ids:
if old_id not in new_hidden:
new_hidden.append(old_id)
if new_hidden != user.hidden_assistants:
updates["hidden_assistants"] = json.dumps(new_hidden)
else:
updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
# Remove all builtin assistants from pinned_assistants
if user.pinned_assistants:
new_pinned: list[int] = [
assistant_id
for assistant_id in user.pinned_assistants
if assistant_id not in builtin_assistant_ids
]
if new_pinned != user.pinned_assistants:
updates["pinned_assistants"] = json.dumps(new_pinned)
# Apply updates if any
if updates:
set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
updates["user_id"] = str(user_id) # Convert UUID to string for SQL
conn.execute(
sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
updates,
)
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
conn = op.get_bind()
# Only restore General (ID -1) and Art (ID -3) assistants
# Step 1: Keep Search assistant (ID 0) as default but restore original state
conn.execute(
sa.text(
# Start transaction
conn.execute(sa.text("BEGIN"))
try:
# Only restore General (ID -1) and Art (ID -3) assistants
# Step 1: Keep Search assistant (ID 0) as default but restore original state
conn.execute(
sa.text(
"""
UPDATE persona
SET is_default_persona = true,
is_visible = true,
deleted = false
WHERE id = 0
"""
UPDATE persona
SET is_default_persona = true,
is_visible = true,
deleted = false
WHERE id = 0
"""
)
)
)
# Step 2: Restore General assistant (ID -1)
conn.execute(
sa.text(
# Step 2: Restore General assistant (ID -1)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :general_assistant_id
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :general_assistant_id
"""
),
{"general_assistant_id": GENERAL_ASSISTANT_ID},
)
),
{"general_assistant_id": GENERAL_ASSISTANT_ID},
)
# Step 3: Restore Art assistant (ID -3)
conn.execute(
sa.text(
# Step 3: Restore Art assistant (ID -3)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :art_assistant_id
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :art_assistant_id
"""
),
{"art_assistant_id": ART_ASSISTANT_ID},
)
),
{"art_assistant_id": ART_ASSISTANT_ID},
)
# Note: We don't restore the original tool associations, names, or descriptions
# as those would require more complex logic to determine original state.
# We also cannot restore original chat session persona_ids as we don't
# have the original mappings.
# Other builtin assistants remain deleted as per the requirement.
# Note: We don't restore the original tool associations, names, or descriptions
# as those would require more complex logic to determine original state.
# We also cannot restore original chat session persona_ids as we don't
# have the original mappings.
# Other builtin assistants remain deleted as per the requirement.
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e

View File

@@ -1,35 +0,0 @@
"""backend driven notification details
Revision ID: 5c3dca366b35
Revises: 9087b548dd69
Create Date: 2026-01-06 16:03:11.413724
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5c3dca366b35"
down_revision = "9087b548dd69"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"notification",
sa.Column(
"title", sa.String(), nullable=False, server_default="New Notification"
),
)
op.add_column(
"notification",
sa.Column("description", sa.String(), nullable=True, server_default=""),
)
def downgrade() -> None:
op.drop_column("notification", "title")
op.drop_column("notification", "description")

View File

@@ -1,55 +0,0 @@
"""update_default_persona_prompt
Revision ID: 5e6f7a8b9c0d
Revises: 4f8a2b3c1d9e
Create Date: 2025-11-30 12:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5e6f7a8b9c0d"
down_revision = "4f8a2b3c1d9e"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].{citation_reminder_or_empty}
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :system_prompt
WHERE id = :persona_id
"""
),
{"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# We don't revert the system prompt on downgrade since we don't know
# what the previous value was. The new prompt is a reasonable default.
pass

View File

@@ -1,44 +0,0 @@
"""add_created_at_in_project_userfile
Revision ID: 6436661d5b65
Revises: c7e9f4a3b2d1
Create Date: 2025-11-24 11:50:24.536052
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "6436661d5b65"
down_revision = "c7e9f4a3b2d1"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add created_at column to project__user_file table
op.add_column(
"project__user_file",
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
)
# Add composite index on (project_id, created_at DESC)
op.create_index(
"ix_project__user_file_project_id_created_at",
"project__user_file",
["project_id", sa.text("created_at DESC")],
)
def downgrade() -> None:
# Remove composite index on (project_id, created_at)
op.drop_index(
"ix_project__user_file_project_id_created_at", table_name="project__user_file"
)
# Remove created_at column from project__user_file table
op.drop_column("project__user_file", "created_at")

View File

@@ -1,75 +0,0 @@
"""nullify_default_task_prompt
Revision ID: 699221885109
Revises: 7e490836d179
Create Date: 2025-12-30 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "699221885109"
down_revision = "7e490836d179"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
def upgrade() -> None:
# Make task_prompt column nullable
# Note: The model had nullable=True but the DB column was NOT NULL until this point
op.alter_column(
"persona",
"task_prompt",
nullable=True,
)
# Set task_prompt to NULL for the default persona
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = NULL
WHERE id = :persona_id
"""
),
{"persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# Restore task_prompt to empty string for the default persona
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = ''
WHERE id = :persona_id AND task_prompt IS NULL
"""
),
{"persona_id": DEFAULT_PERSONA_ID},
)
# Set any remaining NULL task_prompts to empty string before making non-nullable
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = ''
WHERE task_prompt IS NULL
"""
)
)
# Revert task_prompt column to not nullable
op.alter_column(
"persona",
"task_prompt",
nullable=False,
)

View File

@@ -1,54 +0,0 @@
"""add image generation config table
Revision ID: 7206234e012a
Revises: 699221885109
Create Date: 2025-12-21 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7206234e012a"
down_revision = "699221885109"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"image_generation_config",
sa.Column("image_provider_id", sa.String(), primary_key=True),
sa.Column("model_configuration_id", sa.Integer(), nullable=False),
sa.Column("is_default", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(
["model_configuration_id"],
["model_configuration.id"],
ondelete="CASCADE",
),
)
op.create_index(
"ix_image_generation_config_is_default",
"image_generation_config",
["is_default"],
unique=False,
)
op.create_index(
"ix_image_generation_config_model_configuration_id",
"image_generation_config",
["model_configuration_id"],
unique=False,
)
def downgrade() -> None:
op.drop_index(
"ix_image_generation_config_model_configuration_id",
table_name="image_generation_config",
)
op.drop_index(
"ix_image_generation_config_is_default", table_name="image_generation_config"
)
op.drop_table("image_generation_config")

View File

@@ -1,45 +0,0 @@
"""make processing mode default all caps
Revision ID: 72aa7de2e5cf
Revises: 2020d417ec84
Create Date: 2026-01-26 18:58:47.705253
This migration fixes the ProcessingMode enum value mismatch:
- SQLAlchemy's Enum with native_enum=False uses enum member NAMES as valid values
- The original migration stored lowercase VALUES ('regular', 'file_system')
- This converts existing data to uppercase NAMES ('REGULAR', 'FILE_SYSTEM')
- Also drops any spurious native PostgreSQL enum type that may have been auto-created
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "72aa7de2e5cf"
down_revision = "2020d417ec84"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Convert existing lowercase values to uppercase to match enum member names
op.execute(
"UPDATE connector_credential_pair SET processing_mode = 'REGULAR' "
"WHERE processing_mode = 'regular'"
)
op.execute(
"UPDATE connector_credential_pair SET processing_mode = 'FILE_SYSTEM' "
"WHERE processing_mode = 'file_system'"
)
# Update the server default to use uppercase
op.alter_column(
"connector_credential_pair",
"processing_mode",
server_default="REGULAR",
)
def downgrade() -> None:
# State prior to this was broken, so we don't want to revert back to it
pass

View File

@@ -1,47 +0,0 @@
"""add_search_query_table
Revision ID: 73e9983e5091
Revises: d1b637d7050a
Create Date: 2026-01-14 14:16:52.837489
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "73e9983e5091"
down_revision = "d1b637d7050a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"search_query",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id"),
nullable=False,
),
sa.Column("query", sa.String(), nullable=False),
sa.Column("query_expansions", postgresql.ARRAY(sa.String()), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
)
op.create_index("ix_search_query_user_id", "search_query", ["user_id"])
op.create_index("ix_search_query_created_at", "search_query", ["created_at"])
def downgrade() -> None:
op.drop_index("ix_search_query_created_at", table_name="search_query")
op.drop_index("ix_search_query_user_id", table_name="search_query")
op.drop_table("search_query")

View File

@@ -10,7 +10,8 @@ from alembic import op
import sqlalchemy as sa
from onyx.db.models import IndexModelStatus
from onyx.context.search.enums import RecencyBiasSetting, SearchType
from onyx.context.search.enums import RecencyBiasSetting
from onyx.context.search.enums import SearchType
# revision identifiers, used by Alembic.
revision = "776b3bbe9092"

View File

@@ -10,7 +10,7 @@ from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.well_known_providers.llm_provider_options import (
from onyx.llm.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)

View File

@@ -1,27 +0,0 @@
"""Add display_name to model_configuration
Revision ID: 7bd55f264e1b
Revises: e8f0d2a38171
Create Date: 2025-12-04
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7bd55f264e1b"
down_revision = "e8f0d2a38171"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"model_configuration",
sa.Column("display_name", sa.String(), nullable=True),
)
def downgrade() -> None:
op.drop_column("model_configuration", "display_name")

View File

@@ -1,80 +0,0 @@
"""nullify_default_system_prompt
Revision ID: 7e490836d179
Revises: c1d2e3f4a5b6
Create Date: 2025-12-29 16:54:36.635574
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7e490836d179"
down_revision = "c1d2e3f4a5b6"
branch_labels = None
depends_on = None
# This is the default system prompt from the previous migration (87c52ec39f84)
# ruff: noqa: E501, W605 start
PREVIOUS_DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
# Make system_prompt column nullable (model already has nullable=True but DB doesn't)
op.alter_column(
"persona",
"system_prompt",
nullable=True,
)
# Set system_prompt to NULL where it matches the previous default
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = NULL
WHERE system_prompt = :previous_default
"""
),
{"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
)
def downgrade() -> None:
# Restore the default system prompt for personas that have NULL
# Note: This may restore the prompt to personas that originally had NULL
# before this migration, but there's no way to distinguish them
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :previous_default
WHERE system_prompt IS NULL
"""
),
{"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
)
# Revert system_prompt column to not nullable
op.alter_column(
"persona",
"system_prompt",
nullable=False,
)

View File

@@ -42,13 +42,13 @@ def upgrade() -> None:
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
server_default=sa.text("now()"), # type: ignore
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
server_default=sa.text("now()"), # type: ignore
nullable=False,
),
)
@@ -63,13 +63,13 @@ def upgrade() -> None:
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
server_default=sa.text("now()"), # type: ignore
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
server_default=sa.text("now()"), # type: ignore
nullable=False,
),
sa.ForeignKeyConstraint(

View File

@@ -1,49 +0,0 @@
"""notifications constraint, sort index, and cleanup old notifications
Revision ID: 8405ca81cc83
Revises: a3c1a7904cd0
Create Date: 2026-01-07 16:43:44.855156
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "8405ca81cc83"
down_revision = "a3c1a7904cd0"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique index for notification deduplication.
# This enables atomic ON CONFLICT DO NOTHING inserts in batch_create_notifications.
#
# Uses COALESCE to handle NULL additional_data (NULLs are normally distinct
# in unique constraints, but we want NULL == NULL for deduplication).
# The '{}' represents an empty JSONB object as the NULL replacement.
# Clean up legacy notifications first
op.execute("DELETE FROM notification WHERE title = 'New Notification'")
op.execute(
"""
CREATE UNIQUE INDEX IF NOT EXISTS ix_notification_user_type_data
ON notification (user_id, notif_type, COALESCE(additional_data, '{}'::jsonb))
"""
)
# Create index for efficient notification sorting by user
# Covers: WHERE user_id = ? ORDER BY dismissed, first_shown DESC
op.execute(
"""
CREATE INDEX IF NOT EXISTS ix_notification_user_sort
ON notification (user_id, dismissed, first_shown DESC)
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS ix_notification_user_type_data")
op.execute("DROP INDEX IF EXISTS ix_notification_user_sort")

View File

@@ -1,55 +0,0 @@
"""update_default_system_prompt
Revision ID: 87c52ec39f84
Revises: 7bd55f264e1b
Create Date: 2025-12-05 15:54:06.002452
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "87c52ec39f84"
down_revision = "7bd55f264e1b"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :system_prompt
WHERE id = :persona_id
"""
),
{"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# We don't revert the system prompt on downgrade since we don't know
# what the previous value was. The new prompt is a reasonable default.
pass

View File

@@ -1,116 +0,0 @@
"""Add Discord bot tables
Revision ID: 8b5ce697290e
Revises: a1b2c3d4e5f7
Create Date: 2025-01-14
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "8b5ce697290e"
down_revision = "a1b2c3d4e5f7"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
# DiscordBotConfig (singleton table - one per tenant)
op.create_table(
"discord_bot_config",
sa.Column(
"id",
sa.String(),
primary_key=True,
server_default=sa.text("'SINGLETON'"),
),
sa.Column("bot_token", sa.LargeBinary(), nullable=False), # EncryptedString
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.CheckConstraint("id = 'SINGLETON'", name="ck_discord_bot_config_singleton"),
)
# DiscordGuildConfig
op.create_table(
"discord_guild_config",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("guild_id", sa.BigInteger(), nullable=True, unique=True),
sa.Column("guild_name", sa.String(), nullable=True),
sa.Column("registration_key", sa.String(), nullable=False, unique=True),
sa.Column("registered_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"default_persona_id",
sa.Integer(),
sa.ForeignKey("persona.id", ondelete="SET NULL"),
nullable=True,
),
sa.Column(
"enabled", sa.Boolean(), server_default=sa.text("true"), nullable=False
),
)
# DiscordChannelConfig
op.create_table(
"discord_channel_config",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column(
"guild_config_id",
sa.Integer(),
sa.ForeignKey("discord_guild_config.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("channel_id", sa.BigInteger(), nullable=False),
sa.Column("channel_name", sa.String(), nullable=False),
sa.Column(
"channel_type",
sa.String(20),
server_default=sa.text("'text'"),
nullable=False,
),
sa.Column(
"is_private",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.Column(
"thread_only_mode",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.Column(
"require_bot_invocation",
sa.Boolean(),
server_default=sa.text("true"),
nullable=False,
),
sa.Column(
"persona_override_id",
sa.Integer(),
sa.ForeignKey("persona.id", ondelete="SET NULL"),
nullable=True,
),
sa.Column(
"enabled", sa.Boolean(), server_default=sa.text("false"), nullable=False
),
)
# Unique constraint: one config per channel per guild
op.create_unique_constraint(
"uq_discord_channel_guild_channel",
"discord_channel_config",
["guild_config_id", "channel_id"],
)
def downgrade() -> None:
op.drop_table("discord_channel_config")
op.drop_table("discord_guild_config")
op.drop_table("discord_bot_config")

View File

@@ -1,136 +0,0 @@
"""seed_default_image_gen_config
Revision ID: 9087b548dd69
Revises: 2b90f3af54b8
Create Date: 2026-01-05 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9087b548dd69"
down_revision = "2b90f3af54b8"
branch_labels = None
depends_on = None
# Constants for default image generation config
# Source: web/src/app/admin/configuration/image-generation/constants.ts
IMAGE_PROVIDER_ID = "openai_gpt_image_1"
MODEL_NAME = "gpt-image-1"
PROVIDER_NAME = "openai"
def upgrade() -> None:
conn = op.get_bind()
# Check if image_generation_config table already has records
existing_configs = (
conn.execute(sa.text("SELECT COUNT(*) FROM image_generation_config")).scalar()
or 0
)
if existing_configs > 0:
# Skip if configs already exist - user may have configured manually
return
# Find the first OpenAI LLM provider
openai_provider = conn.execute(
sa.text(
"""
SELECT id, api_key
FROM llm_provider
WHERE provider = :provider
ORDER BY id
LIMIT 1
"""
),
{"provider": PROVIDER_NAME},
).fetchone()
if not openai_provider:
# No OpenAI provider found - nothing to do
return
source_provider_id, api_key = openai_provider
# Create new LLM provider for image generation (clone only api_key)
result = conn.execute(
sa.text(
"""
INSERT INTO llm_provider (
name, provider, api_key, api_base, api_version,
deployment_name, default_model_name, is_public,
is_default_provider, is_default_vision_provider, is_auto_mode
)
VALUES (
:name, :provider, :api_key, NULL, NULL,
NULL, :default_model_name, :is_public,
NULL, NULL, :is_auto_mode
)
RETURNING id
"""
),
{
"name": f"Image Gen - {IMAGE_PROVIDER_ID}",
"provider": PROVIDER_NAME,
"api_key": api_key,
"default_model_name": MODEL_NAME,
"is_public": True,
"is_auto_mode": False,
},
)
new_provider_id = result.scalar()
# Create model configuration
result = conn.execute(
sa.text(
"""
INSERT INTO model_configuration (
llm_provider_id, name, is_visible, max_input_tokens,
supports_image_input, display_name
)
VALUES (
:llm_provider_id, :name, :is_visible, :max_input_tokens,
:supports_image_input, :display_name
)
RETURNING id
"""
),
{
"llm_provider_id": new_provider_id,
"name": MODEL_NAME,
"is_visible": True,
"max_input_tokens": None,
"supports_image_input": False,
"display_name": None,
},
)
model_config_id = result.scalar()
# Create image generation config
conn.execute(
sa.text(
"""
INSERT INTO image_generation_config (
image_provider_id, model_configuration_id, is_default
)
VALUES (
:image_provider_id, :model_configuration_id, :is_default
)
"""
),
{
"image_provider_id": IMAGE_PROVIDER_ID,
"model_configuration_id": model_config_id,
"is_default": True,
},
)
def downgrade() -> None:
# We don't remove the config on downgrade since it's safe to keep around
# If we upgrade again, it will be a no-op due to the existing records check
pass

View File

@@ -1,33 +0,0 @@
"""add_is_auto_mode_to_llm_provider
Revision ID: 9a0296d7421e
Revises: 7206234e012a
Create Date: 2025-12-17 18:14:29.620981
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9a0296d7421e"
down_revision = "7206234e012a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"llm_provider",
sa.Column(
"is_auto_mode",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
def downgrade() -> None:
op.drop_column("llm_provider", "is_auto_mode")

View File

@@ -234,8 +234,6 @@ def downgrade() -> None:
if "instructions" in columns:
op.drop_column("user_project", "instructions")
op.execute("ALTER TABLE user_project RENAME TO user_folder")
# Update NULL descriptions to empty string before setting NOT NULL constraint
op.execute("UPDATE user_folder SET description = '' WHERE description IS NULL")
op.alter_column("user_folder", "description", nullable=False)
logger.info("Renamed user_project back to user_folder")

View File

@@ -1,27 +0,0 @@
"""add processing_duration_seconds to chat_message
Revision ID: 9d1543a37106
Revises: 72aa7de2e5cf
Create Date: 2026-01-21 11:42:18.546188
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9d1543a37106"
down_revision = "72aa7de2e5cf"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"chat_message",
sa.Column("processing_duration_seconds", sa.Float(), nullable=True),
)
def downgrade() -> None:
op.drop_column("chat_message", "processing_duration_seconds")

View File

@@ -1,55 +0,0 @@
"""update_default_tool_descriptions
Revision ID: a01bf2971c5d
Revises: 87c52ec39f84
Create Date: 2025-12-16 15:21:25.656375
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a01bf2971c5d"
down_revision = "18b5b2524446"
branch_labels = None
depends_on = None
# new tool descriptions (12/2025)
TOOL_DESCRIPTIONS = {
"SearchTool": "The Search Action allows the agent to search through connected knowledge to help build an answer.",
"ImageGenerationTool": (
"The Image Generation Action allows the agent to use DALL-E 3 or GPT-IMAGE-1 to generate images. "
"The action will be used when the user asks the agent to generate an image."
),
"WebSearchTool": (
"The Web Search Action allows the agent "
"to perform internet searches for up-to-date information."
),
"KnowledgeGraphTool": (
"The Knowledge Graph Search Action allows the agent to search the "
"Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Agent, "
"and it requires the Knowledge Graph to be enabled."
),
"OktaProfileTool": (
"The Okta Profile Action allows the agent to fetch the current user's information from Okta. "
"This may include the user's name, email, phone number, address, and other details such as their "
"manager and direct reports."
),
}
def upgrade() -> None:
conn = op.get_bind()
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
def downgrade() -> None:
pass

View File

@@ -1,49 +0,0 @@
"""add license table
Revision ID: a1b2c3d4e5f6
Revises: a01bf2971c5d
Create Date: 2025-12-04 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f6"
down_revision = "a01bf2971c5d"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"license",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("license_data", sa.Text(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
)
# Singleton pattern - only ever one row in this table
op.create_index(
"idx_license_singleton",
"license",
[sa.text("(true)")],
unique=True,
)
def downgrade() -> None:
op.drop_index("idx_license_singleton", table_name="license")
op.drop_table("license")

View File

@@ -1,47 +0,0 @@
"""drop agent_search_metrics table
Revision ID: a1b2c3d4e5f7
Revises: 73e9983e5091
Create Date: 2026-01-17
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f7"
down_revision = "73e9983e5091"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_table("agent__search_metrics")
def downgrade() -> None:
op.create_table(
"agent__search_metrics",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("user_id", sa.UUID(), nullable=True),
sa.Column("persona_id", sa.Integer(), nullable=True),
sa.Column("agent_type", sa.String(), nullable=False),
sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
sa.Column("base_duration_s", sa.Float(), nullable=False),
sa.Column("full_duration_s", sa.Float(), nullable=False),
sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.PrimaryKeyConstraint("id"),
)

View File

@@ -1,27 +0,0 @@
"""Remove fast_default_model_name from llm_provider
Revision ID: a2b3c4d5e6f7
Revises: 2a391f840e85
Create Date: 2024-12-17
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a2b3c4d5e6f7"
down_revision = "2a391f840e85"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.drop_column("llm_provider", "fast_default_model_name")
def downgrade() -> None:
op.add_column(
"llm_provider",
sa.Column("fast_default_model_name", sa.String(), nullable=True),
)

View File

@@ -1,39 +0,0 @@
"""remove userfile related deprecated fields
Revision ID: a3c1a7904cd0
Revises: 5c3dca366b35
Create Date: 2026-01-06 13:00:30.634396
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a3c1a7904cd0"
down_revision = "5c3dca366b35"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_column("user_file", "document_id")
op.drop_column("user_file", "document_id_migrated")
op.drop_column("connector_credential_pair", "is_user_file")
def downgrade() -> None:
op.add_column(
"connector_credential_pair",
sa.Column("is_user_file", sa.Boolean(), nullable=False, server_default="false"),
)
op.add_column(
"user_file",
sa.Column("document_id", sa.String(), nullable=True),
)
op.add_column(
"user_file",
sa.Column(
"document_id_migrated", sa.Boolean(), nullable=False, server_default="true"
),
)

View File

@@ -1,425 +0,0 @@
"""New Chat History
Revision ID: a852cbe15577
Revises: 6436661d5b65
Create Date: 2025-11-08 15:16:37.781308
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a852cbe15577"
down_revision = "6436661d5b65"
branch_labels = None
depends_on = None
def upgrade() -> None:
# 1. Drop old research/agent tables (CASCADE handles dependencies)
op.execute("DROP TABLE IF EXISTS research_agent_iteration_sub_step CASCADE")
op.execute("DROP TABLE IF EXISTS research_agent_iteration CASCADE")
op.execute("DROP TABLE IF EXISTS agent__sub_query__search_doc CASCADE")
op.execute("DROP TABLE IF EXISTS agent__sub_query CASCADE")
op.execute("DROP TABLE IF EXISTS agent__sub_question CASCADE")
# 2. ChatMessage table changes
# Rename columns and add FKs
op.alter_column(
"chat_message", "parent_message", new_column_name="parent_message_id"
)
op.create_foreign_key(
"fk_chat_message_parent_message_id",
"chat_message",
"chat_message",
["parent_message_id"],
["id"],
)
op.alter_column(
"chat_message",
"latest_child_message",
new_column_name="latest_child_message_id",
)
op.create_foreign_key(
"fk_chat_message_latest_child_message_id",
"chat_message",
"chat_message",
["latest_child_message_id"],
["id"],
)
# Add new column
op.add_column(
"chat_message", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
)
# Drop old columns
op.drop_column("chat_message", "rephrased_query")
op.drop_column("chat_message", "alternate_assistant_id")
op.drop_column("chat_message", "overridden_model")
op.drop_column("chat_message", "is_agentic")
op.drop_column("chat_message", "refined_answer_improvement")
op.drop_column("chat_message", "research_type")
op.drop_column("chat_message", "research_plan")
op.drop_column("chat_message", "research_answer_purpose")
# 3. ToolCall table changes
# Drop the unique constraint first
op.drop_constraint("uq_tool_call_message_id", "tool_call", type_="unique")
# Delete orphaned tool_call rows (those without valid chat_message)
op.execute(
"DELETE FROM tool_call WHERE message_id NOT IN (SELECT id FROM chat_message)"
)
# Add chat_session_id as nullable first, populate, then make NOT NULL
op.add_column(
"tool_call",
sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=True),
)
# Populate chat_session_id from the related chat_message
op.execute(
"""
UPDATE tool_call
SET chat_session_id = chat_message.chat_session_id
FROM chat_message
WHERE tool_call.message_id = chat_message.id
"""
)
# Now make it NOT NULL and add FK
op.alter_column("tool_call", "chat_session_id", nullable=False)
op.create_foreign_key(
"fk_tool_call_chat_session_id",
"tool_call",
"chat_session",
["chat_session_id"],
["id"],
ondelete="CASCADE",
)
# Rename message_id and make nullable, recreate FK with CASCADE
op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
op.alter_column(
"tool_call",
"message_id",
new_column_name="parent_chat_message_id",
nullable=True,
)
op.create_foreign_key(
"fk_tool_call_parent_chat_message_id",
"tool_call",
"chat_message",
["parent_chat_message_id"],
["id"],
ondelete="CASCADE",
)
# Add parent_tool_call_id with FK
op.add_column(
"tool_call", sa.Column("parent_tool_call_id", sa.Integer(), nullable=True)
)
op.create_foreign_key(
"fk_tool_call_parent_tool_call_id",
"tool_call",
"tool_call",
["parent_tool_call_id"],
["id"],
ondelete="CASCADE",
)
# Add other new columns
op.add_column(
"tool_call",
sa.Column("turn_number", sa.Integer(), nullable=False, server_default="0"),
)
op.add_column(
"tool_call",
sa.Column("tool_call_id", sa.String(), nullable=False, server_default=""),
)
op.add_column("tool_call", sa.Column("reasoning_tokens", sa.Text(), nullable=True))
op.add_column(
"tool_call",
sa.Column("tool_call_tokens", sa.Integer(), nullable=False, server_default="0"),
)
op.add_column(
"tool_call",
sa.Column("generated_images", postgresql.JSONB(), nullable=True),
)
# Rename columns
op.alter_column(
"tool_call", "tool_arguments", new_column_name="tool_call_arguments"
)
op.alter_column("tool_call", "tool_result", new_column_name="tool_call_response")
# Change tool_call_response type from JSONB to Text
op.execute(
"""
ALTER TABLE tool_call
ALTER COLUMN tool_call_response TYPE TEXT
USING tool_call_response::text
"""
)
# Drop old columns
op.drop_column("tool_call", "tool_name")
# 4. Create new association table
op.create_table(
"tool_call__search_doc",
sa.Column("tool_call_id", sa.Integer(), nullable=False),
sa.Column("search_doc_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(["tool_call_id"], ["tool_call.id"], ondelete="CASCADE"),
sa.ForeignKeyConstraint(
["search_doc_id"], ["search_doc.id"], ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("tool_call_id", "search_doc_id"),
)
# 5. Persona table change
op.add_column(
"persona",
sa.Column(
"replace_base_system_prompt",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
def downgrade() -> None:
# Reverse persona changes
op.drop_column("persona", "replace_base_system_prompt")
# Drop new association table
op.drop_table("tool_call__search_doc")
# Reverse ToolCall changes
op.add_column(
"tool_call",
sa.Column("tool_name", sa.String(), nullable=False, server_default=""),
)
# Change tool_call_response back to JSONB
op.execute(
"""
ALTER TABLE tool_call
ALTER COLUMN tool_call_response TYPE JSONB
USING tool_call_response::jsonb
"""
)
op.alter_column("tool_call", "tool_call_response", new_column_name="tool_result")
op.alter_column(
"tool_call", "tool_call_arguments", new_column_name="tool_arguments"
)
op.drop_column("tool_call", "generated_images")
op.drop_column("tool_call", "tool_call_tokens")
op.drop_column("tool_call", "reasoning_tokens")
op.drop_column("tool_call", "tool_call_id")
op.drop_column("tool_call", "turn_number")
op.drop_constraint(
"fk_tool_call_parent_tool_call_id", "tool_call", type_="foreignkey"
)
op.drop_column("tool_call", "parent_tool_call_id")
op.drop_constraint(
"fk_tool_call_parent_chat_message_id", "tool_call", type_="foreignkey"
)
op.alter_column(
"tool_call",
"parent_chat_message_id",
new_column_name="message_id",
nullable=False,
)
op.create_foreign_key(
"tool_call_message_id_fkey",
"tool_call",
"chat_message",
["message_id"],
["id"],
)
op.drop_constraint("fk_tool_call_chat_session_id", "tool_call", type_="foreignkey")
op.drop_column("tool_call", "chat_session_id")
op.create_unique_constraint("uq_tool_call_message_id", "tool_call", ["message_id"])
# Reverse ChatMessage changes
# Note: research_answer_purpose and research_type were originally String columns,
# not Enum types (see migrations 5ae8240accb3 and f8a9b2c3d4e5)
op.add_column(
"chat_message",
sa.Column("research_answer_purpose", sa.String(), nullable=True),
)
op.add_column(
"chat_message", sa.Column("research_plan", postgresql.JSONB(), nullable=True)
)
op.add_column(
"chat_message",
sa.Column("research_type", sa.String(), nullable=True),
)
op.add_column(
"chat_message",
sa.Column("refined_answer_improvement", sa.Boolean(), nullable=True),
)
op.add_column(
"chat_message",
sa.Column("is_agentic", sa.Boolean(), nullable=False, server_default="false"),
)
op.add_column(
"chat_message", sa.Column("overridden_model", sa.String(), nullable=True)
)
op.add_column(
"chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True)
)
# Recreate the FK constraint that was implicitly dropped when the column was dropped
op.create_foreign_key(
"fk_chat_message_persona",
"chat_message",
"persona",
["alternate_assistant_id"],
["id"],
)
op.add_column(
"chat_message", sa.Column("rephrased_query", sa.Text(), nullable=True)
)
op.drop_column("chat_message", "reasoning_tokens")
op.drop_constraint(
"fk_chat_message_latest_child_message_id", "chat_message", type_="foreignkey"
)
op.alter_column(
"chat_message",
"latest_child_message_id",
new_column_name="latest_child_message",
)
op.drop_constraint(
"fk_chat_message_parent_message_id", "chat_message", type_="foreignkey"
)
op.alter_column(
"chat_message", "parent_message_id", new_column_name="parent_message"
)
# Recreate agent sub question and sub query tables
op.create_table(
"agent__sub_question",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("primary_question_id", sa.Integer(), nullable=False),
sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("sub_question", sa.Text(), nullable=False),
sa.Column("level", sa.Integer(), nullable=False),
sa.Column("level_question_num", sa.Integer(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("sub_answer", sa.Text(), nullable=False),
sa.Column("sub_question_doc_results", postgresql.JSONB(), nullable=False),
sa.ForeignKeyConstraint(
["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"agent__sub_query",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("parent_question_id", sa.Integer(), nullable=False),
sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("sub_query", sa.Text(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.ForeignKeyConstraint(
["parent_question_id"], ["agent__sub_question.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["chat_session_id"], ["chat_session.id"]),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"agent__sub_query__search_doc",
sa.Column("sub_query_id", sa.Integer(), nullable=False),
sa.Column("search_doc_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["sub_query_id"], ["agent__sub_query.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(["search_doc_id"], ["search_doc.id"]),
sa.PrimaryKeyConstraint("sub_query_id", "search_doc_id"),
)
# Recreate research agent tables
op.create_table(
"research_agent_iteration",
sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
sa.Column("primary_question_id", sa.Integer(), nullable=False),
sa.Column("iteration_nr", sa.Integer(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("purpose", sa.String(), nullable=True),
sa.Column("reasoning", sa.String(), nullable=True),
sa.ForeignKeyConstraint(
["primary_question_id"], ["chat_message.id"], ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint(
"primary_question_id",
"iteration_nr",
name="_research_agent_iteration_unique_constraint",
),
)
op.create_table(
"research_agent_iteration_sub_step",
sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
sa.Column("primary_question_id", sa.Integer(), nullable=False),
sa.Column("iteration_nr", sa.Integer(), nullable=False),
sa.Column("iteration_sub_step_nr", sa.Integer(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("sub_step_instructions", sa.String(), nullable=True),
sa.Column("sub_step_tool_id", sa.Integer(), nullable=True),
sa.Column("reasoning", sa.String(), nullable=True),
sa.Column("sub_answer", sa.String(), nullable=True),
sa.Column("cited_doc_results", postgresql.JSONB(), nullable=False),
sa.Column("claims", postgresql.JSONB(), nullable=True),
sa.Column("is_web_fetch", sa.Boolean(), nullable=True),
sa.Column("queries", postgresql.JSONB(), nullable=True),
sa.Column("generated_images", postgresql.JSONB(), nullable=True),
sa.Column("additional_data", postgresql.JSONB(), nullable=True),
sa.Column("file_ids", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(
["primary_question_id", "iteration_nr"],
[
"research_agent_iteration.primary_question_id",
"research_agent_iteration.iteration_nr",
],
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(["sub_step_tool_id"], ["tool.id"], ondelete="SET NULL"),
sa.PrimaryKeyConstraint("id"),
)

View File

@@ -1,46 +0,0 @@
"""Drop milestone table
Revision ID: b8c9d0e1f2a3
Revises: a2b3c4d5e6f7
Create Date: 2025-12-18
"""
from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "b8c9d0e1f2a3"
down_revision = "a2b3c4d5e6f7"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_table("milestone")
def downgrade() -> None:
op.create_table(
"milestone",
sa.Column("id", sa.UUID(), nullable=False),
sa.Column("tenant_id", sa.String(), nullable=True),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=True,
),
sa.Column("event_type", sa.String(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("event_tracker", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("event_type", name="uq_milestone_event_type"),
)

View File

@@ -1,51 +0,0 @@
"""add_deep_research_tool
Revision ID: c1d2e3f4a5b6
Revises: b8c9d0e1f2a3
Create Date: 2025-12-18 16:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "c1d2e3f4a5b6"
down_revision = "b8c9d0e1f2a3"
branch_labels = None
depends_on = None
DEEP_RESEARCH_TOOL = {
"name": "ResearchAgent",
"display_name": "Research Agent",
"description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
"in_code_tool_id": "ResearchAgent",
}
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
VALUES (:name, :display_name, :description, :in_code_tool_id, false)
"""
),
DEEP_RESEARCH_TOOL,
)
def downgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
DELETE FROM tool
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{"in_code_tool_id": DEEP_RESEARCH_TOOL["in_code_tool_id"]},
)

View File

@@ -1,73 +0,0 @@
"""add_python_tool
Revision ID: c7e9f4a3b2d1
Revises: 3c9a65f1207f
Create Date: 2025-11-08 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "c7e9f4a3b2d1"
down_revision = "3c9a65f1207f"
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Add PythonTool to built-in tools"""
conn = op.get_bind()
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
VALUES (:name, :display_name, :description, :in_code_tool_id, :enabled)
"""
),
{
"name": "PythonTool",
# in the UI, call it `Code Interpreter` since this is a well known term for this tool
"display_name": "Code Interpreter",
"description": (
"The Code Interpreter Action allows the assistant to execute "
"Python code in a secure, isolated environment for data analysis, "
"computation, visualization, and file processing."
),
"in_code_tool_id": "PythonTool",
"enabled": True,
},
)
# needed to store files generated by the python tool
op.add_column(
"research_agent_iteration_sub_step",
sa.Column(
"file_ids",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)
def downgrade() -> None:
"""Remove PythonTool from built-in tools"""
conn = op.get_bind()
conn.execute(
sa.text(
"""
DELETE FROM tool
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"in_code_tool_id": "PythonTool",
},
)
op.drop_column("research_agent_iteration_sub_step", "file_ids")

View File

@@ -257,8 +257,8 @@ def _migrate_files_to_external_storage() -> None:
print(f"File {file_id} not found in PostgreSQL storage.")
continue
lobj_id = cast(int, file_record.lobj_oid)
file_metadata = cast(Any, file_record.file_metadata)
lobj_id = cast(int, file_record.lobj_oid) # type: ignore
file_metadata = cast(Any, file_record.file_metadata) # type: ignore
# Read file content from PostgreSQL
try:
@@ -280,7 +280,7 @@ def _migrate_files_to_external_storage() -> None:
else:
# Convert other types to dict if possible, otherwise None
try:
file_metadata = dict(file_record.file_metadata)
file_metadata = dict(file_record.file_metadata) # type: ignore
except (TypeError, ValueError):
file_metadata = None

View File

@@ -70,66 +70,80 @@ BUILT_IN_TOOLS = [
def upgrade() -> None:
conn = op.get_bind()
# Get existing tools to check what already exists
existing_tools = conn.execute(
sa.text("SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL")
).fetchall()
existing_tool_ids = {row[0] for row in existing_tools}
# Start transaction
conn.execute(sa.text("BEGIN"))
# Insert or update built-in tools
for tool in BUILT_IN_TOOLS:
in_code_id = tool["in_code_tool_id"]
try:
# Get existing tools to check what already exists
existing_tools = conn.execute(
sa.text(
"SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL"
)
).fetchall()
existing_tool_ids = {row[0] for row in existing_tools}
# Handle historical rename: InternetSearchTool -> WebSearchTool
if (
in_code_id == "WebSearchTool"
and "WebSearchTool" not in existing_tool_ids
and "InternetSearchTool" in existing_tool_ids
):
# Rename the existing InternetSearchTool row in place and update fields
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description,
in_code_tool_id = :in_code_tool_id
WHERE in_code_tool_id = 'InternetSearchTool'
"""
),
tool,
)
# Keep the local view of existing ids in sync to avoid duplicate insert
existing_tool_ids.discard("InternetSearchTool")
existing_tool_ids.add("WebSearchTool")
continue
# Insert or update built-in tools
for tool in BUILT_IN_TOOLS:
in_code_id = tool["in_code_tool_id"]
if in_code_id in existing_tool_ids:
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
tool,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id)
VALUES (:name, :display_name, :description, :in_code_tool_id)
"""
),
tool,
)
# Handle historical rename: InternetSearchTool -> WebSearchTool
if (
in_code_id == "WebSearchTool"
and "WebSearchTool" not in existing_tool_ids
and "InternetSearchTool" in existing_tool_ids
):
# Rename the existing InternetSearchTool row in place and update fields
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description,
in_code_tool_id = :in_code_tool_id
WHERE in_code_tool_id = 'InternetSearchTool'
"""
),
tool,
)
# Keep the local view of existing ids in sync to avoid duplicate insert
existing_tool_ids.discard("InternetSearchTool")
existing_tool_ids.add("WebSearchTool")
continue
if in_code_id in existing_tool_ids:
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
tool,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id)
VALUES (:name, :display_name, :description, :in_code_tool_id)
"""
),
tool,
)
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:

View File

@@ -1,64 +0,0 @@
"""sync_exa_api_key_to_content_provider
Revision ID: d1b637d7050a
Revises: d25168c2beee
Create Date: 2026-01-09 15:54:15.646249
"""
from alembic import op
from sqlalchemy import text
# revision identifiers, used by Alembic.
revision = "d1b637d7050a"
down_revision = "d25168c2beee"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Exa uses a shared API key between search and content providers.
# For existing Exa search providers with API keys, create the corresponding
# content provider if it doesn't exist yet.
connection = op.get_bind()
# Check if Exa search provider exists with an API key
result = connection.execute(
text(
"""
SELECT api_key FROM internet_search_provider
WHERE provider_type = 'exa' AND api_key IS NOT NULL
LIMIT 1
"""
)
)
row = result.fetchone()
if row:
api_key = row[0]
# Create Exa content provider with the shared key
connection.execute(
text(
"""
INSERT INTO internet_content_provider
(name, provider_type, api_key, is_active)
VALUES ('Exa', 'exa', :api_key, false)
ON CONFLICT (name) DO NOTHING
"""
),
{"api_key": api_key},
)
def downgrade() -> None:
# Remove the Exa content provider that was created by this migration
connection = op.get_bind()
connection.execute(
text(
"""
DELETE FROM internet_content_provider
WHERE provider_type = 'exa'
"""
)
)

View File

@@ -1,86 +0,0 @@
"""tool_name_consistency
Revision ID: d25168c2beee
Revises: 8405ca81cc83
Create Date: 2026-01-11 17:54:40.135777
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d25168c2beee"
down_revision = "8405ca81cc83"
branch_labels = None
depends_on = None
# Currently the seeded tools have the in_code_tool_id == name
CURRENT_TOOL_NAME_MAPPING = [
"SearchTool",
"WebSearchTool",
"ImageGenerationTool",
"PythonTool",
"OpenURLTool",
"KnowledgeGraphTool",
"ResearchAgent",
]
# Mapping of in_code_tool_id -> name
# These are the expected names that we want in the database
EXPECTED_TOOL_NAME_MAPPING = {
"SearchTool": "internal_search",
"WebSearchTool": "web_search",
"ImageGenerationTool": "generate_image",
"PythonTool": "python",
"OpenURLTool": "open_url",
"KnowledgeGraphTool": "run_kg_search",
"ResearchAgent": "research_agent",
}
def upgrade() -> None:
conn = op.get_bind()
# Mapping of in_code_tool_id to the NAME constant from each tool class
# These match the .name property of each tool implementation
tool_name_mapping = EXPECTED_TOOL_NAME_MAPPING
# Update the name column for each tool based on its in_code_tool_id
for in_code_tool_id, expected_name in tool_name_mapping.items():
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :expected_name
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"expected_name": expected_name,
"in_code_tool_id": in_code_tool_id,
},
)
def downgrade() -> None:
conn = op.get_bind()
# Reverse the migration by setting name back to in_code_tool_id
# This matches the original pattern where name was the class name
for in_code_tool_id in CURRENT_TOOL_NAME_MAPPING:
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :current_name
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"current_name": in_code_tool_id,
"in_code_tool_id": in_code_tool_id,
},
)

View File

@@ -11,8 +11,8 @@ import sqlalchemy as sa
revision = "e209dc5a8156"
down_revision = "48d14957fe80"
branch_labels = None
depends_on = None
branch_labels = None # type: ignore
depends_on = None # type: ignore
def upgrade() -> None:

View File

@@ -1,115 +0,0 @@
"""add status to mcp server and make auth fields nullable
Revision ID: e8f0d2a38171
Revises: ed9e44312505
Create Date: 2025-11-28 11:15:37.667340
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import (
MCPTransport,
MCPAuthenticationType,
MCPAuthenticationPerformer,
MCPServerStatus,
)
# revision identifiers, used by Alembic.
revision = "e8f0d2a38171"
down_revision = "ed9e44312505"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Make auth fields nullable
op.alter_column(
"mcp_server",
"transport",
existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
nullable=True,
)
op.alter_column(
"mcp_server",
"auth_type",
existing_type=sa.Enum(
MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
),
nullable=True,
)
op.alter_column(
"mcp_server",
"auth_performer",
existing_type=sa.Enum(
MCPAuthenticationPerformer,
name="mcp_authentication_performer",
native_enum=False,
),
nullable=True,
)
# Add status column with default
op.add_column(
"mcp_server",
sa.Column(
"status",
sa.Enum(MCPServerStatus, name="mcp_server_status", native_enum=False),
nullable=False,
server_default="CREATED",
),
)
# For existing records, mark status as CONNECTED
bind = op.get_bind()
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET status = 'CONNECTED'
WHERE status != 'CONNECTED'
and admin_connection_config_id IS NOT NULL
"""
)
)
def downgrade() -> None:
# Remove status column
op.drop_column("mcp_server", "status")
# Make auth fields non-nullable (set defaults first)
op.execute(
"UPDATE mcp_server SET transport = 'STREAMABLE_HTTP' WHERE transport IS NULL"
)
op.execute("UPDATE mcp_server SET auth_type = 'NONE' WHERE auth_type IS NULL")
op.execute(
"UPDATE mcp_server SET auth_performer = 'ADMIN' WHERE auth_performer IS NULL"
)
op.alter_column(
"mcp_server",
"transport",
existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
nullable=False,
)
op.alter_column(
"mcp_server",
"auth_type",
existing_type=sa.Enum(
MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
),
nullable=False,
)
op.alter_column(
"mcp_server",
"auth_performer",
existing_type=sa.Enum(
MCPAuthenticationPerformer,
name="mcp_authentication_performer",
native_enum=False,
),
nullable=False,
)

Some files were not shown because too many files have changed in this diff Show More