Compare commits

..

6 Commits

Author SHA1 Message Date
Dane Urban
5848975679 Remove comment 2026-01-08 19:21:24 -08:00
Dane Urban
dcc330010e Remove comment 2026-01-08 19:21:08 -08:00
Dane Urban
d0f5f1f5ae Handle error and log 2026-01-08 19:20:28 -08:00
Dane Urban
3e475993ff Change which event loop we get 2026-01-08 19:16:12 -08:00
Dane Urban
7c2b5fa822 Change loggin 2026-01-08 17:29:00 -08:00
Dane Urban
409cfdc788 nits 2026-01-08 17:23:08 -08:00
1612 changed files with 38571 additions and 157312 deletions

View File

@@ -8,5 +8,4 @@
## Additional Options
- [ ] [Required] I have considered whether this PR needs to be cherry-picked to the latest beta branch.
- [ ] [Optional] Override Linear Check

File diff suppressed because it is too large Load Diff

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -15,7 +15,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
@@ -29,7 +29,6 @@ jobs:
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/

View File

@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # ratchet:actions/stale@v10
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
with:
stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'

View File

@@ -28,7 +28,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -94,7 +94,7 @@ jobs:
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -1,28 +0,0 @@
name: Require beta cherry-pick consideration
concurrency:
group: Require-Beta-Cherrypick-Consideration-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
pull_request:
types: [opened, edited, reopened, synchronize]
permissions:
contents: read
jobs:
beta-cherrypick-check:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Check PR body for beta cherry-pick consideration
env:
PR_BODY: ${{ github.event.pull_request.body }}
run: |
if echo "$PR_BODY" | grep -qiE "\\[x\\][[:space:]]*\\[Required\\][[:space:]]*I have considered whether this PR needs to be cherry[- ]picked to the latest beta branch"; then
echo "Cherry-pick consideration box is checked. Check passed."
exit 0
fi
echo "::error::Please check the 'I have considered whether this PR needs to be cherry-picked to the latest beta branch' box in the PR description."
exit 1

View File

@@ -27,7 +27,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -1,114 +0,0 @@
name: Build Desktop App
concurrency:
group: Build-Desktop-App-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
paths:
- "desktop/**"
- ".github/workflows/pr-desktop-build.yml"
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
build-desktop:
name: Build Desktop (${{ matrix.platform }})
runs-on: ${{ matrix.os }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- platform: linux
os: ubuntu-latest
target: x86_64-unknown-linux-gnu
args: "--bundles deb,rpm"
# TODO: Fix and enable the macOS build.
#- platform: macos
# os: macos-latest
# target: universal-apple-darwin
# args: "--target universal-apple-darwin"
# TODO: Fix and enable the Windows build.
#- platform: windows
# os: windows-latest
# target: x86_64-pc-windows-msvc
# args: ""
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020
with:
node-version: 24
cache: "npm" # zizmor: ignore[cache-poisoning]
cache-dependency-path: ./desktop/package-lock.json
- name: Setup Rust
uses: dtolnay/rust-toolchain@4be9e76fd7c4901c61fb841f559994984270fce7
with:
toolchain: stable
targets: ${{ matrix.target }}
- name: Cache Cargo registry and build
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # zizmor: ignore[cache-poisoning]
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
desktop/src-tauri/target/
key: ${{ runner.os }}-cargo-${{ hashFiles('desktop/src-tauri/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-
- name: Install Linux dependencies
if: matrix.platform == 'linux'
run: |
sudo apt-get update
sudo apt-get install -y \
build-essential \
libglib2.0-dev \
libgirepository1.0-dev \
libgtk-3-dev \
libjavascriptcoregtk-4.1-dev \
libwebkit2gtk-4.1-dev \
libayatana-appindicator3-dev \
gobject-introspection \
pkg-config \
curl \
xdg-utils
- name: Install npm dependencies
working-directory: ./desktop
run: npm ci
- name: Build desktop app
working-directory: ./desktop
run: npx tauri build ${{ matrix.args }}
env:
TAURI_SIGNING_PRIVATE_KEY: ""
TAURI_SIGNING_PRIVATE_KEY_PASSWORD: ""
- name: Upload build artifacts
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: desktop-build-${{ matrix.platform }}-${{ github.run_id }}
path: |
desktop/src-tauri/target/release/bundle/
retention-days: 7
if-no-files-found: ignore

View File

@@ -45,9 +45,6 @@ env:
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
# OpenSearch
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
@@ -57,7 +54,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -91,7 +88,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -128,13 +125,11 @@ jobs:
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker-compose.opensearch.yml \
up -d \
minio \
relational_db \
cache \
index \
opensearch \
code-interpreter
- name: Run migrations
@@ -163,7 +158,7 @@ jobs:
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
# Collect logs from each container
for container in $containers; do
@@ -177,7 +172,7 @@ jobs:
- name: Upload Docker logs
if: failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
with:
name: docker-logs-${{ matrix.test-dir }}
path: docker-logs/

View File

@@ -30,7 +30,7 @@ jobs:
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
@@ -88,7 +88,6 @@ jobs:
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
@@ -181,11 +180,6 @@ jobs:
trap cleanup EXIT
# Run the actual installation with detailed logging
# Note that opensearch.enabled is true whereas others in this install
# are false. There is some work that needs to be done to get this
# entire step working in CI, enabling opensearch here is a small step
# in that direction. If this is causing issues, disabling it in this
# step should be ok in the short term.
echo "=== Starting ct install ==="
set +e
ct install --all \
@@ -193,10 +187,9 @@ jobs:
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=opensearch.enabled=true \
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \

View File

@@ -48,7 +48,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -84,7 +84,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -103,7 +103,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -144,7 +144,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -163,7 +163,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -203,12 +203,12 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
@@ -279,7 +279,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -310,9 +310,8 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
AUTO_LLM_UPDATE_INTERVAL_SECONDS=1
MCP_SERVER_ENABLED=true
USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
EOF
- name: Start Docker containers
@@ -439,7 +438,7 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
@@ -460,7 +459,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -568,7 +567,7 @@ jobs:
- name: Upload logs (multi-tenant)
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-multitenant
path: ${{ github.workspace }}/docker-compose-multitenant.log

View File

@@ -23,7 +23,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -44,7 +44,7 @@ jobs:
- name: Upload coverage reports
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: jest-coverage-${{ github.run_id }}
path: ./web/coverage

View File

@@ -40,7 +40,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -76,7 +76,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -95,7 +95,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -136,7 +136,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -155,7 +155,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -195,7 +195,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -214,7 +214,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
@@ -271,7 +271,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -301,7 +301,7 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
AUTO_LLM_UPDATE_INTERVAL_SECONDS=1
EOF
- name: Start Docker containers
@@ -424,7 +424,7 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log

View File

@@ -66,7 +66,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -85,7 +85,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -127,7 +127,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -146,7 +146,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -188,7 +188,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -207,7 +207,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -254,7 +254,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -435,7 +435,7 @@ jobs:
fi
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
if: always()
with:
# Includes test results and trace.zip files
@@ -455,7 +455,7 @@ jobs:
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log
@@ -488,7 +488,7 @@ jobs:
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
# uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
# with:
# fetch-depth: 0

View File

@@ -27,7 +27,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -50,9 +50,8 @@ jobs:
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: backend/.mypy_cache
key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
restore-keys: |
mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-
mypy-${{ runner.os }}-
- name: Run MyPy

View File

@@ -65,7 +65,7 @@ env:
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
# Salesforce
SF_USERNAME: ${{ vars.SF_USERNAME }}
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
@@ -110,9 +110,6 @@ env:
# Slack
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
# Discord
DISCORD_CONNECTOR_BOT_TOKEN: ${{ secrets.DISCORD_CONNECTOR_BOT_TOKEN }}
# Teams
TEAMS_APPLICATION_ID: ${{ secrets.TEAMS_APPLICATION_ID }}
TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
@@ -142,7 +139,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -5,6 +5,11 @@ on:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
workflow_dispatch:
inputs:
branch:
description: 'Branch to run the workflow on'
required: false
default: 'main'
permissions:
contents: read
@@ -26,11 +31,7 @@ env:
jobs:
model-check:
# See https://runs-on.com/runners/linux/
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- "run-id=${{ github.run_id }}-model-check"
- "extras=ecr-cache"
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
timeout-minutes: 45
env:
@@ -38,91 +39,112 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
- name: Build and load
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6
env:
TAG: model-server-${{ github.run_id }}
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# successfully.
- name: Pull Model Server Docker image
run: |
docker pull onyxdotapp/onyx-model-server:latest
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
load: true
targets: model-server
set: |
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
model-server.cache-from=type=registry,ref=onyxdotapp/onyx-model-server:latest
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Start Docker containers
id: start_docker
env:
IMAGE_TAG: model-server-${{ github.run_id }}
run: |
cd deployment/docker_compose
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
up -d --wait \
inference_model_server
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.SLACK_WEBHOOK }}
failed-jobs: model-check
title: "🚨 Scheduled Model Tests failed!"
ref-name: ${{ github.ref_name }}
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log

View File

@@ -32,7 +32,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
@@ -38,7 +38,7 @@ jobs:
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: j178/prek-action@9d6a3097e0c1865ecce00cfb89fe80f2ee91b547 # ratchet:j178/prek-action@v1
- uses: j178/prek-action@91fd7d7cf70ae1dee9f4f44e7dfa5d1073fe6623 # ratchet:j178/prek-action@v1
with:
prek-version: '0.2.21'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}

View File

@@ -24,11 +24,11 @@ jobs:
- { goos: "darwin", goarch: "arm64" }
- { goos: "", goarch: "" }
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
- uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"

View File

@@ -14,7 +14,7 @@ jobs:
contents: read
steps:
- name: Checkout main Onyx repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false

View File

@@ -18,7 +18,7 @@ jobs:
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
# implement here which needs an actual user's deploy key
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
ssh-key: "${{ secrets.DEPLOY_KEY }}"
persist-credentials: true

View File

@@ -17,7 +17,7 @@ jobs:
security-events: write # needed for SARIF uploads
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6.0.2
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
with:
persist-credentials: false
@@ -31,7 +31,7 @@ jobs:
- name: Install the latest version of uv
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # ratchet:astral-sh/setup-uv@v7
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"

7
.gitignore vendored
View File

@@ -1,9 +1,5 @@
# editors
.vscode/*
!/.vscode/env_template.txt
!/.vscode/env.web_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.vscode
.zed
.cursor
@@ -25,7 +21,6 @@ backend/tests/regression/search_quality/*.json
backend/onyx/evals/data/
backend/onyx/evals/one_off/*.json
*.log
*.csv
# secret files
.env

View File

@@ -11,6 +11,7 @@ repos:
- id: uv-sync
args: ["--locked", "--all-extras"]
- id: uv-lock
files: ^pyproject\.toml$
- id: uv-export
name: uv-export default.txt
args:
@@ -66,8 +67,7 @@ repos:
- id: uv-run
name: Check lazy imports
args: ["--active", "--with=onyx-devtools", "ods", "check-lazy-imports"]
pass_filenames: true
files: ^backend/(?!\.venv/|scripts/).*\.py$
files: ^backend/(?!\.venv/).*\.py$
# NOTE: This takes ~6s on a single, large module which is prohibitively slow.
# - id: uv-run
# name: mypy
@@ -75,13 +75,6 @@ repos:
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-added-large-files
name: Check for added large files
args: ["--maxkb=1500"]
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:
@@ -154,22 +147,6 @@ repos:
pass_filenames: false
files: \.tf$
- id: npm-install
name: npm install
description: "Automatically run 'npm install' after a checkout, pull or rebase"
language: system
entry: bash -c 'cd web && npm install --no-save'
pass_filenames: false
files: ^web/package(-lock)?\.json$
stages: [post-checkout, post-merge, post-rewrite]
- id: npm-install-check
name: npm install --package-lock-only
description: "Check the 'web/package-lock.json' is updated"
language: system
entry: bash -c 'cd web && npm install --package-lock-only'
pass_filenames: false
files: ^web/package(-lock)?\.json$
# Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
# This is a preview package - if it breaks:
# 1. Try updating: cd web && npm update @typescript/native-preview

View File

@@ -1,16 +0,0 @@
# Copy this file to .env.web in the .vscode folder.
# Fill in the <REPLACE THIS> values as needed
# Web Server specific environment variables
# Minimal set needed for Next.js dev server
# Auth
AUTH_TYPE=basic
DEV_MODE=true
# Enable the full set of Danswer Enterprise Edition features.
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
# are using this for local testing/development).
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false
# Enable Onyx Craft
ENABLE_CRAFT=true

View File

@@ -6,17 +6,23 @@
# processes.
AUTH_TYPE=basic
DEV_MODE=true
# For local dev, often user Authentication is not needed.
AUTH_TYPE=disabled
# Always keep these on for Dev.
# Logs model prompts, reasoning, and answer to stdout.
LOG_ONYX_MODEL_INTERACTIONS=False
LOG_ONYX_MODEL_INTERACTIONS=True
# More verbose logging
LOG_LEVEL=debug
# This passes top N results to LLM an additional time for reranking prior to
# answer generation.
# This step is quite heavy on token usage so we disable it for dev generally.
DISABLE_LLM_DOC_RELEVANCE=False
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).
OAUTH_CLIENT_ID=<REPLACE THIS>
OAUTH_CLIENT_SECRET=<REPLACE THIS>
@@ -35,6 +41,7 @@ GEN_AI_API_KEY=<REPLACE THIS>
OPENAI_API_KEY=<REPLACE THIS>
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o
# Python stuff

View File

@@ -1,3 +1,5 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
@@ -22,10 +24,9 @@
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery background",
"Celery docfetching",
"Celery docprocessing",
"Celery user_file_processing",
"Celery beat"
],
"presentation": {
@@ -87,7 +88,7 @@
"request": "launch",
"cwd": "${workspaceRoot}/web",
"runtimeExecutable": "npm",
"envFile": "${workspaceFolder}/.vscode/.env.web",
"envFile": "${workspaceFolder}/.vscode/.env",
"runtimeArgs": ["run", "dev"],
"presentation": {
"group": "2"
@@ -122,6 +123,7 @@
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_ONYX_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
},
@@ -149,24 +151,6 @@
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Discord Bot",
"consoleName": "Discord Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/discord/client.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Discord Bot Console"
},
{
"name": "MCP Server",
"consoleName": "MCP Server",
@@ -415,6 +399,7 @@
"onyx.background.celery.versioned_apps.docfetching",
"worker",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docfetching@%n",
@@ -445,6 +430,7 @@
"onyx.background.celery.versioned_apps.docprocessing",
"worker",
"--pool=threads",
"--concurrency=6",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docprocessing@%n",
@@ -572,6 +558,7 @@
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_ONYX_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
@@ -592,137 +579,6 @@
"group": "3"
}
},
{
"name": "Build Sandbox Templates",
"type": "debugpy",
"request": "launch",
"module": "onyx.server.features.build.sandbox.build_templates",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"console": "integratedTerminal",
"presentation": {
"group": "3"
},
"consoleTitle": "Build Sandbox Templates"
},
{
// Dummy entry used to label the group
"name": "--- Database ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "4",
"order": 0
}
},
{
"name": "Restore seeded database dump",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore seeded database dump (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--clean",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Create database snapshot",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"dump",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore database snapshot (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--clean",
"--yes",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Upgrade database to head revision",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"upgrade"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -1,31 +1,262 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
# Contributing to Onyx
Hey there! We are so excited that you're interested in Onyx.
As an open source project in a rapidly changing space, we welcome all contributions.
## Contribution Opportunities
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to look for and share contribution ideas.
## 💃 Guidelines
If you have your own feature that you would like to build please create an issue and community members can provide feedback and
thumb it up if they feel a common need.
### Contribution Opportunities
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
## Contributing Code
Please reference the documents in contributing_guides folder to ensure that the code base is kept to a high standard.
1. dev_setup.md (start here): gives you a guide to setting up a local development environment.
2. contribution_process.md: how to ensure you are building valuable features that will get reviewed and merged.
3. best_practices.md: before asking for reviews, ensure your changes meet the repo code quality standards.
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
via [Discord](https://discord.gg/4NA5SbzrWb) or [email](mailto:hello@onyx.app).
To contribute, please follow the
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
will be marked with the `approved by maintainers` label.
Issues marked `good first issue` are an especially great place to start.
**Connectors** to other tools are another great place to contribute. For details on how, refer to this
[README.md](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md).
If you have a new/different contribution in mind, we'd love to hear about it!
Your input is vital to making sure that Onyx moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
[Discord](https://discord.gg/4NA5SbzrWb) directly about anything at all.
### Contributing Code
To contribute to this project, please follow the
["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
When opening a pull request, mention related issues and feel free to tag relevant maintainers.
Before creating a pull request please make sure that the new changes conform to the formatting and linting requirements.
See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.
### Getting Help 🙋
Our goal is to make contributing as easy as possible. If you run into any issues please don't hesitate to reach out.
That way we can help future contributors and users can avoid the same issue.
We also have support channels and generally interesting discussions on our
[Discord](https://discord.gg/4NA5SbzrWb).
We would love to see you there!
## Get Started 🚀
Onyx being a fully functional app, relies on some external software, specifically:
- [Postgres](https://www.postgresql.org/) (Relational DB)
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
- [Redis](https://redis.io/) (Cache)
- [MinIO](https://min.io/) (File Store)
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
> **Note:**
> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software. We believe this combination is easier for
> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Onyx stack within Docker below.
### Local Set Up
Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.
If using a lower version, modifications will have to be made to the code.
If using a higher version, sometimes some libraries will not be available (i.e. we had problems with Tensorflow in the past with higher versions of python).
#### Backend: Python requirements
Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
For convenience here's a command for it:
```bash
uv venv .venv --python 3.11
source .venv/bin/activate
```
_For Windows, activate the virtual environment using Command Prompt:_
```bash
.venv\Scripts\activate
```
If using PowerShell, the command slightly differs:
```powershell
.venv\Scripts\Activate.ps1
```
Install the required python dependencies:
```bash
uv sync --all-extras
```
Install Playwright for Python (headless browser required by the Web Connector):
```bash
uv run playwright install
```
#### Frontend: Node dependencies
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
to manage your Node installations. Once installed, you can run
```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```
Navigate to `onyx/web` and run:
```bash
npm i
```
## Formatting and Linting
### Backend
For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
Then run:
```bash
uv run pre-commit install
```
Additionally, we use `mypy` for static type checking.
Onyx is fully type-annotated, and we want to keep it that way!
To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
### Web
We use `prettier` for formatting. The desired version will be installed via a `npm i` from the `onyx/web` directory.
To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
Pre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail.
Re-stage your changes and commit again.
# Running the application for development
## Developing using VSCode Debugger (recommended)
**We highly recommend using VSCode debugger for development.**
See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
Otherwise, you can follow the instructions below to run the application for development.
## Manually running the application for development
### Docker containers for external software
You will need Docker installed to run these containers.
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio
```
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
### Running Onyx locally
To start the frontend, navigate to `onyx/web` and run:
```bash
npm run dev
```
Next, start the model server which runs the local NLP models.
Navigate to `onyx/backend` and run:
```bash
uvicorn model_server.main:app --reload --port 9000
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "uvicorn model_server.main:app --reload --port 9000"
```
The first time running Onyx, you will need to run the DB migrations for Postgres.
After the first time, this is no longer required unless the DB models change.
Navigate to `onyx/backend` and with the venv active, run:
```bash
alembic upgrade head
```
Next, start the task queue which orchestrates the background jobs.
Jobs that take more time are run async from the API server.
Still in `onyx/backend`, run:
```bash
python ./scripts/dev_run_background_jobs.py
```
To run the backend API server, navigate back to `onyx/backend` and run:
```bash
AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "
$env:AUTH_TYPE='disabled'
uvicorn onyx.main:app --reload --port 8080
"
```
> **Note:**
> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
#### Wrapping up
You should now have 4 servers running:
- Web server
- Backend API
- Model server
- Background jobs
Now, visit `http://localhost:3000` in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.
You've successfully set up a local Onyx instance! 🏁
#### Running the Onyx application in a container
You can run the full Onyx application stack from pre-built images including all external software dependencies.
Navigate to `onyx/deployment/docker_compose` and run:
```bash
docker compose up -d
```
After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
```bash
docker compose up -d --build
```
## Getting Help 🙋
We have support channels and generally interesting discussions on our [Discord](https://discord.gg/4NA5SbzrWb).
### Release Process
See you there!
## Release Process
Onyx loosely follows the SemVer versioning standard.
Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
A set of Docker containers will be pushed automatically to DockerHub with every tag.

View File

@@ -7,6 +7,8 @@ This guide explains how to set up and use VSCode's debugging capabilities with t
1. **Environment Setup**:
- Copy `.vscode/env_template.txt` to `.vscode/.env`
- Fill in the necessary environment variables in `.vscode/.env`
2. **launch.json**:
- Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
## Using the Debugger

View File

@@ -16,8 +16,3 @@ dist/
.coverage
htmlcov/
model_server/legacy/
# Craft: demo_data directory should be unzipped at container startup, not copied
**/demo_data/
# Craft: templates/outputs/venv is created at container startup
**/templates/outputs/venv

View File

@@ -37,6 +37,10 @@ CVE-2023-50868
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193

View File

@@ -7,10 +7,6 @@ have a contract or agreement with DanswerAI, you are not permitted to use the En
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
# Build argument for Craft support (disabled by default)
# Use --build-arg ENABLE_CRAFT=true to include Node.js and opencode CLI
ARG ENABLE_CRAFT=false
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
@@ -50,23 +46,7 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
# Conditionally install Node.js 20 for Craft (required for Next.js)
# Only installed when ENABLE_CRAFT=true
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
echo "Installing Node.js 20 for Craft support..." && \
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y nodejs && \
rm -rf /var/lib/apt/lists/*; \
fi
# Conditionally install opencode CLI for Craft agent functionality
# Only installed when ENABLE_CRAFT=true
# TODO: download a specific, versioned release of the opencode CLI
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
echo "Installing opencode CLI for Craft support..." && \
curl -fsSL https://opencode.ai/install | bash; \
fi
ENV PATH="/root/.opencode/bin:${PATH}"
# Install Python dependencies
# Remove py which is pulled in by retry, py is not needed and is a CVE
@@ -111,8 +91,8 @@ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
@@ -139,15 +119,7 @@ COPY --chown=onyx:onyx ./static /app/static
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
COPY --chown=onyx:onyx ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
COPY --chown=onyx:onyx ./scripts/supervisord_entrypoint.sh /app/scripts/supervisord_entrypoint.sh
COPY --chown=onyx:onyx ./scripts/setup_craft_templates.sh /app/scripts/setup_craft_templates.sh
RUN chmod +x /app/scripts/supervisord_entrypoint.sh /app/scripts/setup_craft_templates.sh
# Run Craft template setup at build time when ENABLE_CRAFT=true
# This pre-bakes demo data, Python venv, and npm dependencies into the image
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
echo "Running Craft template setup at build time..." && \
ENABLE_CRAFT=true /app/scripts/setup_craft_templates.sh; \
fi
RUN chmod +x /app/scripts/supervisord_entrypoint.sh
# Put logo in assets
COPY --chown=onyx:onyx ./assets /app/assets

View File

@@ -225,6 +225,7 @@ def do_run_migrations(
) -> None:
if create_schema:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
connection.execute(text(f'SET search_path TO "{schema_name}"'))
@@ -308,7 +309,6 @@ async def run_async_migrations() -> None:
schema_name=schema,
create_schema=create_schema,
)
await connection.commit()
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
@@ -346,7 +346,6 @@ async def run_async_migrations() -> None:
schema_name=schema,
create_schema=create_schema,
)
await connection.commit()
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:

View File

@@ -1,58 +0,0 @@
"""LLMProvider deprecated fields are nullable
Revision ID: 001984c88745
Revises: 01f8e6d95a33
Create Date: 2026-02-01 22:24:34.171100
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "001984c88745"
down_revision = "01f8e6d95a33"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Make default_model_name nullable (was NOT NULL)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=True,
)
# Remove server_default from is_default_vision_provider (was server_default=false())
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=None,
)
# is_default_provider and default_vision_model are already nullable with no server_default
def downgrade() -> None:
# Restore default_model_name to NOT NULL (set empty string for any NULLs first)
op.execute(
"UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=False,
)
# Restore server_default for is_default_vision_provider
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=sa.false(),
)

View File

@@ -1,112 +0,0 @@
"""Populate flow mapping data
Revision ID: 01f8e6d95a33
Revises: f220515df7b4
Create Date: 2026-01-31 17:37:10.485558
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "01f8e6d95a33"
down_revision = "f220515df7b4"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add each model config to the conversation flow, setting the global default if it exists
# Exclude models that are part of ImageGenerationConfig
op.execute(
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'chat' AS llm_model_flow_type,
COALESCE(
(lp.is_default_provider IS TRUE AND lp.default_model_name = mc.name),
FALSE
) AS is_default,
mc.id AS model_configuration_id
FROM model_configuration mc
LEFT JOIN llm_provider lp
ON lp.id = mc.llm_provider_id
WHERE NOT EXISTS (
SELECT 1 FROM image_generation_config igc
WHERE igc.model_configuration_id = mc.id
);
"""
)
# Add models with supports_image_input to the vision flow
op.execute(
"""
INSERT INTO llm_model_flow (llm_model_flow_type, is_default, model_configuration_id)
SELECT
'vision' AS llm_model_flow_type,
COALESCE(
(lp.is_default_vision_provider IS TRUE AND lp.default_vision_model = mc.name),
FALSE
) AS is_default,
mc.id AS model_configuration_id
FROM model_configuration mc
LEFT JOIN llm_provider lp
ON lp.id = mc.llm_provider_id
WHERE mc.supports_image_input IS TRUE;
"""
)
def downgrade() -> None:
# Populate vision defaults from model_flow
op.execute(
"""
UPDATE llm_provider AS lp
SET
is_default_vision_provider = TRUE,
default_vision_model = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'vision'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
)
# Populate conversation defaults from model_flow
op.execute(
"""
UPDATE llm_provider AS lp
SET
is_default_provider = TRUE,
default_model_name = mc.name
FROM llm_model_flow mf
JOIN model_configuration mc ON mc.id = mf.model_configuration_id
WHERE mf.llm_model_flow_type = 'chat'
AND mf.is_default = TRUE
AND mc.llm_provider_id = lp.id;
"""
)
# For providers that have conversation flow mappings but aren't the default,
# we still need a default_model_name (it was NOT NULL originally)
# Pick the first visible model or any model for that provider
op.execute(
"""
UPDATE llm_provider AS lp
SET default_model_name = (
SELECT mc.name
FROM model_configuration mc
JOIN llm_model_flow mf ON mf.model_configuration_id = mc.id
WHERE mc.llm_provider_id = lp.id
AND mf.llm_model_flow_type = 'chat'
ORDER BY mc.is_visible DESC, mc.id ASC
LIMIT 1
)
WHERE lp.default_model_name IS NULL;
"""
)
# Delete all model_flow entries (reverse the inserts from upgrade)
op.execute("DELETE FROM llm_model_flow;")

View File

@@ -10,6 +10,8 @@ from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.configs.chat_configs import NUM_POSTPROCESSED_RESULTS
# revision identifiers, used by Alembic.
revision = "1f60f60c3401"
down_revision = "f17bf3b0d9f1"
@@ -64,7 +66,7 @@ def upgrade() -> None:
"num_rerank",
sa.Integer(),
nullable=False,
server_default=str(20),
server_default=str(NUM_POSTPROCESSED_RESULTS),
),
)

View File

@@ -1,351 +0,0 @@
"""single onyx craft migration
Consolidates all buildmode/onyx craft tables into a single migration.
Tables created:
- build_session: User build sessions with status tracking
- sandbox: User-owned containerized environments (one per user)
- artifact: Build output files (web apps, documents, images)
- snapshot: Sandbox filesystem snapshots
- build_message: Conversation messages for build sessions
Existing table modified:
- connector_credential_pair: Added processing_mode column
Revision ID: 2020d417ec84
Revises: 41fa44bef321
Create Date: 2026-01-26 14:43:54.641405
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "2020d417ec84"
down_revision = "41fa44bef321"
branch_labels = None
depends_on = None
def upgrade() -> None:
# ==========================================================================
# ENUMS
# ==========================================================================
# Build session status enum
build_session_status_enum = sa.Enum(
"active",
"idle",
name="buildsessionstatus",
native_enum=False,
)
# Sandbox status enum
sandbox_status_enum = sa.Enum(
"provisioning",
"running",
"idle",
"sleeping",
"terminated",
"failed",
name="sandboxstatus",
native_enum=False,
)
# Artifact type enum
artifact_type_enum = sa.Enum(
"web_app",
"pptx",
"docx",
"markdown",
"excel",
"image",
name="artifacttype",
native_enum=False,
)
# ==========================================================================
# BUILD_SESSION TABLE
# ==========================================================================
op.create_table(
"build_session",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id", ondelete="CASCADE"),
nullable=True,
),
sa.Column("name", sa.String(), nullable=True),
sa.Column(
"status",
build_session_status_enum,
nullable=False,
server_default="active",
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"last_activity_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("nextjs_port", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_build_session_user_created",
"build_session",
["user_id", sa.text("created_at DESC")],
unique=False,
)
op.create_index(
"ix_build_session_status",
"build_session",
["status"],
unique=False,
)
# ==========================================================================
# SANDBOX TABLE (user-owned, one per user)
# ==========================================================================
op.create_table(
"sandbox",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("container_id", sa.String(), nullable=True),
sa.Column(
"status",
sandbox_status_enum,
nullable=False,
server_default="provisioning",
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("last_heartbeat", sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("user_id", name="sandbox_user_id_key"),
)
op.create_index(
"ix_sandbox_status",
"sandbox",
["status"],
unique=False,
)
op.create_index(
"ix_sandbox_container_id",
"sandbox",
["container_id"],
unique=False,
)
# ==========================================================================
# ARTIFACT TABLE
# ==========================================================================
op.create_table(
"artifact",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("type", artifact_type_enum, nullable=False),
sa.Column("path", sa.String(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_artifact_session_created",
"artifact",
["session_id", sa.text("created_at DESC")],
unique=False,
)
op.create_index(
"ix_artifact_type",
"artifact",
["type"],
unique=False,
)
# ==========================================================================
# SNAPSHOT TABLE
# ==========================================================================
op.create_table(
"snapshot",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("storage_path", sa.String(), nullable=False),
sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_snapshot_session_created",
"snapshot",
["session_id", sa.text("created_at DESC")],
unique=False,
)
# ==========================================================================
# BUILD_MESSAGE TABLE
# ==========================================================================
op.create_table(
"build_message",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column(
"turn_index",
sa.Integer(),
nullable=False,
),
sa.Column(
"type",
sa.Enum(
"SYSTEM",
"USER",
"ASSISTANT",
"DANSWER",
name="messagetype",
create_type=False,
native_enum=False,
),
nullable=False,
),
sa.Column(
"message_metadata",
postgresql.JSONB(),
nullable=False,
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_build_message_session_turn",
"build_message",
["session_id", "turn_index", sa.text("created_at ASC")],
unique=False,
)
# ==========================================================================
# CONNECTOR_CREDENTIAL_PAIR MODIFICATION
# ==========================================================================
op.add_column(
"connector_credential_pair",
sa.Column(
"processing_mode",
sa.String(),
nullable=False,
server_default="regular",
),
)
def downgrade() -> None:
# ==========================================================================
# CONNECTOR_CREDENTIAL_PAIR MODIFICATION
# ==========================================================================
op.drop_column("connector_credential_pair", "processing_mode")
# ==========================================================================
# BUILD_MESSAGE TABLE
# ==========================================================================
op.drop_index("ix_build_message_session_turn", table_name="build_message")
op.drop_table("build_message")
# ==========================================================================
# SNAPSHOT TABLE
# ==========================================================================
op.drop_index("ix_snapshot_session_created", table_name="snapshot")
op.drop_table("snapshot")
# ==========================================================================
# ARTIFACT TABLE
# ==========================================================================
op.drop_index("ix_artifact_type", table_name="artifact")
op.drop_index("ix_artifact_session_created", table_name="artifact")
op.drop_table("artifact")
sa.Enum(name="artifacttype").drop(op.get_bind(), checkfirst=True)
# ==========================================================================
# SANDBOX TABLE
# ==========================================================================
op.drop_index("ix_sandbox_container_id", table_name="sandbox")
op.drop_index("ix_sandbox_status", table_name="sandbox")
op.drop_table("sandbox")
sa.Enum(name="sandboxstatus").drop(op.get_bind(), checkfirst=True)
# ==========================================================================
# BUILD_SESSION TABLE
# ==========================================================================
op.drop_index("ix_build_session_status", table_name="build_session")
op.drop_index("ix_build_session_user_created", table_name="build_session")
op.drop_table("build_session")
sa.Enum(name="buildsessionstatus").drop(op.get_bind(), checkfirst=True)

View File

@@ -1,42 +0,0 @@
"""add_unique_constraint_to_inputprompt_prompt_user_id
Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique constraint on (prompt, user_id) for user-owned prompts
# This ensures each user can only have one shortcut with a given name
op.create_unique_constraint(
"uq_inputprompt_prompt_user_id",
"inputprompt",
["prompt", "user_id"],
)
# Create partial unique index for public prompts (where user_id IS NULL)
# PostgreSQL unique constraints don't enforce uniqueness for NULL values,
# so we need a partial index to ensure public prompt names are also unique
op.execute(
"""
CREATE UNIQUE INDEX uq_inputprompt_prompt_public
ON inputprompt (prompt)
WHERE user_id IS NULL
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
op.drop_constraint("uq_inputprompt_prompt_user_id", "inputprompt", type_="unique")

View File

@@ -1,29 +0,0 @@
"""remove default prompt shortcuts
Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Delete any user associations for the default prompts first (foreign key constraint)
op.execute(
"DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)"
)
# Delete the pre-seeded default prompt shortcuts (they have negative IDs)
op.execute("DELETE FROM inputprompt WHERE id < 0")
def downgrade() -> None:
# We don't restore the default prompts on downgrade
pass

View File

@@ -85,122 +85,103 @@ class UserRow(NamedTuple):
def upgrade() -> None:
conn = op.get_bind()
# Step 1: Create or update the unified assistant (ID 0)
search_assistant = conn.execute(
sa.text("SELECT * FROM persona WHERE id = 0")
).fetchone()
# Start transaction
conn.execute(sa.text("BEGIN"))
if search_assistant:
# Update existing Search assistant to be the unified assistant
try:
# Step 1: Create or update the unified assistant (ID 0)
search_assistant = conn.execute(
sa.text("SELECT * FROM persona WHERE id = 0")
).fetchone()
if search_assistant:
# Update existing Search assistant to be the unified assistant
conn.execute(
sa.text(
"""
UPDATE persona
SET name = :name,
description = :description,
system_prompt = :system_prompt,
num_chunks = :num_chunks,
is_default_persona = true,
is_visible = true,
deleted = false,
display_priority = :display_priority,
llm_filter_extraction = :llm_filter_extraction,
llm_relevance_filter = :llm_relevance_filter,
recency_bias = :recency_bias,
chunks_above = :chunks_above,
chunks_below = :chunks_below,
datetime_aware = :datetime_aware,
starter_messages = null
WHERE id = 0
"""
),
INSERT_DICT,
)
else:
# Create new unified assistant with ID 0
conn.execute(
sa.text(
"""
INSERT INTO persona (
id, name, description, system_prompt, num_chunks,
is_default_persona, is_visible, deleted, display_priority,
llm_filter_extraction, llm_relevance_filter, recency_bias,
chunks_above, chunks_below, datetime_aware, starter_messages,
builtin_persona
) VALUES (
0, :name, :description, :system_prompt, :num_chunks,
true, true, false, :display_priority, :llm_filter_extraction,
:llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
:datetime_aware, null, true
)
"""
),
INSERT_DICT,
)
# Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
conn.execute(
sa.text(
"""
UPDATE persona
SET name = :name,
description = :description,
system_prompt = :system_prompt,
num_chunks = :num_chunks,
is_default_persona = true,
is_visible = true,
deleted = false,
display_priority = :display_priority,
llm_filter_extraction = :llm_filter_extraction,
llm_relevance_filter = :llm_relevance_filter,
recency_bias = :recency_bias,
chunks_above = :chunks_above,
chunks_below = :chunks_below,
datetime_aware = :datetime_aware,
starter_messages = null
WHERE id = 0
SET deleted = true, is_visible = false, is_default_persona = false
WHERE builtin_persona = true AND id != 0
"""
),
INSERT_DICT,
)
else:
# Create new unified assistant with ID 0
conn.execute(
sa.text(
"""
INSERT INTO persona (
id, name, description, system_prompt, num_chunks,
is_default_persona, is_visible, deleted, display_priority,
llm_filter_extraction, llm_relevance_filter, recency_bias,
chunks_above, chunks_below, datetime_aware, starter_messages,
builtin_persona
) VALUES (
0, :name, :description, :system_prompt, :num_chunks,
true, true, false, :display_priority, :llm_filter_extraction,
:llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
:datetime_aware, null, true
)
"""
),
INSERT_DICT,
)
)
# Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = true, is_visible = false, is_default_persona = false
WHERE builtin_persona = true AND id != 0
"""
)
)
# Step 3: Add all built-in tools to the unified assistant
# First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
).fetchone()
# Step 3: Add all built-in tools to the unified assistant
# First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
).fetchone()
if not search_tool:
raise ValueError(
"SearchTool not found in database. Ensure tools migration has run first."
)
if not search_tool:
raise ValueError(
"SearchTool not found in database. Ensure tools migration has run first."
)
image_gen_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
).fetchone()
image_gen_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
).fetchone()
if not image_gen_tool:
raise ValueError(
"ImageGenerationTool not found in database. Ensure tools migration has run first."
)
if not image_gen_tool:
raise ValueError(
"ImageGenerationTool not found in database. Ensure tools migration has run first."
)
# WebSearchTool is optional - may not be configured
web_search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
).fetchone()
# WebSearchTool is optional - may not be configured
web_search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
).fetchone()
# Clear existing tool associations for persona 0
conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
# Clear existing tool associations for persona 0
conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
# Add tools to the unified assistant
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": search_tool[0]},
)
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": image_gen_tool[0]},
)
if web_search_tool:
# Add tools to the unified assistant
conn.execute(
sa.text(
"""
@@ -209,148 +190,191 @@ def upgrade() -> None:
ON CONFLICT DO NOTHING
"""
),
{"tool_id": web_search_tool[0]},
{"tool_id": search_tool[0]},
)
# Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
conn.execute(
sa.text(
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
UPDATE chat_session
SET persona_id = 0
WHERE persona_id IN (
SELECT id FROM persona WHERE builtin_persona = true AND id != 0
)
"""
),
{"tool_id": image_gen_tool[0]},
)
)
# Step 5: Migrate user preferences - remove references to all builtin assistants
# First, get all builtin assistant IDs (except 0)
builtin_assistants_result = conn.execute(
sa.text(
"""
SELECT id FROM persona
WHERE builtin_persona = true AND id != 0
"""
)
).fetchall()
builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
# Get all users with preferences
users_result = conn.execute(
sa.text(
"""
SELECT id, chosen_assistants, visible_assistants,
hidden_assistants, pinned_assistants
FROM "user"
"""
)
).fetchall()
for user_row in users_result:
user = UserRow(*user_row)
user_id: UUID = user.id
updates: dict[str, Any] = {}
# Remove all builtin assistants from chosen_assistants
if user.chosen_assistants:
new_chosen: list[int] = [
assistant_id
for assistant_id in user.chosen_assistants
if assistant_id not in builtin_assistant_ids
]
if new_chosen != user.chosen_assistants:
updates["chosen_assistants"] = json.dumps(new_chosen)
# Remove all builtin assistants from visible_assistants
if user.visible_assistants:
new_visible: list[int] = [
assistant_id
for assistant_id in user.visible_assistants
if assistant_id not in builtin_assistant_ids
]
if new_visible != user.visible_assistants:
updates["visible_assistants"] = json.dumps(new_visible)
# Add all builtin assistants to hidden_assistants
if user.hidden_assistants:
new_hidden: list[int] = list(user.hidden_assistants)
for old_id in builtin_assistant_ids:
if old_id not in new_hidden:
new_hidden.append(old_id)
if new_hidden != user.hidden_assistants:
updates["hidden_assistants"] = json.dumps(new_hidden)
else:
updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
# Remove all builtin assistants from pinned_assistants
if user.pinned_assistants:
new_pinned: list[int] = [
assistant_id
for assistant_id in user.pinned_assistants
if assistant_id not in builtin_assistant_ids
]
if new_pinned != user.pinned_assistants:
updates["pinned_assistants"] = json.dumps(new_pinned)
# Apply updates if any
if updates:
set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
updates["user_id"] = str(user_id) # Convert UUID to string for SQL
if web_search_tool:
conn.execute(
sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
updates,
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": web_search_tool[0]},
)
# Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
conn.execute(
sa.text(
"""
UPDATE chat_session
SET persona_id = 0
WHERE persona_id IN (
SELECT id FROM persona WHERE builtin_persona = true AND id != 0
)
"""
)
)
# Step 5: Migrate user preferences - remove references to all builtin assistants
# First, get all builtin assistant IDs (except 0)
builtin_assistants_result = conn.execute(
sa.text(
"""
SELECT id FROM persona
WHERE builtin_persona = true AND id != 0
"""
)
).fetchall()
builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
# Get all users with preferences
users_result = conn.execute(
sa.text(
"""
SELECT id, chosen_assistants, visible_assistants,
hidden_assistants, pinned_assistants
FROM "user"
"""
)
).fetchall()
for user_row in users_result:
user = UserRow(*user_row)
user_id: UUID = user.id
updates: dict[str, Any] = {}
# Remove all builtin assistants from chosen_assistants
if user.chosen_assistants:
new_chosen: list[int] = [
assistant_id
for assistant_id in user.chosen_assistants
if assistant_id not in builtin_assistant_ids
]
if new_chosen != user.chosen_assistants:
updates["chosen_assistants"] = json.dumps(new_chosen)
# Remove all builtin assistants from visible_assistants
if user.visible_assistants:
new_visible: list[int] = [
assistant_id
for assistant_id in user.visible_assistants
if assistant_id not in builtin_assistant_ids
]
if new_visible != user.visible_assistants:
updates["visible_assistants"] = json.dumps(new_visible)
# Add all builtin assistants to hidden_assistants
if user.hidden_assistants:
new_hidden: list[int] = list(user.hidden_assistants)
for old_id in builtin_assistant_ids:
if old_id not in new_hidden:
new_hidden.append(old_id)
if new_hidden != user.hidden_assistants:
updates["hidden_assistants"] = json.dumps(new_hidden)
else:
updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
# Remove all builtin assistants from pinned_assistants
if user.pinned_assistants:
new_pinned: list[int] = [
assistant_id
for assistant_id in user.pinned_assistants
if assistant_id not in builtin_assistant_ids
]
if new_pinned != user.pinned_assistants:
updates["pinned_assistants"] = json.dumps(new_pinned)
# Apply updates if any
if updates:
set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
updates["user_id"] = str(user_id) # Convert UUID to string for SQL
conn.execute(
sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
updates,
)
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
conn = op.get_bind()
# Only restore General (ID -1) and Art (ID -3) assistants
# Step 1: Keep Search assistant (ID 0) as default but restore original state
conn.execute(
sa.text(
# Start transaction
conn.execute(sa.text("BEGIN"))
try:
# Only restore General (ID -1) and Art (ID -3) assistants
# Step 1: Keep Search assistant (ID 0) as default but restore original state
conn.execute(
sa.text(
"""
UPDATE persona
SET is_default_persona = true,
is_visible = true,
deleted = false
WHERE id = 0
"""
UPDATE persona
SET is_default_persona = true,
is_visible = true,
deleted = false
WHERE id = 0
"""
)
)
)
# Step 2: Restore General assistant (ID -1)
conn.execute(
sa.text(
# Step 2: Restore General assistant (ID -1)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :general_assistant_id
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :general_assistant_id
"""
),
{"general_assistant_id": GENERAL_ASSISTANT_ID},
)
),
{"general_assistant_id": GENERAL_ASSISTANT_ID},
)
# Step 3: Restore Art assistant (ID -3)
conn.execute(
sa.text(
# Step 3: Restore Art assistant (ID -3)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :art_assistant_id
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :art_assistant_id
"""
),
{"art_assistant_id": ART_ASSISTANT_ID},
)
),
{"art_assistant_id": ART_ASSISTANT_ID},
)
# Note: We don't restore the original tool associations, names, or descriptions
# as those would require more complex logic to determine original state.
# We also cannot restore original chat session persona_ids as we don't
# have the original mappings.
# Other builtin assistants remain deleted as per the requirement.
# Note: We don't restore the original tool associations, names, or descriptions
# as those would require more complex logic to determine original state.
# We also cannot restore original chat session persona_ids as we don't
# have the original mappings.
# Other builtin assistants remain deleted as per the requirement.
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e

View File

@@ -1,45 +0,0 @@
"""make processing mode default all caps
Revision ID: 72aa7de2e5cf
Revises: 2020d417ec84
Create Date: 2026-01-26 18:58:47.705253
This migration fixes the ProcessingMode enum value mismatch:
- SQLAlchemy's Enum with native_enum=False uses enum member NAMES as valid values
- The original migration stored lowercase VALUES ('regular', 'file_system')
- This converts existing data to uppercase NAMES ('REGULAR', 'FILE_SYSTEM')
- Also drops any spurious native PostgreSQL enum type that may have been auto-created
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "72aa7de2e5cf"
down_revision = "2020d417ec84"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Convert existing lowercase values to uppercase to match enum member names
op.execute(
"UPDATE connector_credential_pair SET processing_mode = 'REGULAR' "
"WHERE processing_mode = 'regular'"
)
op.execute(
"UPDATE connector_credential_pair SET processing_mode = 'FILE_SYSTEM' "
"WHERE processing_mode = 'file_system'"
)
# Update the server default to use uppercase
op.alter_column(
"connector_credential_pair",
"processing_mode",
server_default="REGULAR",
)
def downgrade() -> None:
# State prior to this was broken, so we don't want to revert back to it
pass

View File

@@ -1,47 +0,0 @@
"""add_search_query_table
Revision ID: 73e9983e5091
Revises: d1b637d7050a
Create Date: 2026-01-14 14:16:52.837489
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "73e9983e5091"
down_revision = "d1b637d7050a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"search_query",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id"),
nullable=False,
),
sa.Column("query", sa.String(), nullable=False),
sa.Column("query_expansions", postgresql.ARRAY(sa.String()), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
)
op.create_index("ix_search_query_user_id", "search_query", ["user_id"])
op.create_index("ix_search_query_created_at", "search_query", ["created_at"])
def downgrade() -> None:
op.drop_index("ix_search_query_created_at", table_name="search_query")
op.drop_index("ix_search_query_user_id", table_name="search_query")
op.drop_table("search_query")

View File

@@ -10,7 +10,8 @@ from alembic import op
import sqlalchemy as sa
from onyx.db.models import IndexModelStatus
from onyx.context.search.enums import RecencyBiasSetting, SearchType
from onyx.context.search.enums import RecencyBiasSetting
from onyx.context.search.enums import SearchType
# revision identifiers, used by Alembic.
revision = "776b3bbe9092"

View File

@@ -1,58 +0,0 @@
"""remove reranking from search_settings
Revision ID: 78ebc66946a0
Revises: 849b21c732f8
Create Date: 2026-01-28
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "78ebc66946a0"
down_revision = "849b21c732f8"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.drop_column("search_settings", "disable_rerank_for_streaming")
op.drop_column("search_settings", "rerank_model_name")
op.drop_column("search_settings", "rerank_provider_type")
op.drop_column("search_settings", "rerank_api_key")
op.drop_column("search_settings", "rerank_api_url")
op.drop_column("search_settings", "num_rerank")
def downgrade() -> None:
op.add_column(
"search_settings",
sa.Column(
"disable_rerank_for_streaming",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
op.add_column(
"search_settings", sa.Column("rerank_model_name", sa.String(), nullable=True)
)
op.add_column(
"search_settings", sa.Column("rerank_provider_type", sa.String(), nullable=True)
)
op.add_column(
"search_settings", sa.Column("rerank_api_key", sa.String(), nullable=True)
)
op.add_column(
"search_settings", sa.Column("rerank_api_url", sa.String(), nullable=True)
)
op.add_column(
"search_settings",
sa.Column(
"num_rerank",
sa.Integer(),
nullable=False,
server_default=str(20),
),
)

View File

@@ -1,349 +0,0 @@
"""hierarchy_nodes_v1
Revision ID: 81c22b1e2e78
Revises: 72aa7de2e5cf
Create Date: 2026-01-13 18:10:01.021451
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.configs.constants import DocumentSource
# revision identifiers, used by Alembic.
revision = "81c22b1e2e78"
down_revision = "72aa7de2e5cf"
branch_labels = None
depends_on = None
# Human-readable display names for each source
SOURCE_DISPLAY_NAMES: dict[str, str] = {
"ingestion_api": "Ingestion API",
"slack": "Slack",
"web": "Web",
"google_drive": "Google Drive",
"gmail": "Gmail",
"requesttracker": "Request Tracker",
"github": "GitHub",
"gitbook": "GitBook",
"gitlab": "GitLab",
"guru": "Guru",
"bookstack": "BookStack",
"outline": "Outline",
"confluence": "Confluence",
"jira": "Jira",
"slab": "Slab",
"productboard": "Productboard",
"file": "File",
"coda": "Coda",
"notion": "Notion",
"zulip": "Zulip",
"linear": "Linear",
"hubspot": "HubSpot",
"document360": "Document360",
"gong": "Gong",
"google_sites": "Google Sites",
"zendesk": "Zendesk",
"loopio": "Loopio",
"dropbox": "Dropbox",
"sharepoint": "SharePoint",
"teams": "Teams",
"salesforce": "Salesforce",
"discourse": "Discourse",
"axero": "Axero",
"clickup": "ClickUp",
"mediawiki": "MediaWiki",
"wikipedia": "Wikipedia",
"asana": "Asana",
"s3": "S3",
"r2": "R2",
"google_cloud_storage": "Google Cloud Storage",
"oci_storage": "OCI Storage",
"xenforo": "XenForo",
"not_applicable": "Not Applicable",
"discord": "Discord",
"freshdesk": "Freshdesk",
"fireflies": "Fireflies",
"egnyte": "Egnyte",
"airtable": "Airtable",
"highspot": "Highspot",
"drupal_wiki": "Drupal Wiki",
"imap": "IMAP",
"bitbucket": "Bitbucket",
"testrail": "TestRail",
"mock_connector": "Mock Connector",
"user_file": "User File",
}
def upgrade() -> None:
# 1. Create hierarchy_node table
op.create_table(
"hierarchy_node",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("raw_node_id", sa.String(), nullable=False),
sa.Column("display_name", sa.String(), nullable=False),
sa.Column("link", sa.String(), nullable=True),
sa.Column("source", sa.String(), nullable=False),
sa.Column("node_type", sa.String(), nullable=False),
sa.Column("document_id", sa.String(), nullable=True),
sa.Column("parent_id", sa.Integer(), nullable=True),
# Permission fields - same pattern as Document table
sa.Column(
"external_user_emails",
postgresql.ARRAY(sa.String()),
nullable=True,
),
sa.Column(
"external_user_group_ids",
postgresql.ARRAY(sa.String()),
nullable=True,
),
sa.Column("is_public", sa.Boolean(), nullable=False, server_default="false"),
sa.PrimaryKeyConstraint("id"),
# When document is deleted, just unlink (node can exist without document)
sa.ForeignKeyConstraint(["document_id"], ["document.id"], ondelete="SET NULL"),
# When parent node is deleted, orphan children (cleanup via pruning)
sa.ForeignKeyConstraint(
["parent_id"], ["hierarchy_node.id"], ondelete="SET NULL"
),
sa.UniqueConstraint(
"raw_node_id", "source", name="uq_hierarchy_node_raw_id_source"
),
)
op.create_index("ix_hierarchy_node_parent_id", "hierarchy_node", ["parent_id"])
op.create_index(
"ix_hierarchy_node_source_type", "hierarchy_node", ["source", "node_type"]
)
# Add partial unique index to ensure only one SOURCE-type node per source
# This prevents duplicate source root nodes from being created
# NOTE: node_type stores enum NAME ('SOURCE'), not value ('source')
op.execute(
sa.text(
"""
CREATE UNIQUE INDEX uq_hierarchy_node_one_source_per_type
ON hierarchy_node (source)
WHERE node_type = 'SOURCE'
"""
)
)
# 2. Create hierarchy_fetch_attempt table
op.create_table(
"hierarchy_fetch_attempt",
sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
sa.Column("status", sa.String(), nullable=False),
sa.Column("nodes_fetched", sa.Integer(), nullable=True, server_default="0"),
sa.Column("nodes_updated", sa.Integer(), nullable=True, server_default="0"),
sa.Column("error_msg", sa.Text(), nullable=True),
sa.Column("full_exception_trace", sa.Text(), nullable=True),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.Column("time_started", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"time_updated",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(
["connector_credential_pair_id"],
["connector_credential_pair.id"],
ondelete="CASCADE",
),
)
op.create_index(
"ix_hierarchy_fetch_attempt_status", "hierarchy_fetch_attempt", ["status"]
)
op.create_index(
"ix_hierarchy_fetch_attempt_time_created",
"hierarchy_fetch_attempt",
["time_created"],
)
op.create_index(
"ix_hierarchy_fetch_attempt_cc_pair",
"hierarchy_fetch_attempt",
["connector_credential_pair_id"],
)
# 3. Insert SOURCE-type hierarchy nodes for each DocumentSource
# We insert these so every existing document can have a parent hierarchy node
# NOTE: SQLAlchemy's Enum with native_enum=False stores the enum NAME (e.g., 'GOOGLE_DRIVE'),
# not the VALUE (e.g., 'google_drive'). We must use .name for source and node_type columns.
# SOURCE nodes are always public since they're just categorical roots.
for source in DocumentSource:
source_name = (
source.name
) # e.g., 'GOOGLE_DRIVE' - what SQLAlchemy stores/expects
source_value = source.value # e.g., 'google_drive' - the raw_node_id
display_name = SOURCE_DISPLAY_NAMES.get(
source_value, source_value.replace("_", " ").title()
)
op.execute(
sa.text(
"""
INSERT INTO hierarchy_node (raw_node_id, display_name, source, node_type, parent_id, is_public)
VALUES (:raw_node_id, :display_name, :source, 'SOURCE', NULL, true)
ON CONFLICT (raw_node_id, source) DO NOTHING
"""
).bindparams(
raw_node_id=source_value, # Use .value for raw_node_id (human-readable identifier)
display_name=display_name,
source=source_name, # Use .name for source column (SQLAlchemy enum storage)
)
)
# 4. Add parent_hierarchy_node_id column to document table
op.add_column(
"document",
sa.Column("parent_hierarchy_node_id", sa.Integer(), nullable=True),
)
# When hierarchy node is deleted, just unlink the document (SET NULL)
op.create_foreign_key(
"fk_document_parent_hierarchy_node",
"document",
"hierarchy_node",
["parent_hierarchy_node_id"],
["id"],
ondelete="SET NULL",
)
op.create_index(
"ix_document_parent_hierarchy_node_id",
"document",
["parent_hierarchy_node_id"],
)
# 5. Set all existing documents' parent_hierarchy_node_id to their source's SOURCE node
# For documents with multiple connectors, we pick one source deterministically (MIN connector_id)
# NOTE: Both connector.source and hierarchy_node.source store enum NAMEs (e.g., 'GOOGLE_DRIVE')
# because SQLAlchemy Enum(native_enum=False) uses the enum name for storage.
op.execute(
sa.text(
"""
UPDATE document d
SET parent_hierarchy_node_id = hn.id
FROM (
-- Get the source for each document (pick MIN connector_id for determinism)
SELECT DISTINCT ON (dbcc.id)
dbcc.id as doc_id,
c.source as source
FROM document_by_connector_credential_pair dbcc
JOIN connector c ON dbcc.connector_id = c.id
ORDER BY dbcc.id, dbcc.connector_id
) doc_source
JOIN hierarchy_node hn ON hn.source = doc_source.source AND hn.node_type = 'SOURCE'
WHERE d.id = doc_source.doc_id
"""
)
)
# Create the persona__hierarchy_node association table
op.create_table(
"persona__hierarchy_node",
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.Column("hierarchy_node_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["hierarchy_node_id"],
["hierarchy_node.id"],
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("persona_id", "hierarchy_node_id"),
)
# Add index for efficient lookups
op.create_index(
"ix_persona__hierarchy_node_hierarchy_node_id",
"persona__hierarchy_node",
["hierarchy_node_id"],
)
# Create the persona__document association table for attaching individual
# documents directly to assistants
op.create_table(
"persona__document",
sa.Column("persona_id", sa.Integer(), nullable=False),
sa.Column("document_id", sa.String(), nullable=False),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["document_id"],
["document.id"],
ondelete="CASCADE",
),
sa.PrimaryKeyConstraint("persona_id", "document_id"),
)
# Add index for efficient lookups by document_id
op.create_index(
"ix_persona__document_document_id",
"persona__document",
["document_id"],
)
# 6. Add last_time_hierarchy_fetch column to connector_credential_pair table
op.add_column(
"connector_credential_pair",
sa.Column(
"last_time_hierarchy_fetch", sa.DateTime(timezone=True), nullable=True
),
)
def downgrade() -> None:
# Remove last_time_hierarchy_fetch from connector_credential_pair
op.drop_column("connector_credential_pair", "last_time_hierarchy_fetch")
# Drop persona__document table
op.drop_index("ix_persona__document_document_id", table_name="persona__document")
op.drop_table("persona__document")
# Drop persona__hierarchy_node table
op.drop_index(
"ix_persona__hierarchy_node_hierarchy_node_id",
table_name="persona__hierarchy_node",
)
op.drop_table("persona__hierarchy_node")
# Remove parent_hierarchy_node_id from document
op.drop_index("ix_document_parent_hierarchy_node_id", table_name="document")
op.drop_constraint(
"fk_document_parent_hierarchy_node", "document", type_="foreignkey"
)
op.drop_column("document", "parent_hierarchy_node_id")
# Drop hierarchy_fetch_attempt table
op.drop_index(
"ix_hierarchy_fetch_attempt_cc_pair", table_name="hierarchy_fetch_attempt"
)
op.drop_index(
"ix_hierarchy_fetch_attempt_time_created", table_name="hierarchy_fetch_attempt"
)
op.drop_index(
"ix_hierarchy_fetch_attempt_status", table_name="hierarchy_fetch_attempt"
)
op.drop_table("hierarchy_fetch_attempt")
# Drop hierarchy_node table
op.drop_index("uq_hierarchy_node_one_source_per_type", table_name="hierarchy_node")
op.drop_index("ix_hierarchy_node_source_type", table_name="hierarchy_node")
op.drop_index("ix_hierarchy_node_parent_id", table_name="hierarchy_node")
op.drop_table("hierarchy_node")

View File

@@ -1,49 +0,0 @@
"""notifications constraint, sort index, and cleanup old notifications
Revision ID: 8405ca81cc83
Revises: a3c1a7904cd0
Create Date: 2026-01-07 16:43:44.855156
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "8405ca81cc83"
down_revision = "a3c1a7904cd0"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique index for notification deduplication.
# This enables atomic ON CONFLICT DO NOTHING inserts in batch_create_notifications.
#
# Uses COALESCE to handle NULL additional_data (NULLs are normally distinct
# in unique constraints, but we want NULL == NULL for deduplication).
# The '{}' represents an empty JSONB object as the NULL replacement.
# Clean up legacy notifications first
op.execute("DELETE FROM notification WHERE title = 'New Notification'")
op.execute(
"""
CREATE UNIQUE INDEX IF NOT EXISTS ix_notification_user_type_data
ON notification (user_id, notif_type, COALESCE(additional_data, '{}'::jsonb))
"""
)
# Create index for efficient notification sorting by user
# Covers: WHERE user_id = ? ORDER BY dismissed, first_shown DESC
op.execute(
"""
CREATE INDEX IF NOT EXISTS ix_notification_user_sort
ON notification (user_id, dismissed, first_shown DESC)
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS ix_notification_user_type_data")
op.execute("DROP INDEX IF EXISTS ix_notification_user_sort")

View File

@@ -1,32 +0,0 @@
"""add demo_data_enabled to build_session
Revision ID: 849b21c732f8
Revises: 81c22b1e2e78
Create Date: 2026-01-28 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "849b21c732f8"
down_revision = "81c22b1e2e78"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"build_session",
sa.Column(
"demo_data_enabled",
sa.Boolean(),
nullable=False,
server_default=sa.text("true"),
),
)
def downgrade() -> None:
op.drop_column("build_session", "demo_data_enabled")

View File

@@ -1,116 +0,0 @@
"""Add Discord bot tables
Revision ID: 8b5ce697290e
Revises: a1b2c3d4e5f7
Create Date: 2025-01-14
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "8b5ce697290e"
down_revision = "a1b2c3d4e5f7"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
# DiscordBotConfig (singleton table - one per tenant)
op.create_table(
"discord_bot_config",
sa.Column(
"id",
sa.String(),
primary_key=True,
server_default=sa.text("'SINGLETON'"),
),
sa.Column("bot_token", sa.LargeBinary(), nullable=False), # EncryptedString
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.CheckConstraint("id = 'SINGLETON'", name="ck_discord_bot_config_singleton"),
)
# DiscordGuildConfig
op.create_table(
"discord_guild_config",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("guild_id", sa.BigInteger(), nullable=True, unique=True),
sa.Column("guild_name", sa.String(), nullable=True),
sa.Column("registration_key", sa.String(), nullable=False, unique=True),
sa.Column("registered_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"default_persona_id",
sa.Integer(),
sa.ForeignKey("persona.id", ondelete="SET NULL"),
nullable=True,
),
sa.Column(
"enabled", sa.Boolean(), server_default=sa.text("true"), nullable=False
),
)
# DiscordChannelConfig
op.create_table(
"discord_channel_config",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column(
"guild_config_id",
sa.Integer(),
sa.ForeignKey("discord_guild_config.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("channel_id", sa.BigInteger(), nullable=False),
sa.Column("channel_name", sa.String(), nullable=False),
sa.Column(
"channel_type",
sa.String(20),
server_default=sa.text("'text'"),
nullable=False,
),
sa.Column(
"is_private",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.Column(
"thread_only_mode",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.Column(
"require_bot_invocation",
sa.Boolean(),
server_default=sa.text("true"),
nullable=False,
),
sa.Column(
"persona_override_id",
sa.Integer(),
sa.ForeignKey("persona.id", ondelete="SET NULL"),
nullable=True,
),
sa.Column(
"enabled", sa.Boolean(), server_default=sa.text("false"), nullable=False
),
)
# Unique constraint: one config per channel per guild
op.create_unique_constraint(
"uq_discord_channel_guild_channel",
"discord_channel_config",
["guild_config_id", "channel_id"],
)
def downgrade() -> None:
op.drop_table("discord_channel_config")
op.drop_table("discord_guild_config")
op.drop_table("discord_bot_config")

View File

@@ -1,27 +0,0 @@
"""add processing_duration_seconds to chat_message
Revision ID: 9d1543a37106
Revises: cbc03e08d0f3
Create Date: 2026-01-21 11:42:18.546188
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9d1543a37106"
down_revision = "cbc03e08d0f3"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"chat_message",
sa.Column("processing_duration_seconds", sa.Float(), nullable=True),
)
def downgrade() -> None:
op.drop_column("chat_message", "processing_duration_seconds")

View File

@@ -42,13 +42,20 @@ TOOL_DESCRIPTIONS = {
def upgrade() -> None:
conn = op.get_bind()
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("BEGIN"))
try:
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("COMMIT"))
except Exception as e:
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:

View File

@@ -1,47 +0,0 @@
"""drop agent_search_metrics table
Revision ID: a1b2c3d4e5f7
Revises: 73e9983e5091
Create Date: 2026-01-17
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f7"
down_revision = "73e9983e5091"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_table("agent__search_metrics")
def downgrade() -> None:
op.create_table(
"agent__search_metrics",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("user_id", sa.UUID(), nullable=True),
sa.Column("persona_id", sa.Integer(), nullable=True),
sa.Column("agent_type", sa.String(), nullable=False),
sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
sa.Column("base_duration_s", sa.Float(), nullable=False),
sa.Column("full_duration_s", sa.Float(), nullable=False),
sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.PrimaryKeyConstraint("id"),
)

View File

@@ -1,40 +0,0 @@
"""Persona new default model configuration id column
Revision ID: be87a654d5af
Revises: e7f8a9b0c1d2
Create Date: 2026-01-30 11:14:17.306275
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "be87a654d5af"
down_revision = "e7f8a9b0c1d2"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"persona",
sa.Column("default_model_configuration_id", sa.Integer(), nullable=True),
)
op.create_foreign_key(
"fk_persona_default_model_configuration_id",
"persona",
"model_configuration",
["default_model_configuration_id"],
["id"],
ondelete="SET NULL",
)
def downgrade() -> None:
op.drop_constraint(
"fk_persona_default_model_configuration_id", "persona", type_="foreignkey"
)
op.drop_column("persona", "default_model_configuration_id")

View File

@@ -7,6 +7,7 @@ Create Date: 2025-12-18 16:00:00.000000
"""
from alembic import op
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
import sqlalchemy as sa
@@ -18,7 +19,7 @@ depends_on = None
DEEP_RESEARCH_TOOL = {
"name": "ResearchAgent",
"name": RESEARCH_AGENT_DB_NAME,
"display_name": "Research Agent",
"description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
"in_code_tool_id": "ResearchAgent",

View File

@@ -1,128 +0,0 @@
"""add_opensearch_migration_tables
Revision ID: cbc03e08d0f3
Revises: be87a654d5af
Create Date: 2026-01-31 17:00:45.176604
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "cbc03e08d0f3"
down_revision = "be87a654d5af"
branch_labels = None
depends_on = None
def upgrade() -> None:
# 1. Create opensearch_document_migration_record table.
op.create_table(
"opensearch_document_migration_record",
sa.Column("document_id", sa.String(), nullable=False),
sa.Column("status", sa.String(), nullable=False, server_default="pending"),
sa.Column("error_message", sa.Text(), nullable=True),
sa.Column("attempts_count", sa.Integer(), nullable=False, server_default="0"),
sa.Column("last_attempt_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.PrimaryKeyConstraint("document_id"),
sa.ForeignKeyConstraint(
["document_id"],
["document.id"],
ondelete="CASCADE",
),
)
# 2. Create indices.
op.create_index(
"ix_opensearch_document_migration_record_status",
"opensearch_document_migration_record",
["status"],
)
op.create_index(
"ix_opensearch_document_migration_record_attempts_count",
"opensearch_document_migration_record",
["attempts_count"],
)
op.create_index(
"ix_opensearch_document_migration_record_created_at",
"opensearch_document_migration_record",
["created_at"],
)
# 3. Create opensearch_tenant_migration_record table (singleton).
op.create_table(
"opensearch_tenant_migration_record",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"document_migration_record_table_population_status",
sa.String(),
nullable=False,
server_default="pending",
),
sa.Column(
"num_times_observed_no_additional_docs_to_populate_migration_table",
sa.Integer(),
nullable=False,
server_default="0",
),
sa.Column(
"overall_document_migration_status",
sa.String(),
nullable=False,
server_default="pending",
),
sa.Column(
"num_times_observed_no_additional_docs_to_migrate",
sa.Integer(),
nullable=False,
server_default="0",
),
sa.Column(
"last_updated_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
# 4. Create unique index on constant to enforce singleton pattern.
op.execute(
sa.text(
"""
CREATE UNIQUE INDEX idx_opensearch_tenant_migration_singleton
ON opensearch_tenant_migration_record ((true))
"""
)
)
def downgrade() -> None:
# Drop opensearch_tenant_migration_record.
op.drop_index(
"idx_opensearch_tenant_migration_singleton",
table_name="opensearch_tenant_migration_record",
)
op.drop_table("opensearch_tenant_migration_record")
# Drop opensearch_document_migration_record.
op.drop_index(
"ix_opensearch_document_migration_record_created_at",
table_name="opensearch_document_migration_record",
)
op.drop_index(
"ix_opensearch_document_migration_record_attempts_count",
table_name="opensearch_document_migration_record",
)
op.drop_index(
"ix_opensearch_document_migration_record_status",
table_name="opensearch_document_migration_record",
)
op.drop_table("opensearch_document_migration_record")

View File

@@ -70,66 +70,80 @@ BUILT_IN_TOOLS = [
def upgrade() -> None:
conn = op.get_bind()
# Get existing tools to check what already exists
existing_tools = conn.execute(
sa.text("SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL")
).fetchall()
existing_tool_ids = {row[0] for row in existing_tools}
# Start transaction
conn.execute(sa.text("BEGIN"))
# Insert or update built-in tools
for tool in BUILT_IN_TOOLS:
in_code_id = tool["in_code_tool_id"]
try:
# Get existing tools to check what already exists
existing_tools = conn.execute(
sa.text(
"SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL"
)
).fetchall()
existing_tool_ids = {row[0] for row in existing_tools}
# Handle historical rename: InternetSearchTool -> WebSearchTool
if (
in_code_id == "WebSearchTool"
and "WebSearchTool" not in existing_tool_ids
and "InternetSearchTool" in existing_tool_ids
):
# Rename the existing InternetSearchTool row in place and update fields
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description,
in_code_tool_id = :in_code_tool_id
WHERE in_code_tool_id = 'InternetSearchTool'
"""
),
tool,
)
# Keep the local view of existing ids in sync to avoid duplicate insert
existing_tool_ids.discard("InternetSearchTool")
existing_tool_ids.add("WebSearchTool")
continue
# Insert or update built-in tools
for tool in BUILT_IN_TOOLS:
in_code_id = tool["in_code_tool_id"]
if in_code_id in existing_tool_ids:
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
tool,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id)
VALUES (:name, :display_name, :description, :in_code_tool_id)
"""
),
tool,
)
# Handle historical rename: InternetSearchTool -> WebSearchTool
if (
in_code_id == "WebSearchTool"
and "WebSearchTool" not in existing_tool_ids
and "InternetSearchTool" in existing_tool_ids
):
# Rename the existing InternetSearchTool row in place and update fields
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description,
in_code_tool_id = :in_code_tool_id
WHERE in_code_tool_id = 'InternetSearchTool'
"""
),
tool,
)
# Keep the local view of existing ids in sync to avoid duplicate insert
existing_tool_ids.discard("InternetSearchTool")
existing_tool_ids.add("WebSearchTool")
continue
if in_code_id in existing_tool_ids:
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
tool,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id)
VALUES (:name, :display_name, :description, :in_code_tool_id)
"""
),
tool,
)
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:

View File

@@ -1,64 +0,0 @@
"""sync_exa_api_key_to_content_provider
Revision ID: d1b637d7050a
Revises: d25168c2beee
Create Date: 2026-01-09 15:54:15.646249
"""
from alembic import op
from sqlalchemy import text
# revision identifiers, used by Alembic.
revision = "d1b637d7050a"
down_revision = "d25168c2beee"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Exa uses a shared API key between search and content providers.
# For existing Exa search providers with API keys, create the corresponding
# content provider if it doesn't exist yet.
connection = op.get_bind()
# Check if Exa search provider exists with an API key
result = connection.execute(
text(
"""
SELECT api_key FROM internet_search_provider
WHERE provider_type = 'exa' AND api_key IS NOT NULL
LIMIT 1
"""
)
)
row = result.fetchone()
if row:
api_key = row[0]
# Create Exa content provider with the shared key
connection.execute(
text(
"""
INSERT INTO internet_content_provider
(name, provider_type, api_key, is_active)
VALUES ('Exa', 'exa', :api_key, false)
ON CONFLICT (name) DO NOTHING
"""
),
{"api_key": api_key},
)
def downgrade() -> None:
# Remove the Exa content provider that was created by this migration
connection = op.get_bind()
connection.execute(
text(
"""
DELETE FROM internet_content_provider
WHERE provider_type = 'exa'
"""
)
)

View File

@@ -1,86 +0,0 @@
"""tool_name_consistency
Revision ID: d25168c2beee
Revises: 8405ca81cc83
Create Date: 2026-01-11 17:54:40.135777
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d25168c2beee"
down_revision = "8405ca81cc83"
branch_labels = None
depends_on = None
# Currently the seeded tools have the in_code_tool_id == name
CURRENT_TOOL_NAME_MAPPING = [
"SearchTool",
"WebSearchTool",
"ImageGenerationTool",
"PythonTool",
"OpenURLTool",
"KnowledgeGraphTool",
"ResearchAgent",
]
# Mapping of in_code_tool_id -> name
# These are the expected names that we want in the database
EXPECTED_TOOL_NAME_MAPPING = {
"SearchTool": "internal_search",
"WebSearchTool": "web_search",
"ImageGenerationTool": "generate_image",
"PythonTool": "python",
"OpenURLTool": "open_url",
"KnowledgeGraphTool": "run_kg_search",
"ResearchAgent": "research_agent",
}
def upgrade() -> None:
conn = op.get_bind()
# Mapping of in_code_tool_id to the NAME constant from each tool class
# These match the .name property of each tool implementation
tool_name_mapping = EXPECTED_TOOL_NAME_MAPPING
# Update the name column for each tool based on its in_code_tool_id
for in_code_tool_id, expected_name in tool_name_mapping.items():
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :expected_name
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"expected_name": expected_name,
"in_code_tool_id": in_code_tool_id,
},
)
def downgrade() -> None:
conn = op.get_bind()
# Reverse the migration by setting name back to in_code_tool_id
# This matches the original pattern where name was the class name
for in_code_tool_id in CURRENT_TOOL_NAME_MAPPING:
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :current_name
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"current_name": in_code_tool_id,
"in_code_tool_id": in_code_tool_id,
},
)

View File

@@ -1,125 +0,0 @@
"""create_anonymous_user
This migration creates a permanent anonymous user in the database.
When anonymous access is enabled, unauthenticated requests will use this user
instead of returning user_id=NULL.
Revision ID: e7f8a9b0c1d2
Revises: f7ca3e2f45d9
Create Date: 2026-01-15 14:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "e7f8a9b0c1d2"
down_revision = "f7ca3e2f45d9"
branch_labels = None
depends_on = None
# Must match constants in onyx/configs/constants.py file
ANONYMOUS_USER_UUID = "00000000-0000-0000-0000-000000000002"
ANONYMOUS_USER_EMAIL = "anonymous@onyx.app"
# Tables with user_id foreign key that may need migration
TABLES_WITH_USER_ID = [
"chat_session",
"credential",
"document_set",
"persona",
"tool",
"notification",
"inputprompt",
]
def upgrade() -> None:
"""
Create the anonymous user for anonymous access feature.
Also migrates any remaining user_id=NULL records to the anonymous user.
"""
connection = op.get_bind()
# Create the anonymous user (using ON CONFLICT to be idempotent)
connection.execute(
sa.text(
"""
INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)
ON CONFLICT (id) DO NOTHING
"""
),
{
"id": ANONYMOUS_USER_UUID,
"email": ANONYMOUS_USER_EMAIL,
"hashed_password": "", # Empty password - user cannot log in directly
"is_active": True, # Active so it can be used for anonymous access
"is_superuser": False,
"is_verified": True, # Verified since no email verification needed
"role": "LIMITED", # Anonymous users have limited role to restrict access
},
)
# Migrate any remaining user_id=NULL records to anonymous user
for table in TABLES_WITH_USER_ID:
try:
# Exclude public credential (id=0) which must remain user_id=NULL
# Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
# Exclude builtin personas (builtin_persona=True) which must remain user_id=NULL
# Exclude system input prompts (is_public=True with user_id=NULL) which must remain user_id=NULL
if table == "credential":
condition = "user_id IS NULL AND id != 0"
elif table == "tool":
condition = "user_id IS NULL AND in_code_tool_id IS NULL"
elif table == "persona":
condition = "user_id IS NULL AND builtin_persona = false"
elif table == "inputprompt":
condition = "user_id IS NULL AND is_public = false"
else:
condition = "user_id IS NULL"
result = connection.execute(
sa.text(
f"""
UPDATE "{table}"
SET user_id = :user_id
WHERE {condition}
"""
),
{"user_id": ANONYMOUS_USER_UUID},
)
if result.rowcount > 0:
print(f"Updated {result.rowcount} rows in {table} to anonymous user")
except Exception as e:
print(f"Skipping {table}: {e}")
def downgrade() -> None:
"""
Set anonymous user's records back to NULL and delete the anonymous user.
"""
connection = op.get_bind()
# Set records back to NULL
for table in TABLES_WITH_USER_ID:
try:
connection.execute(
sa.text(
f"""
UPDATE "{table}"
SET user_id = NULL
WHERE user_id = :user_id
"""
),
{"user_id": ANONYMOUS_USER_UUID},
)
except Exception:
pass
# Delete the anonymous user
connection.execute(
sa.text('DELETE FROM "user" WHERE id = :user_id'),
{"user_id": ANONYMOUS_USER_UUID},
)

View File

@@ -1,57 +0,0 @@
"""Add flow mapping table
Revision ID: f220515df7b4
Revises: cbc03e08d0f3
Create Date: 2026-01-30 12:21:24.955922
"""
from onyx.db.enums import LLMModelFlowType
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "f220515df7b4"
down_revision = "9d1543a37106"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"llm_model_flow",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"llm_model_flow_type",
sa.Enum(LLMModelFlowType, name="llmmodelflowtype", native_enum=False),
nullable=False,
),
sa.Column(
"is_default", sa.Boolean(), nullable=False, server_default=sa.text("false")
),
sa.Column("model_configuration_id", sa.Integer(), nullable=False),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(
["model_configuration_id"], ["model_configuration.id"], ondelete="CASCADE"
),
sa.UniqueConstraint(
"llm_model_flow_type",
"model_configuration_id",
name="uq_model_config_per_llm_model_flow_type",
),
)
# Partial unique index so that there is at most one default for each flow type
op.create_index(
"ix_one_default_per_llm_model_flow",
"llm_model_flow",
["llm_model_flow_type"],
unique=True,
postgresql_where=sa.text("is_default IS TRUE"),
)
def downgrade() -> None:
# Drop the llm_model_flow table (index is dropped automatically with table)
op.drop_table("llm_model_flow")

View File

@@ -1,281 +0,0 @@
"""migrate_no_auth_data_to_placeholder
This migration handles the transition from AUTH_TYPE=disabled to requiring
authentication. It creates a placeholder user and assigns all data that was
created without a user (user_id=NULL) to this placeholder.
A database trigger is installed that automatically transfers all data from
the placeholder user to the first real user who registers, then drops itself.
Revision ID: f7ca3e2f45d9
Revises: 78ebc66946a0
Create Date: 2026-01-15 12:49:53.802741
"""
import os
from alembic import op
import sqlalchemy as sa
from shared_configs.configs import MULTI_TENANT
# revision identifiers, used by Alembic.
revision = "f7ca3e2f45d9"
down_revision = "78ebc66946a0"
branch_labels = None
depends_on = None
# Must match constants in onyx/configs/constants.py file
NO_AUTH_PLACEHOLDER_USER_UUID = "00000000-0000-0000-0000-000000000001"
NO_AUTH_PLACEHOLDER_USER_EMAIL = "no-auth-placeholder@onyx.app"
# Trigger and function names
TRIGGER_NAME = "trg_migrate_no_auth_data"
FUNCTION_NAME = "migrate_no_auth_data_to_user"
# Trigger function that migrates data from placeholder to first real user
MIGRATE_NO_AUTH_TRIGGER_FUNCTION = f"""
CREATE OR REPLACE FUNCTION {FUNCTION_NAME}()
RETURNS TRIGGER AS $$
DECLARE
placeholder_uuid UUID := '00000000-0000-0000-0000-000000000001'::uuid;
anonymous_uuid UUID := '00000000-0000-0000-0000-000000000002'::uuid;
placeholder_row RECORD;
schema_name TEXT;
BEGIN
-- Skip if this is the placeholder user being inserted
IF NEW.id = placeholder_uuid THEN
RETURN NULL;
END IF;
-- Skip if this is the anonymous user being inserted (not a real user)
IF NEW.id = anonymous_uuid THEN
RETURN NULL;
END IF;
-- Skip if the new user is not active
IF NEW.is_active = FALSE THEN
RETURN NULL;
END IF;
-- Get current schema for self-cleanup
schema_name := current_schema();
-- Try to lock the placeholder user row with FOR UPDATE SKIP LOCKED
-- This ensures only one concurrent transaction can proceed with migration
-- SKIP LOCKED means if another transaction has the lock, we skip (don't wait)
SELECT id INTO placeholder_row
FROM "user"
WHERE id = placeholder_uuid
FOR UPDATE SKIP LOCKED;
IF NOT FOUND THEN
-- Either placeholder doesn't exist or another transaction has it locked
-- Either way, drop the trigger and return without making admin
EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I."user"', schema_name);
EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);
RETURN NULL;
END IF;
-- We have exclusive lock on placeholder - proceed with migration
-- The INSERT has already completed (AFTER INSERT), so NEW.id exists in the table
-- Migrate chat_session
UPDATE "chat_session" SET user_id = NEW.id WHERE user_id = placeholder_uuid;
-- Migrate credential (exclude public credential id=0)
UPDATE "credential" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND id != 0;
-- Migrate document_set
UPDATE "document_set" SET user_id = NEW.id WHERE user_id = placeholder_uuid;
-- Migrate persona (exclude builtin personas)
UPDATE "persona" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND builtin_persona = FALSE;
-- Migrate tool (exclude builtin tools)
UPDATE "tool" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND in_code_tool_id IS NULL;
-- Migrate notification
UPDATE "notification" SET user_id = NEW.id WHERE user_id = placeholder_uuid;
-- Migrate inputprompt (exclude system/public prompts)
UPDATE "inputprompt" SET user_id = NEW.id WHERE user_id = placeholder_uuid AND is_public = FALSE;
-- Make the new user an admin (they had admin access in no-auth mode)
-- In AFTER INSERT trigger, we must UPDATE the row since it already exists
UPDATE "user" SET role = 'ADMIN' WHERE id = NEW.id;
-- Delete the placeholder user (we hold the lock so this is safe)
DELETE FROM "user" WHERE id = placeholder_uuid;
-- Drop the trigger and function (self-cleanup)
EXECUTE format('DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON %I."user"', schema_name);
EXECUTE format('DROP FUNCTION IF EXISTS %I.{FUNCTION_NAME}()', schema_name);
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
"""
MIGRATE_NO_AUTH_TRIGGER = f"""
CREATE TRIGGER {TRIGGER_NAME}
AFTER INSERT ON "user"
FOR EACH ROW
EXECUTE FUNCTION {FUNCTION_NAME}();
"""
def upgrade() -> None:
"""
Create a placeholder user and assign all NULL user_id records to it.
Install a trigger that migrates data to the first real user and self-destructs.
Only runs if AUTH_TYPE is currently disabled/none.
Skipped in multi-tenant mode - each tenant starts fresh with no legacy data.
"""
# Skip in multi-tenant mode - this migration handles single-tenant
# AUTH_TYPE=disabled -> auth transitions only
if MULTI_TENANT:
return
# Only run if AUTH_TYPE is currently disabled/none
# If they've already switched to auth-enabled, NULL data is stale anyway
auth_type = (os.environ.get("AUTH_TYPE") or "").lower()
if auth_type not in ("disabled", "none", ""):
print(f"AUTH_TYPE is '{auth_type}', not disabled. Skipping migration.")
return
connection = op.get_bind()
# Check if there are any NULL user_id records that need migration
tables_to_check = [
"chat_session",
"credential",
"document_set",
"persona",
"tool",
"notification",
"inputprompt",
]
has_null_records = False
for table in tables_to_check:
try:
result = connection.execute(
sa.text(f'SELECT 1 FROM "{table}" WHERE user_id IS NULL LIMIT 1')
)
if result.fetchone():
has_null_records = True
break
except Exception:
# Table might not exist
pass
if not has_null_records:
return
# Create the placeholder user
connection.execute(
sa.text(
"""
INSERT INTO "user" (id, email, hashed_password, is_active, is_superuser, is_verified, role)
VALUES (:id, :email, :hashed_password, :is_active, :is_superuser, :is_verified, :role)
"""
),
{
"id": NO_AUTH_PLACEHOLDER_USER_UUID,
"email": NO_AUTH_PLACEHOLDER_USER_EMAIL,
"hashed_password": "", # Empty password - user cannot log in
"is_active": False, # Inactive - user cannot log in
"is_superuser": False,
"is_verified": False,
"role": "BASIC",
},
)
# Assign NULL user_id records to the placeholder user
for table in tables_to_check:
try:
# Base condition for all tables
condition = "user_id IS NULL"
# Exclude public credential (id=0) which must remain user_id=NULL
if table == "credential":
condition += " AND id != 0"
# Exclude builtin tools (in_code_tool_id IS NOT NULL) which must remain user_id=NULL
elif table == "tool":
condition += " AND in_code_tool_id IS NULL"
# Exclude builtin personas which must remain user_id=NULL
elif table == "persona":
condition += " AND builtin_persona = FALSE"
# Exclude system/public input prompts which must remain user_id=NULL
elif table == "inputprompt":
condition += " AND is_public = FALSE"
result = connection.execute(
sa.text(
f"""
UPDATE "{table}"
SET user_id = :user_id
WHERE {condition}
"""
),
{"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
)
if result.rowcount > 0:
print(f"Updated {result.rowcount} rows in {table}")
except Exception as e:
print(f"Skipping {table}: {e}")
# Install the trigger function and trigger for automatic migration on first user registration
connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER_FUNCTION))
connection.execute(sa.text(MIGRATE_NO_AUTH_TRIGGER))
print("Installed trigger for automatic data migration on first user registration")
def downgrade() -> None:
"""
Drop trigger and function, set placeholder user's records back to NULL,
and delete the placeholder user.
"""
# Skip in multi-tenant mode for consistency with upgrade
if MULTI_TENANT:
return
connection = op.get_bind()
# Drop trigger and function if they exist (they may have already self-destructed)
connection.execute(sa.text(f'DROP TRIGGER IF EXISTS {TRIGGER_NAME} ON "user"'))
connection.execute(sa.text(f"DROP FUNCTION IF EXISTS {FUNCTION_NAME}()"))
tables_to_update = [
"chat_session",
"credential",
"document_set",
"persona",
"tool",
"notification",
"inputprompt",
]
# Set records back to NULL
for table in tables_to_update:
try:
connection.execute(
sa.text(
f"""
UPDATE "{table}"
SET user_id = NULL
WHERE user_id = :user_id
"""
),
{"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
)
except Exception:
pass
# Delete the placeholder user
connection.execute(
sa.text('DELETE FROM "user" WHERE id = :user_id'),
{"user_id": NO_AUTH_PLACEHOLDER_USER_UUID},
)

View File

@@ -1,31 +0,0 @@
"""add chat_background to user
Revision ID: fb80bdd256de
Revises: 8b5ce697290e
Create Date: 2026-01-16 16:15:59.222617
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fb80bdd256de"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column(
"chat_background",
sa.String(),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("user", "chat_background")

View File

@@ -116,7 +116,7 @@ def _get_access_for_documents(
return access_map
def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL
@@ -124,16 +124,13 @@ def _get_acl_for_user(user: User, db_session: Session) -> set[str]:
NOTE: is imported in onyx.access.access by `fetch_versioned_implementation`
DO NOT REMOVE."""
is_anonymous = user.is_anonymous
db_user_groups = (
[] if is_anonymous else fetch_user_groups_for_user(db_session, user.id)
)
db_user_groups = fetch_user_groups_for_user(db_session, user.id) if user else []
prefixed_user_groups = [
prefix_user_group(db_user_group.name) for db_user_group in db_user_groups
]
db_external_groups = (
[] if is_anonymous else fetch_external_groups_for_user(db_session, user.id)
fetch_external_groups_for_user(db_session, user.id) if user else []
)
prefixed_external_groups = [
prefix_external_group(db_external_group.external_user_group_id)

View File

@@ -1,11 +0,0 @@
from sqlalchemy.orm import Session
from ee.onyx.db.external_perm import fetch_external_groups_for_user
from onyx.db.models import User
def _get_user_external_group_ids(db_session: Session, user: User) -> list[str]:
if not user:
return []
external_groups = fetch_external_groups_for_user(db_session, user.id)
return [external_group.external_user_group_id for external_group in external_groups]

View File

@@ -33,8 +33,8 @@ def get_default_admin_user_emails_() -> list[str]:
async def current_cloud_superuser(
request: Request,
user: User = Depends(current_admin_user),
) -> User:
user: User | None = Depends(current_admin_user),
) -> User | None:
api_key = request.headers.get("Authorization", "").replace("Bearer ", "")
if api_key != SUPER_CLOUD_API_KEY:
raise HTTPException(status_code=401, detail="Invalid API key")

View File

@@ -25,7 +25,6 @@ from ee.onyx.db.connector_credential_pair import get_all_auto_sync_cc_pairs
from ee.onyx.db.document import upsert_document_external_perms
from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_queue_length
@@ -56,9 +55,6 @@ from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.hierarchy import (
update_hierarchy_node_permissions as db_update_hierarchy_node_permissions,
)
from onyx.db.models import ConnectorCredentialPair
from onyx.db.permission_sync_attempt import complete_doc_permission_sync_attempt
from onyx.db.permission_sync_attempt import create_doc_permission_sync_attempt
@@ -641,24 +637,17 @@ def connector_permission_sync_generator_task(
),
stop=stop_after_delay(DOCUMENT_PERMISSIONS_UPDATE_STOP_AFTER),
)
def element_update_permissions(
def document_update_permissions(
tenant_id: str,
permissions: ElementExternalAccess,
permissions: DocExternalAccess,
source_type_str: str,
connector_id: int,
credential_id: int,
) -> bool:
"""Update permissions for a document or hierarchy node."""
start = time.monotonic()
external_access = permissions.external_access
# Determine element type and identifier for logging
if isinstance(permissions, DocExternalAccess):
element_id = permissions.doc_id
element_type = "doc"
else:
element_id = permissions.raw_node_id
element_type = "node"
doc_id = permissions.doc_id
external_access = permissions.external_access
try:
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
@@ -668,57 +657,39 @@ def element_update_permissions(
emails=list(external_access.external_user_emails),
continue_on_error=True,
)
# Then upsert the document's external permissions
created_new_doc = upsert_document_external_perms(
db_session=db_session,
doc_id=doc_id,
external_access=external_access,
source_type=DocumentSource(source_type_str),
)
if isinstance(permissions, DocExternalAccess):
# Document permission update
created_new_doc = upsert_document_external_perms(
if created_new_doc:
# If a new document was created, we associate it with the cc_pair
upsert_document_by_connector_credential_pair(
db_session=db_session,
doc_id=permissions.doc_id,
external_access=external_access,
source_type=DocumentSource(source_type_str),
)
if created_new_doc:
# If a new document was created, we associate it with the cc_pair
upsert_document_by_connector_credential_pair(
db_session=db_session,
connector_id=connector_id,
credential_id=credential_id,
document_ids=[permissions.doc_id],
)
else:
# Hierarchy node permission update
db_update_hierarchy_node_permissions(
db_session=db_session,
raw_node_id=permissions.raw_node_id,
source=DocumentSource(permissions.source),
is_public=external_access.is_public,
external_user_emails=(
list(external_access.external_user_emails)
if external_access.external_user_emails
else None
),
external_user_group_ids=(
list(external_access.external_user_group_ids)
if external_access.external_user_group_ids
else None
),
connector_id=connector_id,
credential_id=credential_id,
document_ids=[doc_id],
)
elapsed = time.monotonic() - start
task_logger.info(
f"{element_type}={element_id} "
f"connector_id={connector_id} "
f"doc={doc_id} "
f"action=update_permissions "
f"elapsed={elapsed:.2f}"
)
except Exception as e:
task_logger.exception(
f"element_update_permissions exceptioned: {element_type}={element_id}, {connector_id=} {credential_id=}"
f"document_update_permissions exceptioned: "
f"connector_id={connector_id} doc_id={doc_id}"
)
raise e
finally:
task_logger.info(
f"element_update_permissions completed: {element_type}={element_id}, {connector_id=} {credential_id=}"
f"document_update_permissions completed: connector_id={connector_id} doc={doc_id}"
)
return True

View File

@@ -109,6 +109,7 @@ CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS = float(
STRIPE_SECRET_KEY = os.environ.get("STRIPE_SECRET_KEY")
STRIPE_PRICE_ID = os.environ.get("STRIPE_PRICE")
# JWT Public Key URL
JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)
@@ -122,23 +123,9 @@ SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")
# when the capture is called. These defaults prevent Posthog issues from breaking the Onyx app
POSTHOG_API_KEY = os.environ.get("POSTHOG_API_KEY") or "FooBar"
POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
POSTHOG_DEBUG_LOGS_ENABLED = (
os.environ.get("POSTHOG_DEBUG_LOGS_ENABLED", "").lower() == "true"
)
MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")
HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")
GATED_TENANTS_KEY = "gated_tenants"
# License enforcement - when True, blocks API access for gated/expired licenses
LICENSE_ENFORCEMENT_ENABLED = (
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "").lower() == "true"
)
# Cloud data plane URL - self-hosted instances call this to reach cloud proxy endpoints
# Used when MULTI_TENANT=false (self-hosted mode)
CLOUD_DATA_PLANE_URL = os.environ.get(
"CLOUD_DATA_PLANE_URL", "https://cloud.onyx.app/api"
)

View File

@@ -1,73 +0,0 @@
"""Constants for license enforcement.
This file is the single source of truth for:
1. Paths that bypass license enforcement (always accessible)
2. Paths that require an EE license (EE-only features)
Import these constants in both production code and tests to ensure consistency.
"""
# Paths that are ALWAYS accessible, even when license is expired/gated.
# These enable users to:
# /auth - Log in/out (users can't fix billing if locked out of auth)
# /license - Fetch, upload, or check license status
# /health - Health checks for load balancers/orchestrators
# /me - Basic user info needed for UI rendering
# /settings, /enterprise-settings - View app status and branding
# /billing - Unified billing API
# /proxy - Self-hosted proxy endpoints (have own license-based auth)
# /tenants/billing-* - Legacy billing endpoints (backwards compatibility)
# /manage/users, /users - User management (needed for seat limit resolution)
# /notifications - Needed for UI to load properly
LICENSE_ENFORCEMENT_ALLOWED_PREFIXES: frozenset[str] = frozenset(
{
"/auth",
"/license",
"/health",
"/me",
"/settings",
"/enterprise-settings",
# Billing endpoints (unified API for both MT and self-hosted)
"/billing",
"/admin/billing",
# Proxy endpoints for self-hosted billing (no tenant context)
"/proxy",
# Legacy tenant billing endpoints (kept for backwards compatibility)
"/tenants/billing-information",
"/tenants/create-customer-portal-session",
"/tenants/create-subscription-session",
# User management - needed to remove users when seat limit exceeded
"/manage/users",
"/manage/admin/users",
"/manage/admin/valid-domains",
"/manage/admin/deactivate-user",
"/manage/admin/delete-user",
"/users",
# Notifications - needed for UI to load properly
"/notifications",
}
)
# EE-only paths that require a valid license.
# Users without a license (community edition) cannot access these.
# These are blocked even when user has never subscribed (no license).
EE_ONLY_PATH_PREFIXES: frozenset[str] = frozenset(
{
# User groups and access control
"/manage/admin/user-group",
# Analytics and reporting
"/analytics",
# Query history (admin chat session endpoints)
"/admin/chat-sessions",
"/admin/chat-session-history",
"/admin/query-history",
# Usage reporting/export
"/admin/usage-report",
# Standard answers (canned responses)
"/manage/admin/standard-answer",
# Token rate limits
"/admin/token-rate-limits",
# Evals
"/evals",
}
)

View File

@@ -334,9 +334,11 @@ def fetch_assistant_unique_users_total(
# Users can view assistant stats if they created the persona,
# or if they are an admin
def user_can_view_assistant_stats(
db_session: Session, user: User, assistant_id: int
db_session: Session, user: User | None, assistant_id: int
) -> bool:
if user.role == UserRole.ADMIN:
# If user is None and auth is disabled, assume the user is an admin
if user is None or user.role == UserRole.ADMIN:
return True
# Check if the user created the persona

View File

@@ -1,67 +0,0 @@
"""EE version of hierarchy node access control.
This module provides permission-aware hierarchy node access for Enterprise Edition.
It filters hierarchy nodes based on user email and external group membership.
"""
from sqlalchemy import any_
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import Session
from sqlalchemy.sql.elements import ColumnElement
from onyx.configs.constants import DocumentSource
from onyx.db.models import HierarchyNode
def _build_hierarchy_access_filter(
user_email: str | None,
external_group_ids: list[str],
) -> ColumnElement[bool]:
"""Build SQLAlchemy filter for hierarchy node access.
A user can access a hierarchy node if any of the following are true:
- The node is marked as public (is_public=True)
- The user's email is in the node's external_user_emails list
- Any of the user's external group IDs overlap with the node's external_user_group_ids
"""
access_filters: list[ColumnElement[bool]] = [HierarchyNode.is_public.is_(True)]
if user_email:
access_filters.append(any_(HierarchyNode.external_user_emails) == user_email)
if external_group_ids:
access_filters.append(
HierarchyNode.external_user_group_ids.overlap(
postgresql.array(external_group_ids)
)
)
return or_(*access_filters)
def _get_accessible_hierarchy_nodes_for_source(
db_session: Session,
source: DocumentSource,
user_email: str | None,
external_group_ids: list[str],
) -> list[HierarchyNode]:
"""
EE version: Returns hierarchy nodes filtered by user permissions.
A user can access a hierarchy node if any of the following are true:
- The node is marked as public (is_public=True)
- The user's email is in the node's external_user_emails list
- Any of the user's external group IDs overlap with the node's external_user_group_ids
Args:
db_session: SQLAlchemy session
source: Document source type
user_email: User's email for permission checking
external_group_ids: User's external group IDs for permission checking
Returns:
List of HierarchyNode objects the user has access to
"""
stmt = select(HierarchyNode).where(HierarchyNode.source == source)
stmt = stmt.where(_build_hierarchy_access_filter(user_email, external_group_ids))
stmt = stmt.order_by(HierarchyNode.display_name)
return list(db_session.execute(stmt).scalars().all())

View File

@@ -1,7 +1,6 @@
"""Database and cache operations for the license table."""
from datetime import datetime
from typing import NamedTuple
from sqlalchemy import func
from sqlalchemy import select
@@ -10,7 +9,6 @@ from sqlalchemy.orm import Session
from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.auth.schemas import UserRole
from onyx.db.models import License
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
@@ -25,13 +23,6 @@ LICENSE_METADATA_KEY = "license:metadata"
LICENSE_CACHE_TTL_SECONDS = 86400 # 24 hours
class SeatAvailabilityResult(NamedTuple):
"""Result of a seat availability check."""
available: bool
error_message: str | None = None
# -----------------------------------------------------------------------------
# Database CRUD Operations
# -----------------------------------------------------------------------------
@@ -104,30 +95,23 @@ def delete_license(db_session: Session) -> bool:
def get_used_seats(tenant_id: str | None = None) -> int:
"""
Get current seat usage directly from database.
Get current seat usage.
For multi-tenant: counts users in UserTenantMapping for this tenant.
For self-hosted: counts all active users (excludes EXT_PERM_USER role).
TODO: Exclude API key dummy users from seat counting. API keys create
users with emails like `__DANSWER_API_KEY_*` that should not count toward
seat limits. See: https://linear.app/onyx-app/issue/ENG-3518
For self-hosted: counts all active users (includes both Onyx UI users
and Slack users who have been converted to Onyx users).
"""
if MULTI_TENANT:
from ee.onyx.server.tenants.user_mapping import get_tenant_count
return get_tenant_count(tenant_id or get_current_tenant_id())
else:
# Self-hosted: count all active users (Onyx + converted Slack users)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
with get_session_with_current_tenant() as db_session:
result = db_session.execute(
select(func.count())
.select_from(User)
.where(
User.is_active == True, # type: ignore # noqa: E712
User.role != UserRole.EXT_PERM_USER,
)
select(func.count()).select_from(User).where(User.is_active) # type: ignore
)
return result.scalar() or 0
@@ -227,10 +211,10 @@ def update_license_cache(
stripe_subscription_id=payload.stripe_subscription_id,
)
redis_client.set(
redis_client.setex(
LICENSE_METADATA_KEY,
LICENSE_CACHE_TTL_SECONDS,
metadata.model_dump_json(),
ex=LICENSE_CACHE_TTL_SECONDS,
)
logger.info(f"License cache updated: {metadata.seats} seats, status={status.value}")
@@ -292,43 +276,3 @@ def get_license_metadata(
# Refresh from database
return refresh_license_cache(db_session, tenant_id)
def check_seat_availability(
db_session: Session,
seats_needed: int = 1,
tenant_id: str | None = None,
) -> SeatAvailabilityResult:
"""
Check if there are enough seats available to add users.
Args:
db_session: Database session
seats_needed: Number of seats needed (default 1)
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
SeatAvailabilityResult with available=True if seats are available,
or available=False with error_message if limit would be exceeded.
Returns available=True if no license exists (self-hosted = unlimited).
"""
metadata = get_license_metadata(db_session, tenant_id)
# No license = no enforcement (self-hosted without license)
if metadata is None:
return SeatAvailabilityResult(available=True)
# Calculate current usage directly from DB (not cache) for accuracy
current_used = get_used_seats(tenant_id)
total_seats = metadata.seats
# Use > (not >=) to allow filling to exactly 100% capacity
would_exceed_limit = current_used + seats_needed > total_seats
if would_exceed_limit:
return SeatAvailabilityResult(
available=False,
error_message=f"Seat limit would be exceeded: {current_used} of {total_seats} seats used, "
f"cannot add {seats_needed} more user(s).",
)
return SeatAvailabilityResult(available=True)

View File

@@ -3,42 +3,30 @@ from uuid import UUID
from sqlalchemy.orm import Session
from onyx.configs.constants import NotificationType
from onyx.db.models import Persona
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import PersonaSharedNotificationData
def update_persona_access(
def make_persona_private(
persona_id: int,
creator_user_id: UUID | None,
user_ids: list[UUID] | None,
group_ids: list[int] | None,
db_session: Session,
is_public: bool | None = None,
user_ids: list[UUID] | None = None,
group_ids: list[int] | None = None,
) -> None:
"""Updates the access settings for a persona including public status, user shares,
and group shares.
"""NOTE(rkuo): This function batches all updates into a single commit. If we don't
dedupe the inputs, the commit will exception."""
NOTE: This function batches all updates. If we don't dedupe the inputs,
the commit will exception.
NOTE: Callers are responsible for committing."""
if is_public is not None:
persona = db_session.query(Persona).filter(Persona.id == persona_id).first()
if persona:
persona.is_public = is_public
# NOTE: For user-ids and group-ids, `None` means "leave unchanged", `[]` means "clear all shares",
# and a non-empty list means "replace with these shares".
if user_ids is not None:
db_session.query(Persona__User).filter(
Persona__User.persona_id == persona_id
).delete(synchronize_session="fetch")
db_session.query(Persona__User).filter(
Persona__User.persona_id == persona_id
).delete(synchronize_session="fetch")
db_session.query(Persona__UserGroup).filter(
Persona__UserGroup.persona_id == persona_id
).delete(synchronize_session="fetch")
if user_ids:
user_ids_set = set(user_ids)
for user_id in user_ids_set:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_id))
@@ -53,13 +41,11 @@ def update_persona_access(
).model_dump(),
)
if group_ids is not None:
db_session.query(Persona__UserGroup).filter(
Persona__UserGroup.persona_id == persona_id
).delete(synchronize_session="fetch")
if group_ids:
group_ids_set = set(group_ids)
for group_id in group_ids_set:
db_session.add(
Persona__UserGroup(persona_id=persona_id, user_group_id=group_id)
)
db_session.commit()

View File

@@ -1,64 +0,0 @@
import uuid
from datetime import timedelta
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import SearchQuery
def create_search_query(
db_session: Session,
user_id: UUID,
query: str,
query_expansions: list[str] | None = None,
) -> SearchQuery:
"""Create and persist a `SearchQuery` row.
Notes:
- `SearchQuery.id` is a UUID PK without a server-side default, so we generate it.
- `created_at` is filled by the DB (server_default=now()).
"""
search_query = SearchQuery(
id=uuid.uuid4(),
user_id=user_id,
query=query,
query_expansions=query_expansions,
)
db_session.add(search_query)
db_session.commit()
db_session.refresh(search_query)
return search_query
def fetch_search_queries_for_user(
db_session: Session,
user_id: UUID,
filter_days: int | None = None,
limit: int | None = None,
) -> list[SearchQuery]:
"""Fetch `SearchQuery` rows for a user.
Args:
user_id: User UUID.
filter_days: Optional time filter. If provided, only rows created within
the last `filter_days` days are returned.
limit: Optional max number of rows to return.
"""
if filter_days is not None and filter_days <= 0:
raise ValueError("filter_days must be > 0")
stmt = select(SearchQuery).where(SearchQuery.user_id == user_id)
if filter_days is not None and filter_days > 0:
cutoff = get_db_current_time(db_session) - timedelta(days=filter_days)
stmt = stmt.where(SearchQuery.created_at >= cutoff)
stmt = stmt.order_by(SearchQuery.created_at.desc())
if limit is not None:
stmt = stmt.limit(limit)
return list(db_session.scalars(stmt).all())

View File

@@ -7,6 +7,7 @@ from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
from onyx.configs.app_configs import DISABLE_AUTH
from onyx.configs.constants import TokenRateLimitScope
from onyx.db.models import TokenRateLimit
from onyx.db.models import TokenRateLimit__UserGroup
@@ -17,15 +18,13 @@ from onyx.db.models import UserRole
from onyx.server.token_rate_limits.models import TokenRateLimitArgs
def _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Select:
if user.role == UserRole.ADMIN:
def _add_user_filters(
stmt: Select, user: User | None, get_editable: bool = True
) -> Select:
# If user is None and auth is disabled, assume the user is an admin
if (user is None and DISABLE_AUTH) or (user and user.role == UserRole.ADMIN):
return stmt
# If anonymous user, only show global/public token_rate_limits
if user.is_anonymous:
where_clause = TokenRateLimit.scope == TokenRateLimitScope.GLOBAL
return stmt.where(where_clause)
stmt = stmt.distinct()
TRLimit_UG = aliased(TokenRateLimit__UserGroup)
User__UG = aliased(User__UserGroup)
@@ -50,6 +49,11 @@ def _add_user_filters(stmt: Select, user: User, get_editable: bool = True) -> Se
- if we are not editing, we show all token_rate_limits in the groups the user curates
"""
# If user is None, this is an anonymous user and we should only show public token_rate_limits
if user is None:
where_clause = TokenRateLimit.scope == TokenRateLimitScope.GLOBAL
return stmt.where(where_clause)
where_clause = User__UG.user_id == user.id
if user.role == UserRole.CURATOR and get_editable:
where_clause &= User__UG.is_curator == True # noqa: E712
@@ -110,7 +114,7 @@ def insert_user_group_token_rate_limit(
def fetch_user_group_token_rate_limits_for_user(
db_session: Session,
group_id: int,
user: User,
user: User | None,
enabled_only: bool = False,
ordered: bool = True,
get_editable: bool = True,

View File

@@ -125,7 +125,7 @@ def _cleanup_document_set__user_group_relationships__no_commit(
def validate_object_creation_for_user(
db_session: Session,
user: User,
user: User | None,
target_group_ids: list[int] | None = None,
object_is_public: bool | None = None,
object_is_perm_sync: bool | None = None,
@@ -144,8 +144,7 @@ def validate_object_creation_for_user(
if object_is_perm_sync and not target_group_ids:
return
# Admins are allowed
if user.role == UserRole.ADMIN:
if not user or user.role == UserRole.ADMIN:
return
# Allow curators and global curators to create public objects
@@ -475,15 +474,14 @@ def remove_curator_status__no_commit(db_session: Session, user: User) -> None:
def _validate_curator_relationship_update_requester(
db_session: Session,
user_group_id: int,
user_making_change: User,
user_making_change: User | None = None,
) -> None:
"""
This function validates that the user making the change has the necessary permissions
to update the curator relationship for the target user in the given user group.
"""
# Admins can update curator relationships for any group
if user_making_change.role == UserRole.ADMIN:
if user_making_change is None or user_making_change.role == UserRole.ADMIN:
return
# check if the user making the change is a curator in the group they are changing the curator relationship for
@@ -552,7 +550,7 @@ def update_user_curator_relationship(
db_session: Session,
user_group_id: int,
set_curator_request: SetCuratorRequest,
user_making_change: User,
user_making_change: User | None = None,
) -> None:
target_user = fetch_user_by_id(db_session, set_curator_request.user_id)
if not target_user:
@@ -601,7 +599,7 @@ def update_user_curator_relationship(
def add_users_to_user_group(
db_session: Session,
user: User,
user: User | None,
user_group_id: int,
user_ids: list[UUID],
) -> UserGroup:
@@ -643,7 +641,7 @@ def add_users_to_user_group(
def update_user_group(
db_session: Session,
user: User,
user: User | None,
user_group_id: int,
user_group_update: UserGroupUpdate,
) -> UserGroup:

View File

@@ -8,7 +8,7 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import ElementExternalAccess
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.confluence.connector import ConfluenceConnector
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
@@ -28,7 +28,7 @@ def confluence_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
) -> Generator[DocExternalAccess, None, None]:
"""
Fetches document permissions from Confluence and yields DocExternalAccess objects.
Compares fetched documents against existing documents in the DB for the connector.

View File

@@ -5,12 +5,8 @@ from datetime import timezone
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.models import HierarchyNode
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -42,12 +38,12 @@ def gmail_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
) -> Generator[DocExternalAccess, None, None]:
"""
Adds the external permissions to the documents and hierarchy nodes in postgres.
If the document doesn't already exist in postgres, we create
Adds the external permissions to the documents in postgres
if the document doesn't already exists in postgres, we create
it in postgres so that when it gets created later, the permissions are
already populated.
already populated
"""
gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)
gmail_connector.load_credentials(cc_pair.credential.credential_json)
@@ -64,15 +60,6 @@ def gmail_doc_sync(
callback.progress("gmail_doc_sync", 1)
if isinstance(slim_doc, HierarchyNode):
# Yield hierarchy node permissions to be processed in outer layer
if slim_doc.external_access:
yield NodeExternalAccess(
external_access=slim_doc.external_access,
raw_node_id=slim_doc.raw_node_id,
source=DocumentSource.GMAIL.value,
)
continue
if slim_doc.external_access is None:
logger.warning(f"No permissions found for document {slim_doc.id}")
continue

View File

@@ -10,15 +10,11 @@ from ee.onyx.external_permissions.google_drive.permission_retrieval import (
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.models import GoogleDriveFileType
from onyx.connectors.google_utils.resources import GoogleDriveService
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.models import HierarchyNode
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -171,101 +167,17 @@ def get_external_access_for_raw_gdrive_file(
)
def get_external_access_for_folder(
folder: GoogleDriveFileType,
google_domain: str,
drive_service: GoogleDriveService,
) -> ExternalAccess:
"""
Extract ExternalAccess from a folder's permissions.
This fetches permissions using the Drive API (via permissionIds) and extracts
user emails, group emails, and public access status.
Args:
folder: The folder metadata from Google Drive API (must include permissionIds field)
google_domain: The company's Google Workspace domain (e.g., "company.com")
drive_service: Google Drive service for fetching permission details
Returns:
ExternalAccess with extracted permission info
"""
folder_id = folder.get("id")
if not folder_id:
logger.warning("Folder missing ID, returning empty permissions")
return ExternalAccess(
external_user_emails=set(),
external_user_group_ids=set(),
is_public=False,
)
# Get permission IDs from folder metadata
permission_ids = folder.get("permissionIds") or []
if not permission_ids:
logger.debug(f"No permissionIds found for folder {folder_id}")
return ExternalAccess(
external_user_emails=set(),
external_user_group_ids=set(),
is_public=False,
)
# Fetch full permission objects using the permission IDs
permissions_list = get_permissions_by_ids(
drive_service=drive_service,
doc_id=folder_id,
permission_ids=permission_ids,
)
user_emails: set[str] = set()
group_emails: set[str] = set()
is_public = False
for permission in permissions_list:
if permission.type == PermissionType.USER:
if permission.email_address:
user_emails.add(permission.email_address)
else:
logger.warning(f"User permission without email for folder {folder_id}")
elif permission.type == PermissionType.GROUP:
# Groups are represented as email addresses in Google Drive
if permission.email_address:
group_emails.add(permission.email_address)
else:
logger.warning(f"Group permission without email for folder {folder_id}")
elif permission.type == PermissionType.DOMAIN:
# Domain permission - check if it matches company domain
if permission.domain == google_domain:
# Only public if discoverable (allowFileDiscovery is not False)
# If allowFileDiscovery is False, it's "link only" access
is_public = permission.allow_file_discovery is not False
else:
logger.debug(
f"Domain permission for {permission.domain} does not match "
f"company domain {google_domain} for folder {folder_id}"
)
elif permission.type == PermissionType.ANYONE:
# Only public if discoverable (allowFileDiscovery is not False)
# If allowFileDiscovery is False, it's "link only" access
is_public = permission.allow_file_discovery is not False
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_emails,
is_public=is_public,
)
def gdrive_doc_sync(
cc_pair: ConnectorCredentialPair,
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
) -> Generator[DocExternalAccess, None, None]:
"""
Adds the external permissions to the documents and hierarchy nodes in postgres.
If the document doesn't already exist in postgres, we create
Adds the external permissions to the documents in postgres
if the document doesn't already exists in postgres, we create
it in postgres so that when it gets created later, the permissions are
already populated.
already populated
"""
google_drive_connector = GoogleDriveConnector(
**cc_pair.connector.connector_specific_config
@@ -283,15 +195,7 @@ def gdrive_doc_sync(
raise RuntimeError("gdrive_doc_sync: Stop signal detected")
callback.progress("gdrive_doc_sync", 1)
if isinstance(slim_doc, HierarchyNode):
# Yield hierarchy node permissions to be processed in outer layer
if slim_doc.external_access:
yield NodeExternalAccess(
external_access=slim_doc.external_access,
raw_node_id=slim_doc.raw_node_id,
source=DocumentSource.GOOGLE_DRIVE.value,
)
continue
if slim_doc.external_access is None:
raise ValueError(
f"Drive perm sync: No external access for document {slim_doc.id}"

View File

@@ -30,10 +30,6 @@ class GoogleDrivePermission(BaseModel):
type: PermissionType
domain: str | None # only applies to domain permissions
permission_details: GoogleDrivePermissionDetails | None
# Whether this permission makes the file discoverable in search
# False means "anyone with the link" (not searchable/discoverable)
# Only applicable for domain/anyone permission types
allow_file_discovery: bool | None
@classmethod
def from_drive_permission(
@@ -50,7 +46,6 @@ class GoogleDrivePermission(BaseModel):
email_address=drive_permission.get("emailAddress"),
type=PermissionType(drive_permission["type"]),
domain=drive_permission.get("domain"),
allow_file_discovery=drive_permission.get("allowFileDiscovery"),
permission_details=(
GoogleDrivePermissionDetails(
permission_type=permission_details.get("type"),

View File

@@ -36,7 +36,7 @@ def get_permissions_by_ids(
retrieval_function=drive_service.permissions().list,
list_key="permissions",
fileId=doc_id,
fields="permissions(id, emailAddress, type, domain, allowFileDiscovery, permissionDetails),nextPageToken",
fields="permissions(id, emailAddress, type, domain, permissionDetails),nextPageToken",
supportsAllDrives=True,
continue_on_404_or_403=True,
)

View File

@@ -3,7 +3,7 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import ElementExternalAccess
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.jira.connector import JiraConnector
from onyx.db.models import ConnectorCredentialPair
@@ -20,7 +20,7 @@ def jira_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:
) -> Generator[DocExternalAccess, None, None]:
jira_connector = JiraConnector(
**cc_pair.connector.connector_specific_config,
)

View File

@@ -5,8 +5,6 @@ from typing import Protocol
from ee.onyx.db.external_perm import ExternalUserGroup # noqa
from onyx.access.models import DocExternalAccess # noqa
from onyx.access.models import ElementExternalAccess # noqa
from onyx.access.models import NodeExternalAccess # noqa
from onyx.context.search.models import InferenceChunk
from onyx.db.models import ConnectorCredentialPair # noqa
from onyx.db.utils import DocumentRow
@@ -55,7 +53,7 @@ DocSyncFuncType = Callable[
FetchAllDocumentsIdsFunction,
Optional[IndexingHeartbeatInterface],
],
Generator[ElementExternalAccess, None, None],
Generator[DocExternalAccess, None, None],
]
GroupSyncFuncType = Callable[

View File

@@ -34,21 +34,21 @@ def _get_all_censoring_enabled_sources() -> set[DocumentSource]:
# NOTE: This is only called if ee is enabled.
def _post_query_chunk_censoring(
chunks: list[InferenceChunk],
user: User,
user: User | None,
) -> list[InferenceChunk]:
"""
This function checks all chunks to see if they need to be sent to a censoring
function. If they do, it sends them to the censoring function and returns the
censored chunks. If they don't, it returns the original chunks.
"""
sources_to_censor = _get_all_censoring_enabled_sources()
# Anonymous users can only access public (non-permission-synced) content
if user.is_anonymous:
return [chunk for chunk in chunks if chunk.source_type not in sources_to_censor]
if user is None:
# if user is None, permissions are not enforced
return chunks
final_chunk_dict: dict[str, InferenceChunk] = {}
chunks_to_process: dict[DocumentSource, list[InferenceChunk]] = {}
sources_to_censor = _get_all_censoring_enabled_sources()
for chunk in chunks:
# Separate out chunks that require permission post-processing by source
if chunk.source_type in sources_to_censor:

View File

@@ -3,7 +3,7 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import ElementExternalAccess
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.db.models import ConnectorCredentialPair
@@ -20,7 +20,7 @@ def sharepoint_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None = None,
) -> Generator[ElementExternalAccess, None, None]:
) -> Generator[DocExternalAccess, None, None]:
sharepoint_connector = SharepointConnector(
**cc_pair.connector.connector_specific_config,
)

View File

@@ -8,7 +8,6 @@ from ee.onyx.external_permissions.slack.utils import fetch_user_id_to_email_map
from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.connectors.models import HierarchyNode
from onyx.connectors.slack.connector import get_channels
from onyx.connectors.slack.connector import make_paginated_slack_api_call
from onyx.connectors.slack.connector import SlackConnector
@@ -112,9 +111,6 @@ def _get_slack_document_access(
for doc_metadata_batch in slim_doc_generator:
for doc_metadata in doc_metadata_batch:
if isinstance(doc_metadata, HierarchyNode):
# TODO: handle hierarchynodes during sync
continue
if doc_metadata.external_access is None:
raise ValueError(
f"No external access for document {doc_metadata.id}. "

View File

@@ -3,7 +3,7 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from ee.onyx.external_permissions.utils import generic_doc_sync
from onyx.access.models import ElementExternalAccess
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.teams.connector import TeamsConnector
from onyx.db.models import ConnectorCredentialPair
@@ -21,7 +21,7 @@ def teams_doc_sync(
fetch_all_existing_docs_fn: FetchAllDocumentsFunction,
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
) -> Generator[ElementExternalAccess, None, None]:
) -> Generator[DocExternalAccess, None, None]:
teams_connector = TeamsConnector(
**cc_pair.connector.connector_specific_config,
)

View File

@@ -2,12 +2,9 @@ from collections.abc import Generator
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFunction
from onyx.access.models import DocExternalAccess
from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import HierarchyNode
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -22,7 +19,7 @@ def generic_doc_sync(
doc_source: DocumentSource,
slim_connector: SlimConnectorWithPermSync,
label: str,
) -> Generator[ElementExternalAccess, None, None]:
) -> Generator[DocExternalAccess, None, None]:
"""
A convenience function for performing a generic document synchronization.
@@ -32,7 +29,7 @@ def generic_doc_sync(
- fetching *all* new (slim) docs
- yielding external-access permissions for existing docs which do not exist in the newly fetched slim-docs set (with their
`external_access` set to "private")
- yielding external-access permissions for newly fetched docs and hierarchy nodes
- yielding external-access permissions for newly fetched docs
Returns:
A `Generator` which yields existing and newly fetched external-access permissions.
@@ -52,15 +49,6 @@ def generic_doc_sync(
callback.progress(label, 1)
for doc in doc_batch:
if isinstance(doc, HierarchyNode):
# Yield hierarchy node permissions to be processed in outer layer
if doc.external_access:
yield NodeExternalAccess(
external_access=doc.external_access,
raw_node_id=doc.raw_node_id,
source=doc_source.value,
)
continue
if not doc.external_access:
raise RuntimeError(
f"No external access found for document ID; {cc_pair.id=} {doc_source=} {doc.id=}"

View File

@@ -4,10 +4,8 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from httpx_oauth.clients.google import GoogleOAuth2
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.server.analytics.api import router as analytics_router
from ee.onyx.server.auth_check import check_ee_router_auth
from ee.onyx.server.billing.api import router as billing_router
from ee.onyx.server.documents.cc_pair import router as ee_document_cc_pair_router
from ee.onyx.server.enterprise_settings.api import (
admin_router as enterprise_settings_admin_router,
@@ -18,17 +16,16 @@ from ee.onyx.server.enterprise_settings.api import (
from ee.onyx.server.evals.api import router as evals_router
from ee.onyx.server.license.api import router as license_router
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
from ee.onyx.server.middleware.license_enforcement import (
add_license_enforcement_middleware,
)
from ee.onyx.server.middleware.tenant_tracking import (
add_api_server_tenant_id_middleware,
)
from ee.onyx.server.oauth.api import router as ee_oauth_router
from ee.onyx.server.query_and_chat.chat_backend import (
router as chat_router,
)
from ee.onyx.server.query_and_chat.query_backend import (
basic_router as ee_query_router,
)
from ee.onyx.server.query_and_chat.search_backend import router as search_router
from ee.onyx.server.query_history.api import router as query_history_router
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
from ee.onyx.server.seeding import seed_db
@@ -87,11 +84,6 @@ def get_application() -> FastAPI:
if MULTI_TENANT:
add_api_server_tenant_id_middleware(application, logger)
else:
# License enforcement middleware for self-hosted deployments only
# Checks LICENSE_ENFORCEMENT_ENABLED at runtime (can be toggled without restart)
# MT deployments use control plane gating via is_tenant_gated() instead
add_license_enforcement_middleware(application, logger)
if AUTH_TYPE == AuthType.CLOUD:
# For Google OAuth, refresh tokens are requested by:
@@ -132,7 +124,7 @@ def get_application() -> FastAPI:
# EE only backend APIs
include_router_with_global_prefix_prepended(application, query_router)
include_router_with_global_prefix_prepended(application, ee_query_router)
include_router_with_global_prefix_prepended(application, search_router)
include_router_with_global_prefix_prepended(application, chat_router)
include_router_with_global_prefix_prepended(application, standard_answer_router)
include_router_with_global_prefix_prepended(application, ee_oauth_router)
include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)
@@ -151,13 +143,6 @@ def get_application() -> FastAPI:
# License management
include_router_with_global_prefix_prepended(application, license_router)
# Unified billing API - available when license system is enabled
# Works for both self-hosted and cloud deployments
# TODO(ENG-3533): Once frontend migrates to /admin/billing/*, this becomes the
# primary billing API and /tenants/* billing endpoints can be removed
if LICENSE_ENFORCEMENT_ENABLED:
include_router_with_global_prefix_prepended(application, billing_router)
if MULTI_TENANT:
# Tenant management
include_router_with_global_prefix_prepended(application, tenants_router)

View File

@@ -1,27 +0,0 @@
# Single message is likely most reliable and generally better for this task
# No final reminders at the end since the user query is expected to be short
# If it is not short, it should go into the chat flow so we do not need to account for this.
KEYWORD_EXPANSION_PROMPT = """
Generate a set of keyword-only queries to help find relevant documents for the provided query. \
These queries will be passed to a bm25-based keyword search engine. \
Provide a single query per line (where each query consists of one or more keywords). \
The queries must be purely keywords and not contain any filler natural language. \
The each query should have as few keywords as necessary to represent the user's search intent. \
If there are no useful expansions, simply return the original query with no additional keyword queries. \
CRITICAL: Do not include any additional formatting, comments, or anything aside from the keyword queries.
The user query is:
{user_query}
""".strip()
QUERY_TYPE_PROMPT = """
Determine if the provided query is better suited for a keyword search or a semantic search.
Respond with "keyword" or "semantic" literally and nothing else.
Do not provide any additional text or reasoning to your response.
CRITICAL: It must only be 1 single word - EITHER "keyword" or "semantic".
The user query is:
{user_query}
""".strip()

View File

@@ -1,42 +0,0 @@
# ruff: noqa: E501, W605 start
SEARCH_CLASS = "search"
CHAT_CLASS = "chat"
# Will note that with many larger LLMs the latency on running this prompt via third party APIs is as high as 2 seconds which is too slow for many
# use cases.
SEARCH_CHAT_PROMPT = f"""
Determine if the following query is better suited for a search UI or a chat UI. Respond with "{SEARCH_CLASS}" or "{CHAT_CLASS}" literally and nothing else. \
Do not provide any additional text or reasoning to your response. CRITICAL, IT MUST ONLY BE 1 SINGLE WORD - EITHER "{SEARCH_CLASS}" or "{CHAT_CLASS}".
# Classification Guidelines:
## {SEARCH_CLASS}
- If the query consists entirely of keywords or query doesn't require any answer from the AI
- If the query is a short statement that seems like a search query rather than a question
- If the query feels nonsensical or is a short phrase that possibly describes a document or information that could be found in a internal document
### Examples of {SEARCH_CLASS} queries:
- Find me the document that goes over the onboarding process for a new hire
- Pull requests since last week
- Sales Runbook AMEA Region
- Procurement process
- Retrieve the PRD for project X
## {CHAT_CLASS}
- If the query is asking a question that requires an answer rather than a document
- If the query is asking for a solution, suggestion, or general help
- If the query is seeking information that is on the web and likely not in a company internal document
- If the query should be answered without any context from additional documents or searches
### Examples of {CHAT_CLASS} queries:
- What led us to win the deal with company X? (seeking answer)
- Google Drive not sync-ing files to my computer (seeking solution)
- Review my email: <whatever the email is> (general help)
- Write me a script to... (general help)
- Cheap flights Europe to Tokyo (information likely found on the web, not internal)
# User Query:
{{user_query}}
REMEMBER TO ONLY RESPOND WITH "{SEARCH_CLASS}" OR "{CHAT_CLASS}" AND NOTHING ELSE.
""".strip()
# ruff: noqa: E501, W605 end

View File

@@ -1,285 +0,0 @@
from collections.abc import Generator
from sqlalchemy.orm import Session
from ee.onyx.db.search import create_search_query
from ee.onyx.secondary_llm_flows.query_expansion import expand_keywords
from ee.onyx.server.query_and_chat.models import SearchDocWithContent
from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from ee.onyx.server.query_and_chat.streaming_models import LLMSelectedDocsPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchDocsPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchQueriesPacket
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkSearchRequest
from onyx.context.search.models import InferenceChunk
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.factory import get_default_llm
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
from onyx.tools.tool_implementations.search.search_utils import (
weighted_reciprocal_rank_fusion,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
# This is just a heuristic that also happens to work well for the UI/UX
# Users would not find it useful to see a huge list of suggested docs
# but more than 1 is also likely good as many questions may target more than 1 doc.
TARGET_NUM_SECTIONS_FOR_LLM_SELECTION = 3
def _run_single_search(
query: str,
filters: BaseFilters | None,
document_index: DocumentIndex,
user: User,
db_session: Session,
num_hits: int | None = None,
) -> list[InferenceChunk]:
"""Execute a single search query and return chunks."""
chunk_search_request = ChunkSearchRequest(
query=query,
user_selected_filters=filters,
limit=num_hits,
)
return search_pipeline(
chunk_search_request=chunk_search_request,
document_index=document_index,
user=user,
persona=None, # No persona for direct search
db_session=db_session,
)
def stream_search_query(
request: SendSearchQueryRequest,
user: User,
db_session: Session,
) -> Generator[
SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,
None,
None,
]:
"""
Core search function that yields streaming packets.
Used by both streaming and non-streaming endpoints.
"""
# Get document index
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
# Determine queries to execute
original_query = request.search_query
keyword_expansions: list[str] = []
if request.run_query_expansion:
try:
llm = get_default_llm()
keyword_expansions = expand_keywords(
user_query=original_query,
llm=llm,
)
if keyword_expansions:
logger.debug(
f"Query expansion generated {len(keyword_expansions)} keyword queries"
)
except Exception as e:
logger.warning(f"Query expansion failed: {e}; using original query only.")
keyword_expansions = []
# Build list of all executed queries for tracking
all_executed_queries = [original_query] + keyword_expansions
if not user.is_anonymous:
create_search_query(
db_session=db_session,
user_id=user.id,
query=request.search_query,
query_expansions=keyword_expansions if keyword_expansions else None,
)
# Execute search(es)
if not keyword_expansions:
# Single query (original only) - no threading needed
chunks = _run_single_search(
query=original_query,
filters=request.filters,
document_index=document_index,
user=user,
db_session=db_session,
num_hits=request.num_hits,
)
else:
# Multiple queries - run in parallel and merge with RRF
# First query is the original (semantic), rest are keyword expansions
search_functions = [
(
_run_single_search,
(
query,
request.filters,
document_index,
user,
db_session,
request.num_hits,
),
)
for query in all_executed_queries
]
# Run all searches in parallel
all_search_results: list[list[InferenceChunk]] = (
run_functions_tuples_in_parallel(
search_functions,
allow_failures=True,
)
)
# Separate original query results from keyword expansion results
# Note that in rare cases, the original query may have failed and so we may be
# just overweighting one set of keyword results, should be not a big deal though.
original_result = all_search_results[0] if all_search_results else []
keyword_results = all_search_results[1:] if len(all_search_results) > 1 else []
# Build valid results and weights
# Original query (semantic): weight 2.0
# Keyword expansions: weight 1.0 each
valid_results: list[list[InferenceChunk]] = []
weights: list[float] = []
if original_result:
valid_results.append(original_result)
weights.append(2.0)
for keyword_result in keyword_results:
if keyword_result:
valid_results.append(keyword_result)
weights.append(1.0)
if not valid_results:
logger.warning("All parallel searches returned empty results")
chunks = []
else:
chunks = weighted_reciprocal_rank_fusion(
ranked_results=valid_results,
weights=weights,
id_extractor=lambda chunk: f"{chunk.document_id}_{chunk.chunk_id}",
)
# Merge chunks into sections
sections = merge_individual_chunks(chunks)
# Truncate to the requested number of hits
sections = sections[: request.num_hits]
# Apply LLM document selection if requested
# num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
# The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it
# llm_selected_doc_ids will be:
# - None if LLM selection was not requested or failed
# - Empty list if LLM selection ran but selected nothing
# - List of doc IDs if LLM selection succeeded
run_llm_selection = (
request.num_docs_fed_to_llm_selection is not None
and request.num_docs_fed_to_llm_selection >= 1
)
llm_selected_doc_ids: list[str] | None = None
llm_selection_failed = False
if run_llm_selection and sections:
try:
llm = get_default_llm()
sections_to_evaluate = sections[: request.num_docs_fed_to_llm_selection]
selected_sections, _ = select_sections_for_expansion(
sections=sections_to_evaluate,
user_query=original_query,
llm=llm,
max_sections=TARGET_NUM_SECTIONS_FOR_LLM_SELECTION,
try_to_fill_to_max=True,
)
# Extract unique document IDs from selected sections (may be empty)
llm_selected_doc_ids = list(
dict.fromkeys(
section.center_chunk.document_id for section in selected_sections
)
)
logger.debug(
f"LLM document selection evaluated {len(sections_to_evaluate)} sections, "
f"selected {len(selected_sections)} sections with doc IDs: {llm_selected_doc_ids}"
)
except Exception as e:
# Allowing a blanket exception here as this step is not critical and the rest of the results are still valid
logger.warning(f"LLM document selection failed: {e}")
llm_selection_failed = True
elif run_llm_selection and not sections:
# LLM selection requested but no sections to evaluate
llm_selected_doc_ids = []
# Convert to SearchDocWithContent list, optionally including content
search_docs = SearchDocWithContent.from_inference_sections(
sections,
include_content=request.include_content,
is_internet=False,
)
# Yield queries packet
yield SearchQueriesPacket(all_executed_queries=all_executed_queries)
# Yield docs packet
yield SearchDocsPacket(search_docs=search_docs)
# Yield LLM selected docs packet if LLM selection was requested
# - llm_selected_doc_ids is None if selection failed
# - llm_selected_doc_ids is empty list if no docs were selected
# - llm_selected_doc_ids is list of IDs if docs were selected
if run_llm_selection:
yield LLMSelectedDocsPacket(
llm_selected_doc_ids=None if llm_selection_failed else llm_selected_doc_ids
)
def gather_search_stream(
packets: Generator[
SearchQueriesPacket
| SearchDocsPacket
| LLMSelectedDocsPacket
| SearchErrorPacket,
None,
None,
],
) -> SearchFullResponse:
"""
Aggregate all streaming packets into SearchFullResponse.
"""
all_executed_queries: list[str] = []
search_docs: list[SearchDocWithContent] = []
llm_selected_doc_ids: list[str] | None = None
error: str | None = None
for packet in packets:
if isinstance(packet, SearchQueriesPacket):
all_executed_queries = packet.all_executed_queries
elif isinstance(packet, SearchDocsPacket):
search_docs = packet.search_docs
elif isinstance(packet, LLMSelectedDocsPacket):
llm_selected_doc_ids = packet.llm_selected_doc_ids
elif isinstance(packet, SearchErrorPacket):
error = packet.error
return SearchFullResponse(
all_executed_queries=all_executed_queries,
search_docs=search_docs,
doc_selection_reasoning=None,
llm_selected_doc_ids=llm_selected_doc_ids,
error=error,
)

View File

@@ -1,92 +0,0 @@
import re
from ee.onyx.prompts.query_expansion import KEYWORD_EXPANSION_PROMPT
from onyx.llm.interfaces import LLM
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Pattern to remove common LLM artifacts: brackets, quotes, list markers, etc.
CLEANUP_PATTERN = re.compile(r'[\[\]"\'`]')
def _clean_keyword_line(line: str) -> str:
"""Clean a keyword line by removing common LLM artifacts.
Removes brackets, quotes, and other characters that LLMs may accidentally
include in their output.
"""
# Remove common artifacts
cleaned = CLEANUP_PATTERN.sub("", line)
# Remove leading list markers like "1.", "2.", "-", "*"
cleaned = re.sub(r"^\s*(?:\d+[\.\)]\s*|[-*]\s*)", "", cleaned)
return cleaned.strip()
def expand_keywords(
user_query: str,
llm: LLM,
) -> list[str]:
"""Expand a user query into multiple keyword-only queries for BM25 search.
Uses an LLM to generate keyword-based search queries that capture different
aspects of the user's search intent. Returns only the expanded queries,
not the original query.
Args:
user_query: The original search query from the user
llm: Language model to use for keyword expansion
Returns:
List of expanded keyword queries (excluding the original query).
Returns empty list if expansion fails or produces no useful expansions.
"""
messages: LanguageModelInput = [
UserMessage(content=KEYWORD_EXPANSION_PROMPT.format(user_query=user_query))
]
try:
response = llm.invoke(
prompt=messages,
reasoning_effort=ReasoningEffort.OFF,
# Limit output - we only expect a few short keyword queries
max_tokens=150,
)
content = llm_response_to_string(response).strip()
if not content:
logger.warning("Keyword expansion returned empty response.")
return []
# Parse response - each line is a separate keyword query
# Clean each line to remove LLM artifacts and drop empty lines
parsed_queries = []
for line in content.strip().split("\n"):
cleaned = _clean_keyword_line(line)
if cleaned:
parsed_queries.append(cleaned)
if not parsed_queries:
logger.warning("Keyword expansion parsing returned no queries.")
return []
# Filter out duplicates and queries that match the original
expanded_queries: list[str] = []
seen_lower: set[str] = {user_query.lower()}
for query in parsed_queries:
query_lower = query.lower()
if query_lower not in seen_lower:
seen_lower.add(query_lower)
expanded_queries.append(query)
logger.debug(f"Keyword expansion generated {len(expanded_queries)} queries")
return expanded_queries
except Exception as e:
logger.warning(f"Keyword expansion failed: {e}")
return []

View File

@@ -1,50 +0,0 @@
from ee.onyx.prompts.search_flow_classification import CHAT_CLASS
from ee.onyx.prompts.search_flow_classification import SEARCH_CHAT_PROMPT
from ee.onyx.prompts.search_flow_classification import SEARCH_CLASS
from onyx.llm.interfaces import LLM
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
@log_function_time(print_only=True)
def classify_is_search_flow(
query: str,
llm: LLM,
) -> bool:
messages: LanguageModelInput = [
UserMessage(content=SEARCH_CHAT_PROMPT.format(user_query=query))
]
response = llm.invoke(
prompt=messages,
reasoning_effort=ReasoningEffort.OFF,
# Nothing can happen in the UI until this call finishes so we need to be aggressive with the timeout
timeout_override=2,
# Well more than necessary but just to ensure completion and in case it succeeds with classifying but
# ends up rambling
max_tokens=20,
)
content = llm_response_to_string(response).strip().lower()
if not content:
logger.warning(
"Search flow classification returned empty response; defaulting to chat flow."
)
return False
# Prefer chat if both appear.
if CHAT_CLASS in content:
return False
if SEARCH_CLASS in content:
return True
logger.warning(
"Search flow classification returned unexpected response; defaulting to chat flow. Response=%r",
content,
)
return False

View File

@@ -19,9 +19,9 @@ from ee.onyx.db.analytics import fetch_query_analytics
from ee.onyx.db.analytics import user_can_view_assistant_stats
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.utils import PUBLIC_API_TAGS
router = APIRouter(prefix="/analytics", tags=PUBLIC_API_TAGS)
@@ -40,7 +40,7 @@ class QueryAnalyticsResponse(BaseModel):
def get_query_analytics(
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: User = Depends(current_admin_user),
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[QueryAnalyticsResponse]:
daily_query_usage_info = fetch_query_analytics(
@@ -71,7 +71,7 @@ class UserAnalyticsResponse(BaseModel):
def get_user_analytics(
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: User = Depends(current_admin_user),
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[UserAnalyticsResponse]:
daily_query_usage_info_per_user = fetch_per_user_query_analytics(
@@ -105,7 +105,7 @@ class OnyxbotAnalyticsResponse(BaseModel):
def get_onyxbot_analytics(
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: User = Depends(current_admin_user),
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[OnyxbotAnalyticsResponse]:
daily_onyxbot_info = fetch_onyxbot_analytics(
@@ -141,7 +141,7 @@ def get_persona_messages(
persona_id: int,
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
_: User = Depends(current_admin_user),
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[PersonaMessageAnalyticsResponse]:
"""Fetch daily message counts for a single persona within the given time range."""
@@ -179,7 +179,7 @@ def get_persona_unique_users(
persona_id: int,
start: datetime.datetime,
end: datetime.datetime,
_: User = Depends(current_admin_user),
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> list[PersonaUniqueUsersResponse]:
"""Get unique users per day for a single persona."""
@@ -218,7 +218,7 @@ def get_assistant_stats(
assistant_id: int,
start: datetime.datetime | None = None,
end: datetime.datetime | None = None,
user: User = Depends(current_user),
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> AssistantStatsResponse:
"""

View File

@@ -10,16 +10,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
("/enterprise-settings/logo", {"GET"}),
("/enterprise-settings/logotype", {"GET"}),
("/enterprise-settings/custom-analytics-script", {"GET"}),
# Stripe publishable key is safe to expose publicly
("/tenants/stripe-publishable-key", {"GET"}),
("/admin/billing/stripe-publishable-key", {"GET"}),
# Proxy endpoints use license-based auth, not user auth
("/proxy/create-checkout-session", {"POST"}),
("/proxy/claim-license", {"POST"}),
("/proxy/create-customer-portal-session", {"POST"}),
("/proxy/billing-information", {"GET"}),
("/proxy/license/{tenant_id}", {"GET"}),
("/proxy/seats/update", {"POST"}),
]

Some files were not shown because too many files have changed in this diff Show More