Compare commits

..

6 Commits

Author      SHA1        Message                          Date
Dane Urban  5848975679  Remove comment                   2026-01-08 19:21:24 -08:00
Dane Urban  dcc330010e  Remove comment                   2026-01-08 19:21:08 -08:00
Dane Urban  d0f5f1f5ae  Handle error and log             2026-01-08 19:20:28 -08:00
Dane Urban  3e475993ff  Change which event loop we get   2026-01-08 19:16:12 -08:00
Dane Urban  7c2b5fa822  Change loggin                    2026-01-08 17:29:00 -08:00
Dane Urban  409cfdc788  nits                             2026-01-08 17:23:08 -08:00
1172 changed files with 31257 additions and 124974 deletions

View File

@@ -8,9 +8,7 @@ on:
# Set restrictive default permissions for all jobs. Jobs that need more permissions
# should explicitly declare them.
permissions:
# Required for OIDC authentication with AWS
id-token: write # zizmor: ignore[excessive-permissions]
permissions: {}
env:
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
@@ -152,30 +150,16 @@ jobs:
if: always() && needs.check-version-tag.result == 'failure' && github.event_name != 'workflow_dispatch'
runs-on: ubuntu-slim
timeout-minutes: 10
environment: release
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: "• check-version-tag"
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
@@ -184,7 +168,6 @@ jobs:
needs: determine-builds
if: needs.determine-builds.outputs.build-desktop == 'true'
permissions:
id-token: write
contents: write
actions: read
strategy:
@@ -202,33 +185,12 @@ jobs:
runs-on: ${{ matrix.platform }}
timeout-minutes: 90
environment: release
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
with:
# NOTE: persist-credentials is needed for tauri-action to create GitHub releases.
persist-credentials: true # zizmor: ignore[artipacked]
- name: Configure AWS credentials
if: startsWith(matrix.platform, 'macos-')
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
if: startsWith(matrix.platform, 'macos-')
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
APPLE_ID, deploy/apple-id
APPLE_PASSWORD, deploy/apple-password
APPLE_CERTIFICATE, deploy/apple-certificate
APPLE_CERTIFICATE_PASSWORD, deploy/apple-certificate-password
KEYCHAIN_PASSWORD, deploy/keychain-password
APPLE_TEAM_ID, deploy/apple-team-id
parse-json-secrets: true
- name: install dependencies (ubuntu only)
if: startsWith(matrix.platform, 'ubuntu-')
run: |
@@ -323,40 +285,15 @@ jobs:
Write-Host "Versions set to: $VERSION"
- name: Import Apple Developer Certificate
if: startsWith(matrix.platform, 'macos-')
run: |
echo $APPLE_CERTIFICATE | base64 --decode > certificate.p12
security create-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
security default-keychain -s build.keychain
security unlock-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
security set-keychain-settings -t 3600 -u build.keychain
security import certificate.p12 -k build.keychain -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign
security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$KEYCHAIN_PASSWORD" build.keychain
security find-identity -v -p codesigning build.keychain
- name: Verify Certificate
if: startsWith(matrix.platform, 'macos-')
run: |
CERT_INFO=$(security find-identity -v -p codesigning build.keychain | grep -E "(Developer ID Application|Apple Distribution|Apple Development)" | head -n 1)
CERT_ID=$(echo "$CERT_INFO" | awk -F'"' '{print $2}')
echo "CERT_ID=$CERT_ID" >> $GITHUB_ENV
echo "Certificate imported."
- uses: tauri-apps/tauri-action@73fb865345c54760d875b94642314f8c0c894afa # ratchet:tauri-apps/tauri-action@action-v0.6.1
- uses: tauri-apps/tauri-action@19b93bb55601e3e373a93cfb6eb4242e45f5af20 # ratchet:tauri-apps/tauri-action@action-v0.6.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
APPLE_ID: ${{ env.APPLE_ID }}
APPLE_PASSWORD: ${{ env.APPLE_PASSWORD }}
APPLE_SIGNING_IDENTITY: ${{ env.CERT_ID }}
APPLE_TEAM_ID: ${{ env.APPLE_TEAM_ID }}
with:
tagName: ${{ needs.determine-builds.outputs.is-test-run != 'true' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseName: ${{ needs.determine-builds.outputs.is-test-run != 'true' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseBody: "See the assets to download this version and install."
releaseDraft: true
prerelease: false
assetNamePattern: "[name]_[arch][ext]"
args: ${{ matrix.args }}
build-web-amd64:
@@ -368,7 +305,6 @@ jobs:
- run-id=${{ github.run_id }}-web-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -381,20 +317,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -404,13 +326,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -441,7 +363,6 @@ jobs:
- run-id=${{ github.run_id }}-web-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -454,20 +375,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -477,13 +384,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -516,34 +423,19 @@ jobs:
- run-id=${{ github.run_id }}-merge-web
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -579,7 +471,6 @@ jobs:
- run-id=${{ github.run_id }}-web-cloud-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -592,20 +483,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -615,13 +492,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -660,7 +537,6 @@ jobs:
- run-id=${{ github.run_id }}-web-cloud-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -673,20 +549,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -696,13 +558,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -743,34 +605,19 @@ jobs:
- run-id=${{ github.run_id }}-merge-web-cloud
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -803,7 +650,6 @@ jobs:
- run-id=${{ github.run_id }}-backend-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -816,20 +662,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -839,13 +671,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -875,7 +707,6 @@ jobs:
- run-id=${{ github.run_id }}-backend-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -888,20 +719,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -911,13 +728,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -949,34 +766,19 @@ jobs:
- run-id=${{ github.run_id }}-merge-backend
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -1013,7 +815,6 @@ jobs:
- volume=40gb
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -1026,20 +827,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -1049,15 +836,15 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -1092,7 +879,6 @@ jobs:
- volume=40gb
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -1105,20 +891,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -1128,15 +900,15 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -1172,34 +944,19 @@ jobs:
- run-id=${{ github.run_id }}-merge-model-server
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -1237,26 +994,11 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-web
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1272,8 +1014,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1292,26 +1034,11 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-web-cloud
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1327,8 +1054,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1347,7 +1074,6 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-backend
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
steps:
@@ -1358,20 +1084,6 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1388,8 +1100,8 @@ jobs:
-v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1409,26 +1121,11 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-model-server
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1444,8 +1141,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1473,26 +1170,12 @@ jobs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 90
environment: release
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Determine failed jobs
id: failed-jobs
shell: bash
@@ -1558,7 +1241,7 @@ jobs:
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -29,7 +29,6 @@ jobs:
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/

View File

@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # ratchet:actions/stale@v10
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
with:
stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'

View File

@@ -94,7 +94,7 @@ jobs:
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -45,9 +45,6 @@ env:
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
# OpenSearch
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
@@ -128,13 +125,11 @@ jobs:
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker-compose.opensearch.yml \
up -d \
minio \
relational_db \
cache \
index \
opensearch \
code-interpreter
- name: Run migrations
@@ -163,7 +158,7 @@ jobs:
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
# Collect logs from each container
for container in $containers; do
@@ -177,7 +172,7 @@ jobs:
- name: Upload Docker logs
if: failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
with:
name: docker-logs-${{ matrix.test-dir }}
path: docker-logs/

View File

@@ -88,7 +88,6 @@ jobs:
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
@@ -181,11 +180,6 @@ jobs:
trap cleanup EXIT
# Run the actual installation with detailed logging
# Note that opensearch.enabled is true whereas others in this install
# are false. There is some work that needs to be done to get this
# entire step working in CI, enabling opensearch here is a small step
# in that direction. If this is causing issues, disabling it in this
# step should be ok in the short term.
echo "=== Starting ct install ==="
set +e
ct install --all \
@@ -193,8 +187,6 @@ jobs:
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=opensearch.enabled=true \
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \

View File

@@ -103,7 +103,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -163,7 +163,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -208,7 +208,7 @@ jobs:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
@@ -310,9 +310,8 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
AUTO_LLM_UPDATE_INTERVAL_SECONDS=1
MCP_SERVER_ENABLED=true
USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
EOF
- name: Start Docker containers
@@ -439,7 +438,7 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
@@ -568,7 +567,7 @@ jobs:
- name: Upload logs (multi-tenant)
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-multitenant
path: ${{ github.workspace }}/docker-compose-multitenant.log

View File

@@ -44,7 +44,7 @@ jobs:
- name: Upload coverage reports
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: jest-coverage-${{ github.run_id }}
path: ./web/coverage

View File

@@ -95,7 +95,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -155,7 +155,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -214,7 +214,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
@@ -301,7 +301,7 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
AUTO_LLM_UPDATE_INTERVAL_SECONDS=1
EOF
- name: Start Docker containers
@@ -424,7 +424,7 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log

View File

@@ -85,7 +85,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -146,7 +146,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -207,7 +207,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -435,7 +435,7 @@ jobs:
fi
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
if: always()
with:
# Includes test results and trace.zip files
@@ -455,7 +455,7 @@ jobs:
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log

View File

@@ -5,6 +5,11 @@ on:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
workflow_dispatch:
inputs:
branch:
description: 'Branch to run the workflow on'
required: false
default: 'main'
permissions:
contents: read
@@ -26,11 +31,7 @@ env:
jobs:
model-check:
# See https://runs-on.com/runners/linux/
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- "run-id=${{ github.run_id }}-model-check"
- "extras=ecr-cache"
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
timeout-minutes: 45
env:
@@ -42,87 +43,108 @@ jobs:
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
- name: Build and load
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6
env:
TAG: model-server-${{ github.run_id }}
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# successfully.
- name: Pull Model Server Docker image
run: |
docker pull onyxdotapp/onyx-model-server:latest
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
load: true
targets: model-server
set: |
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
model-server.cache-from=type=registry,ref=onyxdotapp/onyx-model-server:latest
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Start Docker containers
id: start_docker
env:
IMAGE_TAG: model-server-${{ github.run_id }}
run: |
cd deployment/docker_compose
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
up -d --wait \
inference_model_server
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.SLACK_WEBHOOK }}
failed-jobs: model-check
title: "🚨 Scheduled Model Tests failed!"
ref-name: ${{ github.ref_name }}
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log

.gitignore (vendored), 4 changes
View File

@@ -1,8 +1,5 @@
# editors
.vscode
!/.vscode/env_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.cursor
@@ -24,7 +21,6 @@ backend/tests/regression/search_quality/*.json
backend/onyx/evals/data/
backend/onyx/evals/one_off/*.json
*.log
*.csv
# secret files
.env

View File

@@ -11,6 +11,7 @@ repos:
- id: uv-sync
args: ["--locked", "--all-extras"]
- id: uv-lock
files: ^pyproject\.toml$
- id: uv-export
name: uv-export default.txt
args:
@@ -66,8 +67,7 @@ repos:
- id: uv-run
name: Check lazy imports
args: ["--active", "--with=onyx-devtools", "ods", "check-lazy-imports"]
pass_filenames: true
files: ^backend/(?!\.venv/|scripts/).*\.py$
files: ^backend/(?!\.venv/).*\.py$
# NOTE: This takes ~6s on a single, large module which is prohibitively slow.
# - id: uv-run
# name: mypy
@@ -75,13 +75,6 @@ repos:
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-added-large-files
name: Check for added large files
args: ["--maxkb=1500"]
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:
@@ -154,22 +147,6 @@ repos:
pass_filenames: false
files: \.tf$
- id: npm-install
name: npm install
description: "Automatically run 'npm install' after a checkout, pull or rebase"
language: system
entry: bash -c 'cd web && npm install --no-save'
pass_filenames: false
files: ^web/package(-lock)?\.json$
stages: [post-checkout, post-merge, post-rewrite]
- id: npm-install-check
name: npm install --package-lock-only
description: "Check the 'web/package-lock.json' is updated"
language: system
entry: bash -c 'cd web && npm install --package-lock-only'
pass_filenames: false
files: ^web/package(-lock)?\.json$
# Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
# This is a preview package - if it breaks:
# 1. Try updating: cd web && npm update @typescript/native-preview

View File

@@ -17,6 +17,12 @@ LOG_ONYX_MODEL_INTERACTIONS=True
LOG_LEVEL=debug
# This passes top N results to LLM an additional time for reranking prior to
# answer generation.
# This step is quite heavy on token usage so we disable it for dev generally.
DISABLE_LLM_DOC_RELEVANCE=False
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).
OAUTH_CLIENT_ID=<REPLACE THIS>
OAUTH_CLIENT_SECRET=<REPLACE THIS>

View File

@@ -1,3 +1,5 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
@@ -22,7 +24,7 @@
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery background",
"Celery docfetching",
"Celery docprocessing",
"Celery beat"
@@ -149,24 +151,6 @@
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Discord Bot",
"consoleName": "Discord Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/discord/client.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Discord Bot Console"
},
{
"name": "MCP Server",
"consoleName": "MCP Server",
@@ -415,6 +399,7 @@
"onyx.background.celery.versioned_apps.docfetching",
"worker",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docfetching@%n",
@@ -445,6 +430,7 @@
"onyx.background.celery.versioned_apps.docprocessing",
"worker",
"--pool=threads",
"--concurrency=6",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"--hostname=docprocessing@%n",
@@ -593,137 +579,6 @@
"group": "3"
}
},
{
"name": "Build Sandbox Templates",
"type": "debugpy",
"request": "launch",
"module": "onyx.server.features.build.sandbox.build_templates",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"console": "integratedTerminal",
"presentation": {
"group": "3"
},
"consoleTitle": "Build Sandbox Templates"
},
{
// Dummy entry used to label the group
"name": "--- Database ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "4",
"order": 0
}
},
{
"name": "Restore seeded database dump",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore seeded database dump (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--clean",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Create database snapshot",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"dump",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore database snapshot (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--clean",
"--yes",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Upgrade database to head revision",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"upgrade"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -1,31 +1,262 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
# Contributing to Onyx
Hey there! We are so excited that you're interested in Onyx.
As an open source project in a rapidly changing space, we welcome all contributions.
## Contribution Opportunities
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to look for and share contribution ideas.
## 💃 Guidelines
If you have your own feature that you would like to build, please create an issue so community members can provide feedback and
give it a thumbs-up if they feel a common need.
### Contribution Opportunities
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
## Contributing Code
Please reference the documents in the contributing_guides folder to ensure that the code base is kept to a high standard.
1. dev_setup.md (start here): gives you a guide to setting up a local development environment.
2. contribution_process.md: how to ensure you are building valuable features that will get reviewed and merged.
3. best_practices.md: before asking for reviews, ensure your changes meet the repo code quality standards.
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
via [Discord](https://discord.gg/4NA5SbzrWb) or [email](mailto:hello@onyx.app).
To contribute, please follow the
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
will be marked with the `approved by maintainers` label.
Issues marked `good first issue` are an especially great place to start.
**Connectors** to other tools are another great place to contribute. For details on how, refer to this
[README.md](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md).
If you have a new/different contribution in mind, we'd love to hear about it!
Your input is vital to making sure that Onyx moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
[Discord](https://discord.gg/4NA5SbzrWb) directly about anything at all.
### Contributing Code
To contribute to this project, please follow the
["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
When opening a pull request, mention related issues and feel free to tag relevant maintainers.
Before creating a pull request, please make sure that the new changes conform to the formatting and linting requirements.
See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.
### Getting Help 🙋
Our goal is to make contributing as easy as possible. If you run into any issues, please don't hesitate to reach out.
That way, we can help future contributors and users avoid the same issue.
We also have support channels and generally interesting discussions on our
[Discord](https://discord.gg/4NA5SbzrWb).
We would love to see you there!
## Get Started 🚀
Onyx, being a fully functional app, relies on some external software, specifically:
- [Postgres](https://www.postgresql.org/) (Relational DB)
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
- [Redis](https://redis.io/) (Cache)
- [MinIO](https://min.io/) (File Store)
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
> **Note:**
> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software. We believe this combination is easier for
> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Onyx stack within Docker below.
### Local Set Up
Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.
If using a lower version, modifications will have to be made to the code.
If using a higher version, some libraries may not be available (e.g., we had problems with Tensorflow in the past with higher versions of Python).
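A quick sanity check before creating the virtual environment (illustrative only; assumes a `python3.11` binary is on your PATH):
```bash
# Confirm the interpreter version that the venv will be built from
python3.11 --version   # should print Python 3.11.x
```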
#### Backend: Python requirements
Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
For convenience here's a command for it:
```bash
uv venv .venv --python 3.11
source .venv/bin/activate
```
_For Windows, activate the virtual environment using Command Prompt:_
```bash
.venv\Scripts\activate
```
If using PowerShell, the command slightly differs:
```powershell
.venv\Scripts\Activate.ps1
```
Install the required python dependencies:
```bash
uv sync --all-extras
```
Install Playwright for Python (headless browser required by the Web Connector):
```bash
uv run playwright install
```
#### Frontend: Node dependencies
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
to manage your Node installations. Once installed, you can run
```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```
Navigate to `onyx/web` and run:
```bash
npm i
```
## Formatting and Linting
### Backend
For the backend, you'll need to set up the pre-commit hooks (black / reorder-python-imports).
To install them, run:
```bash
uv run pre-commit install
```
Additionally, we use `mypy` for static type checking.
Onyx is fully type-annotated, and we want to keep it that way!
To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
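For example, with the virtual environment active:
```bash
cd onyx/backend
uv run mypy .
```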
### Web
We use `prettier` for formatting. The desired version will be installed via `npm i` from the `onyx/web` directory.
To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
Pre-commit will also run prettier automatically on files you've recently touched. If files are re-formatted, the commit will fail;
re-stage your changes and commit again.
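For example, after prettier re-formats files during a commit:
```bash
cd onyx/web
npx prettier --write .   # or let the pre-commit hook do the re-formatting
git add -u               # re-stage the re-formatted files
git commit               # commit again
```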
# Running the application for development
## Developing using VSCode Debugger (recommended)
**We highly recommend using VSCode debugger for development.**
See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
Otherwise, you can follow the instructions below to run the application for development.
## Manually running the application for development
### Docker containers for external software
You will need Docker installed to run these containers.
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio
```
(index refers to Vespa, relational_db refers to Postgres, cache refers to Redis, and minio refers to MinIO)
### Running Onyx locally
To start the frontend, navigate to `onyx/web` and run:
```bash
npm run dev
```
Next, start the model server which runs the local NLP models.
Navigate to `onyx/backend` and run:
```bash
uvicorn model_server.main:app --reload --port 9000
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "uvicorn model_server.main:app --reload --port 9000"
```
The first time you run Onyx, you will need to run the DB migrations for Postgres.
After the first time, this is no longer required unless the DB models change.
Navigate to `onyx/backend` and with the venv active, run:
```bash
alembic upgrade head
```
Next, start the task queue which orchestrates the background jobs.
Jobs that take more time run asynchronously, outside of the API server.
Still in `onyx/backend`, run:
```bash
python ./scripts/dev_run_background_jobs.py
```
To run the backend API server, navigate back to `onyx/backend` and run:
```bash
AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "
$env:AUTH_TYPE='disabled'
uvicorn onyx.main:app --reload --port 8080
"
```
> **Note:**
> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
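For example, to run the API server with debug-level logs (a sketch combining the command above with the extra variable):
```bash
LOG_LEVEL=DEBUG AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
```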
#### Wrapping up
You should now have 4 servers running:
- Web server
- Backend API
- Model server
- Background jobs
Now, visit `http://localhost:3000` in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.
You've successfully set up a local Onyx instance! 🏁
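If you prefer a quick terminal check as well, you can confirm the web server responds before digging further:
```bash
curl -I http://localhost:3000   # expect an HTTP response from the web server
```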
#### Running the Onyx application in a container
You can run the full Onyx application stack from pre-built images, including all external software dependencies.
Navigate to `onyx/deployment/docker_compose` and run:
```bash
docker compose up -d
```
After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
If you want to make changes to Onyx and run those changes in Docker, you can also build local Onyx container images that incorporate your changes, like so:
```bash
docker compose up -d --build
```
## Getting Help 🙋
We have support channels and generally interesting discussions on our [Discord](https://discord.gg/4NA5SbzrWb).
See you there!
## Release Process
Onyx loosely follows the SemVer versioning standard.
Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
A set of Docker containers will be pushed automatically to DockerHub with every tag.
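For illustration, cutting a release generally amounts to pushing a version tag (the tag name below is hypothetical; follow the project's actual versioning scheme):
```bash
git tag v1.2.3
git push origin v1.2.3
# CI then builds and pushes the corresponding Docker containers to DockerHub
```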
View File
@@ -7,6 +7,8 @@ This guide explains how to set up and use VSCode's debugging capabilities with t
1. **Environment Setup**:
- Copy `.vscode/env_template.txt` to `.vscode/.env`
- Fill in the necessary environment variables in `.vscode/.env`
2. **launch.json**:
- Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
## Using the Debugger
View File
@@ -16,8 +16,3 @@ dist/
.coverage
htmlcov/
model_server/legacy/
# Craft: demo_data directory should be unzipped at container startup, not copied
**/demo_data/
# Craft: templates/outputs/venv is created at container startup
**/templates/outputs/venv
View File
@@ -37,6 +37,10 @@ CVE-2023-50868
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193
View File
@@ -7,10 +7,6 @@ have a contract or agreement with DanswerAI, you are not permitted to use the En
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
# Build argument for Craft support (disabled by default)
# Use --build-arg ENABLE_CRAFT=true to include Node.js and opencode CLI
ARG ENABLE_CRAFT=false
# DO_NOT_TRACK is used to disable telemetry for Unstructured
ENV DANSWER_RUNNING_IN_DOCKER="true" \
DO_NOT_TRACK="true" \
@@ -50,23 +46,7 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
# Conditionally install Node.js 20 for Craft (required for Next.js)
# Only installed when ENABLE_CRAFT=true
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
echo "Installing Node.js 20 for Craft support..." && \
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt-get install -y nodejs && \
rm -rf /var/lib/apt/lists/*; \
fi
# Conditionally install opencode CLI for Craft agent functionality
# Only installed when ENABLE_CRAFT=true
# TODO: download a specific, versioned release of the opencode CLI
RUN if [ "$ENABLE_CRAFT" = "true" ]; then \
echo "Installing opencode CLI for Craft support..." && \
curl -fsSL https://opencode.ai/install | bash; \
fi
ENV PATH="/root/.opencode/bin:${PATH}"
# Install Python dependencies
# Remove py which is pulled in by retry, py is not needed and is a CVE
@@ -111,8 +91,8 @@ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
@@ -139,8 +119,7 @@ COPY --chown=onyx:onyx ./static /app/static
COPY --chown=onyx:onyx ./scripts/debugging /app/scripts/debugging
COPY --chown=onyx:onyx ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
COPY --chown=onyx:onyx ./scripts/supervisord_entrypoint.sh /app/scripts/supervisord_entrypoint.sh
COPY --chown=onyx:onyx ./scripts/setup_craft_templates.sh /app/scripts/setup_craft_templates.sh
RUN chmod +x /app/scripts/supervisord_entrypoint.sh /app/scripts/setup_craft_templates.sh
RUN chmod +x /app/scripts/supervisord_entrypoint.sh
# Put logo in assets
COPY --chown=onyx:onyx ./assets /app/assets
View File
@@ -225,6 +225,7 @@ def do_run_migrations(
) -> None:
if create_schema:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
connection.execute(text(f'SET search_path TO "{schema_name}"'))
@@ -308,7 +309,6 @@ async def run_async_migrations() -> None:
schema_name=schema,
create_schema=create_schema,
)
await connection.commit()
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
@@ -346,7 +346,6 @@ async def run_async_migrations() -> None:
schema_name=schema,
create_schema=create_schema,
)
await connection.commit()
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
View File
@@ -1,351 +0,0 @@
"""single onyx craft migration
Consolidates all buildmode/onyx craft tables into a single migration.
Tables created:
- build_session: User build sessions with status tracking
- sandbox: User-owned containerized environments (one per user)
- artifact: Build output files (web apps, documents, images)
- snapshot: Sandbox filesystem snapshots
- build_message: Conversation messages for build sessions
Existing table modified:
- connector_credential_pair: Added processing_mode column
Revision ID: 2020d417ec84
Revises: 41fa44bef321
Create Date: 2026-01-26 14:43:54.641405
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "2020d417ec84"
down_revision = "41fa44bef321"
branch_labels = None
depends_on = None
def upgrade() -> None:
# ==========================================================================
# ENUMS
# ==========================================================================
# Build session status enum
build_session_status_enum = sa.Enum(
"active",
"idle",
name="buildsessionstatus",
native_enum=False,
)
# Sandbox status enum
sandbox_status_enum = sa.Enum(
"provisioning",
"running",
"idle",
"sleeping",
"terminated",
"failed",
name="sandboxstatus",
native_enum=False,
)
# Artifact type enum
artifact_type_enum = sa.Enum(
"web_app",
"pptx",
"docx",
"markdown",
"excel",
"image",
name="artifacttype",
native_enum=False,
)
# ==========================================================================
# BUILD_SESSION TABLE
# ==========================================================================
op.create_table(
"build_session",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id", ondelete="CASCADE"),
nullable=True,
),
sa.Column("name", sa.String(), nullable=True),
sa.Column(
"status",
build_session_status_enum,
nullable=False,
server_default="active",
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"last_activity_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("nextjs_port", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_build_session_user_created",
"build_session",
["user_id", sa.text("created_at DESC")],
unique=False,
)
op.create_index(
"ix_build_session_status",
"build_session",
["status"],
unique=False,
)
# ==========================================================================
# SANDBOX TABLE (user-owned, one per user)
# ==========================================================================
op.create_table(
"sandbox",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("container_id", sa.String(), nullable=True),
sa.Column(
"status",
sandbox_status_enum,
nullable=False,
server_default="provisioning",
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("last_heartbeat", sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("user_id", name="sandbox_user_id_key"),
)
op.create_index(
"ix_sandbox_status",
"sandbox",
["status"],
unique=False,
)
op.create_index(
"ix_sandbox_container_id",
"sandbox",
["container_id"],
unique=False,
)
# ==========================================================================
# ARTIFACT TABLE
# ==========================================================================
op.create_table(
"artifact",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("type", artifact_type_enum, nullable=False),
sa.Column("path", sa.String(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_artifact_session_created",
"artifact",
["session_id", sa.text("created_at DESC")],
unique=False,
)
op.create_index(
"ix_artifact_type",
"artifact",
["type"],
unique=False,
)
# ==========================================================================
# SNAPSHOT TABLE
# ==========================================================================
op.create_table(
"snapshot",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("storage_path", sa.String(), nullable=False),
sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_snapshot_session_created",
"snapshot",
["session_id", sa.text("created_at DESC")],
unique=False,
)
# ==========================================================================
# BUILD_MESSAGE TABLE
# ==========================================================================
op.create_table(
"build_message",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"session_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("build_session.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column(
"turn_index",
sa.Integer(),
nullable=False,
),
sa.Column(
"type",
sa.Enum(
"SYSTEM",
"USER",
"ASSISTANT",
"DANSWER",
name="messagetype",
create_type=False,
native_enum=False,
),
nullable=False,
),
sa.Column(
"message_metadata",
postgresql.JSONB(),
nullable=False,
),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
"ix_build_message_session_turn",
"build_message",
["session_id", "turn_index", sa.text("created_at ASC")],
unique=False,
)
# ==========================================================================
# CONNECTOR_CREDENTIAL_PAIR MODIFICATION
# ==========================================================================
op.add_column(
"connector_credential_pair",
sa.Column(
"processing_mode",
sa.String(),
nullable=False,
server_default="regular",
),
)
def downgrade() -> None:
# ==========================================================================
# CONNECTOR_CREDENTIAL_PAIR MODIFICATION
# ==========================================================================
op.drop_column("connector_credential_pair", "processing_mode")
# ==========================================================================
# BUILD_MESSAGE TABLE
# ==========================================================================
op.drop_index("ix_build_message_session_turn", table_name="build_message")
op.drop_table("build_message")
# ==========================================================================
# SNAPSHOT TABLE
# ==========================================================================
op.drop_index("ix_snapshot_session_created", table_name="snapshot")
op.drop_table("snapshot")
# ==========================================================================
# ARTIFACT TABLE
# ==========================================================================
op.drop_index("ix_artifact_type", table_name="artifact")
op.drop_index("ix_artifact_session_created", table_name="artifact")
op.drop_table("artifact")
sa.Enum(name="artifacttype").drop(op.get_bind(), checkfirst=True)
# ==========================================================================
# SANDBOX TABLE
# ==========================================================================
op.drop_index("ix_sandbox_container_id", table_name="sandbox")
op.drop_index("ix_sandbox_status", table_name="sandbox")
op.drop_table("sandbox")
sa.Enum(name="sandboxstatus").drop(op.get_bind(), checkfirst=True)
# ==========================================================================
# BUILD_SESSION TABLE
# ==========================================================================
op.drop_index("ix_build_session_status", table_name="build_session")
op.drop_index("ix_build_session_user_created", table_name="build_session")
op.drop_table("build_session")
sa.Enum(name="buildsessionstatus").drop(op.get_bind(), checkfirst=True)
View File
@@ -1,42 +0,0 @@
"""add_unique_constraint_to_inputprompt_prompt_user_id
Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique constraint on (prompt, user_id) for user-owned prompts
# This ensures each user can only have one shortcut with a given name
op.create_unique_constraint(
"uq_inputprompt_prompt_user_id",
"inputprompt",
["prompt", "user_id"],
)
# Create partial unique index for public prompts (where user_id IS NULL)
# PostgreSQL unique constraints don't enforce uniqueness for NULL values,
# so we need a partial index to ensure public prompt names are also unique
op.execute(
"""
CREATE UNIQUE INDEX uq_inputprompt_prompt_public
ON inputprompt (prompt)
WHERE user_id IS NULL
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
op.drop_constraint("uq_inputprompt_prompt_user_id", "inputprompt", type_="unique")
View File
@@ -1,29 +0,0 @@
"""remove default prompt shortcuts
Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Delete any user associations for the default prompts first (foreign key constraint)
op.execute(
"DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)"
)
# Delete the pre-seeded default prompt shortcuts (they have negative IDs)
op.execute("DELETE FROM inputprompt WHERE id < 0")
def downgrade() -> None:
# We don't restore the default prompts on downgrade
pass
View File
@@ -85,122 +85,103 @@ class UserRow(NamedTuple):
def upgrade() -> None:
conn = op.get_bind()
# Step 1: Create or update the unified assistant (ID 0)
search_assistant = conn.execute(
sa.text("SELECT * FROM persona WHERE id = 0")
).fetchone()
# Start transaction
conn.execute(sa.text("BEGIN"))
if search_assistant:
# Update existing Search assistant to be the unified assistant
try:
# Step 1: Create or update the unified assistant (ID 0)
search_assistant = conn.execute(
sa.text("SELECT * FROM persona WHERE id = 0")
).fetchone()
if search_assistant:
# Update existing Search assistant to be the unified assistant
conn.execute(
sa.text(
"""
UPDATE persona
SET name = :name,
description = :description,
system_prompt = :system_prompt,
num_chunks = :num_chunks,
is_default_persona = true,
is_visible = true,
deleted = false,
display_priority = :display_priority,
llm_filter_extraction = :llm_filter_extraction,
llm_relevance_filter = :llm_relevance_filter,
recency_bias = :recency_bias,
chunks_above = :chunks_above,
chunks_below = :chunks_below,
datetime_aware = :datetime_aware,
starter_messages = null
WHERE id = 0
"""
),
INSERT_DICT,
)
else:
# Create new unified assistant with ID 0
conn.execute(
sa.text(
"""
INSERT INTO persona (
id, name, description, system_prompt, num_chunks,
is_default_persona, is_visible, deleted, display_priority,
llm_filter_extraction, llm_relevance_filter, recency_bias,
chunks_above, chunks_below, datetime_aware, starter_messages,
builtin_persona
) VALUES (
0, :name, :description, :system_prompt, :num_chunks,
true, true, false, :display_priority, :llm_filter_extraction,
:llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
:datetime_aware, null, true
)
"""
),
INSERT_DICT,
)
# Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
conn.execute(
sa.text(
"""
UPDATE persona
SET name = :name,
description = :description,
system_prompt = :system_prompt,
num_chunks = :num_chunks,
is_default_persona = true,
is_visible = true,
deleted = false,
display_priority = :display_priority,
llm_filter_extraction = :llm_filter_extraction,
llm_relevance_filter = :llm_relevance_filter,
recency_bias = :recency_bias,
chunks_above = :chunks_above,
chunks_below = :chunks_below,
datetime_aware = :datetime_aware,
starter_messages = null
WHERE id = 0
SET deleted = true, is_visible = false, is_default_persona = false
WHERE builtin_persona = true AND id != 0
"""
),
INSERT_DICT,
)
else:
# Create new unified assistant with ID 0
conn.execute(
sa.text(
"""
INSERT INTO persona (
id, name, description, system_prompt, num_chunks,
is_default_persona, is_visible, deleted, display_priority,
llm_filter_extraction, llm_relevance_filter, recency_bias,
chunks_above, chunks_below, datetime_aware, starter_messages,
builtin_persona
) VALUES (
0, :name, :description, :system_prompt, :num_chunks,
true, true, false, :display_priority, :llm_filter_extraction,
:llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
:datetime_aware, null, true
)
"""
),
INSERT_DICT,
)
)
# Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = true, is_visible = false, is_default_persona = false
WHERE builtin_persona = true AND id != 0
"""
)
)
# Step 3: Add all built-in tools to the unified assistant
# First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
).fetchone()
# Step 3: Add all built-in tools to the unified assistant
# First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
).fetchone()
if not search_tool:
raise ValueError(
"SearchTool not found in database. Ensure tools migration has run first."
)
if not search_tool:
raise ValueError(
"SearchTool not found in database. Ensure tools migration has run first."
)
image_gen_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
).fetchone()
image_gen_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
).fetchone()
if not image_gen_tool:
raise ValueError(
"ImageGenerationTool not found in database. Ensure tools migration has run first."
)
if not image_gen_tool:
raise ValueError(
"ImageGenerationTool not found in database. Ensure tools migration has run first."
)
# WebSearchTool is optional - may not be configured
web_search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
).fetchone()
# WebSearchTool is optional - may not be configured
web_search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
).fetchone()
# Clear existing tool associations for persona 0
conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
# Clear existing tool associations for persona 0
conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
# Add tools to the unified assistant
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": search_tool[0]},
)
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": image_gen_tool[0]},
)
if web_search_tool:
# Add tools to the unified assistant
conn.execute(
sa.text(
"""
@@ -209,148 +190,191 @@ def upgrade() -> None:
ON CONFLICT DO NOTHING
"""
),
{"tool_id": web_search_tool[0]},
{"tool_id": search_tool[0]},
)
# Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
conn.execute(
sa.text(
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
UPDATE chat_session
SET persona_id = 0
WHERE persona_id IN (
SELECT id FROM persona WHERE builtin_persona = true AND id != 0
)
"""
),
{"tool_id": image_gen_tool[0]},
)
)
# Step 5: Migrate user preferences - remove references to all builtin assistants
# First, get all builtin assistant IDs (except 0)
builtin_assistants_result = conn.execute(
sa.text(
"""
SELECT id FROM persona
WHERE builtin_persona = true AND id != 0
"""
)
).fetchall()
builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
# Get all users with preferences
users_result = conn.execute(
sa.text(
"""
SELECT id, chosen_assistants, visible_assistants,
hidden_assistants, pinned_assistants
FROM "user"
"""
)
).fetchall()
for user_row in users_result:
user = UserRow(*user_row)
user_id: UUID = user.id
updates: dict[str, Any] = {}
# Remove all builtin assistants from chosen_assistants
if user.chosen_assistants:
new_chosen: list[int] = [
assistant_id
for assistant_id in user.chosen_assistants
if assistant_id not in builtin_assistant_ids
]
if new_chosen != user.chosen_assistants:
updates["chosen_assistants"] = json.dumps(new_chosen)
# Remove all builtin assistants from visible_assistants
if user.visible_assistants:
new_visible: list[int] = [
assistant_id
for assistant_id in user.visible_assistants
if assistant_id not in builtin_assistant_ids
]
if new_visible != user.visible_assistants:
updates["visible_assistants"] = json.dumps(new_visible)
# Add all builtin assistants to hidden_assistants
if user.hidden_assistants:
new_hidden: list[int] = list(user.hidden_assistants)
for old_id in builtin_assistant_ids:
if old_id not in new_hidden:
new_hidden.append(old_id)
if new_hidden != user.hidden_assistants:
updates["hidden_assistants"] = json.dumps(new_hidden)
else:
updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
# Remove all builtin assistants from pinned_assistants
if user.pinned_assistants:
new_pinned: list[int] = [
assistant_id
for assistant_id in user.pinned_assistants
if assistant_id not in builtin_assistant_ids
]
if new_pinned != user.pinned_assistants:
updates["pinned_assistants"] = json.dumps(new_pinned)
# Apply updates if any
if updates:
set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
updates["user_id"] = str(user_id) # Convert UUID to string for SQL
if web_search_tool:
conn.execute(
sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
updates,
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": web_search_tool[0]},
)
# Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
conn.execute(
sa.text(
"""
UPDATE chat_session
SET persona_id = 0
WHERE persona_id IN (
SELECT id FROM persona WHERE builtin_persona = true AND id != 0
)
"""
)
)
# Step 5: Migrate user preferences - remove references to all builtin assistants
# First, get all builtin assistant IDs (except 0)
builtin_assistants_result = conn.execute(
sa.text(
"""
SELECT id FROM persona
WHERE builtin_persona = true AND id != 0
"""
)
).fetchall()
builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
# Get all users with preferences
users_result = conn.execute(
sa.text(
"""
SELECT id, chosen_assistants, visible_assistants,
hidden_assistants, pinned_assistants
FROM "user"
"""
)
).fetchall()
for user_row in users_result:
user = UserRow(*user_row)
user_id: UUID = user.id
updates: dict[str, Any] = {}
# Remove all builtin assistants from chosen_assistants
if user.chosen_assistants:
new_chosen: list[int] = [
assistant_id
for assistant_id in user.chosen_assistants
if assistant_id not in builtin_assistant_ids
]
if new_chosen != user.chosen_assistants:
updates["chosen_assistants"] = json.dumps(new_chosen)
# Remove all builtin assistants from visible_assistants
if user.visible_assistants:
new_visible: list[int] = [
assistant_id
for assistant_id in user.visible_assistants
if assistant_id not in builtin_assistant_ids
]
if new_visible != user.visible_assistants:
updates["visible_assistants"] = json.dumps(new_visible)
# Add all builtin assistants to hidden_assistants
if user.hidden_assistants:
new_hidden: list[int] = list(user.hidden_assistants)
for old_id in builtin_assistant_ids:
if old_id not in new_hidden:
new_hidden.append(old_id)
if new_hidden != user.hidden_assistants:
updates["hidden_assistants"] = json.dumps(new_hidden)
else:
updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
# Remove all builtin assistants from pinned_assistants
if user.pinned_assistants:
new_pinned: list[int] = [
assistant_id
for assistant_id in user.pinned_assistants
if assistant_id not in builtin_assistant_ids
]
if new_pinned != user.pinned_assistants:
updates["pinned_assistants"] = json.dumps(new_pinned)
# Apply updates if any
if updates:
set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
updates["user_id"] = str(user_id) # Convert UUID to string for SQL
conn.execute(
sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
updates,
)
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
conn = op.get_bind()
# Only restore General (ID -1) and Art (ID -3) assistants
# Step 1: Keep Search assistant (ID 0) as default but restore original state
conn.execute(
sa.text(
# Start transaction
conn.execute(sa.text("BEGIN"))
try:
# Only restore General (ID -1) and Art (ID -3) assistants
# Step 1: Keep Search assistant (ID 0) as default but restore original state
conn.execute(
sa.text(
"""
UPDATE persona
SET is_default_persona = true,
is_visible = true,
deleted = false
WHERE id = 0
"""
UPDATE persona
SET is_default_persona = true,
is_visible = true,
deleted = false
WHERE id = 0
"""
)
)
)
# Step 2: Restore General assistant (ID -1)
conn.execute(
sa.text(
# Step 2: Restore General assistant (ID -1)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :general_assistant_id
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :general_assistant_id
"""
),
{"general_assistant_id": GENERAL_ASSISTANT_ID},
)
),
{"general_assistant_id": GENERAL_ASSISTANT_ID},
)
# Step 3: Restore Art assistant (ID -3)
conn.execute(
sa.text(
# Step 3: Restore Art assistant (ID -3)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :art_assistant_id
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :art_assistant_id
"""
),
{"art_assistant_id": ART_ASSISTANT_ID},
)
),
{"art_assistant_id": ART_ASSISTANT_ID},
)
# Note: We don't restore the original tool associations, names, or descriptions
# as those would require more complex logic to determine original state.
# We also cannot restore original chat session persona_ids as we don't
# have the original mappings.
# Other builtin assistants remain deleted as per the requirement.
# Note: We don't restore the original tool associations, names, or descriptions
# as those would require more complex logic to determine original state.
# We also cannot restore original chat session persona_ids as we don't
# have the original mappings.
# Other builtin assistants remain deleted as per the requirement.
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
View File
@@ -1,45 +0,0 @@
"""make processing mode default all caps
Revision ID: 72aa7de2e5cf
Revises: 2020d417ec84
Create Date: 2026-01-26 18:58:47.705253
This migration fixes the ProcessingMode enum value mismatch:
- SQLAlchemy's Enum with native_enum=False uses enum member NAMES as valid values
- The original migration stored lowercase VALUES ('regular', 'file_system')
- This converts existing data to uppercase NAMES ('REGULAR', 'FILE_SYSTEM')
- Also drops any spurious native PostgreSQL enum type that may have been auto-created
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "72aa7de2e5cf"
down_revision = "2020d417ec84"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Convert existing lowercase values to uppercase to match enum member names
op.execute(
"UPDATE connector_credential_pair SET processing_mode = 'REGULAR' "
"WHERE processing_mode = 'regular'"
)
op.execute(
"UPDATE connector_credential_pair SET processing_mode = 'FILE_SYSTEM' "
"WHERE processing_mode = 'file_system'"
)
# Update the server default to use uppercase
op.alter_column(
"connector_credential_pair",
"processing_mode",
server_default="REGULAR",
)
def downgrade() -> None:
# State prior to this was broken, so we don't want to revert back to it
pass
View File
@@ -1,47 +0,0 @@
"""add_search_query_table
Revision ID: 73e9983e5091
Revises: d1b637d7050a
Create Date: 2026-01-14 14:16:52.837489
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "73e9983e5091"
down_revision = "d1b637d7050a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"search_query",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id"),
nullable=False,
),
sa.Column("query", sa.String(), nullable=False),
sa.Column("query_expansions", postgresql.ARRAY(sa.String()), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
)
op.create_index("ix_search_query_user_id", "search_query", ["user_id"])
op.create_index("ix_search_query_created_at", "search_query", ["created_at"])
def downgrade() -> None:
op.drop_index("ix_search_query_created_at", table_name="search_query")
op.drop_index("ix_search_query_user_id", table_name="search_query")
op.drop_table("search_query")
View File
@@ -10,7 +10,8 @@ from alembic import op
import sqlalchemy as sa
from onyx.db.models import IndexModelStatus
from onyx.context.search.enums import RecencyBiasSetting, SearchType
from onyx.context.search.enums import RecencyBiasSetting
from onyx.context.search.enums import SearchType
# revision identifiers, used by Alembic.
revision = "776b3bbe9092"
View File
@@ -1,49 +0,0 @@
"""notifications constraint, sort index, and cleanup old notifications
Revision ID: 8405ca81cc83
Revises: a3c1a7904cd0
Create Date: 2026-01-07 16:43:44.855156
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "8405ca81cc83"
down_revision = "a3c1a7904cd0"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique index for notification deduplication.
# This enables atomic ON CONFLICT DO NOTHING inserts in batch_create_notifications.
#
# Uses COALESCE to handle NULL additional_data (NULLs are normally distinct
# in unique constraints, but we want NULL == NULL for deduplication).
# The '{}' represents an empty JSONB object as the NULL replacement.
# Clean up legacy notifications first
op.execute("DELETE FROM notification WHERE title = 'New Notification'")
op.execute(
"""
CREATE UNIQUE INDEX IF NOT EXISTS ix_notification_user_type_data
ON notification (user_id, notif_type, COALESCE(additional_data, '{}'::jsonb))
"""
)
# Create index for efficient notification sorting by user
# Covers: WHERE user_id = ? ORDER BY dismissed, first_shown DESC
op.execute(
"""
CREATE INDEX IF NOT EXISTS ix_notification_user_sort
ON notification (user_id, dismissed, first_shown DESC)
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS ix_notification_user_type_data")
op.execute("DROP INDEX IF EXISTS ix_notification_user_sort")
View File
@@ -1,116 +0,0 @@
"""Add Discord bot tables
Revision ID: 8b5ce697290e
Revises: a1b2c3d4e5f7
Create Date: 2025-01-14
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "8b5ce697290e"
down_revision = "a1b2c3d4e5f7"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
# DiscordBotConfig (singleton table - one per tenant)
op.create_table(
"discord_bot_config",
sa.Column(
"id",
sa.String(),
primary_key=True,
server_default=sa.text("'SINGLETON'"),
),
sa.Column("bot_token", sa.LargeBinary(), nullable=False), # EncryptedString
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.CheckConstraint("id = 'SINGLETON'", name="ck_discord_bot_config_singleton"),
)
# DiscordGuildConfig
op.create_table(
"discord_guild_config",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("guild_id", sa.BigInteger(), nullable=True, unique=True),
sa.Column("guild_name", sa.String(), nullable=True),
sa.Column("registration_key", sa.String(), nullable=False, unique=True),
sa.Column("registered_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"default_persona_id",
sa.Integer(),
sa.ForeignKey("persona.id", ondelete="SET NULL"),
nullable=True,
),
sa.Column(
"enabled", sa.Boolean(), server_default=sa.text("true"), nullable=False
),
)
# DiscordChannelConfig
op.create_table(
"discord_channel_config",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column(
"guild_config_id",
sa.Integer(),
sa.ForeignKey("discord_guild_config.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("channel_id", sa.BigInteger(), nullable=False),
sa.Column("channel_name", sa.String(), nullable=False),
sa.Column(
"channel_type",
sa.String(20),
server_default=sa.text("'text'"),
nullable=False,
),
sa.Column(
"is_private",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.Column(
"thread_only_mode",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.Column(
"require_bot_invocation",
sa.Boolean(),
server_default=sa.text("true"),
nullable=False,
),
sa.Column(
"persona_override_id",
sa.Integer(),
sa.ForeignKey("persona.id", ondelete="SET NULL"),
nullable=True,
),
sa.Column(
"enabled", sa.Boolean(), server_default=sa.text("false"), nullable=False
),
)
# Unique constraint: one config per channel per guild
op.create_unique_constraint(
"uq_discord_channel_guild_channel",
"discord_channel_config",
["guild_config_id", "channel_id"],
)
def downgrade() -> None:
op.drop_table("discord_channel_config")
op.drop_table("discord_guild_config")
op.drop_table("discord_bot_config")
View File
@@ -1,27 +0,0 @@
"""add processing_duration_seconds to chat_message
Revision ID: 9d1543a37106
Revises: 72aa7de2e5cf
Create Date: 2026-01-21 11:42:18.546188
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9d1543a37106"
down_revision = "72aa7de2e5cf"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"chat_message",
sa.Column("processing_duration_seconds", sa.Float(), nullable=True),
)
def downgrade() -> None:
op.drop_column("chat_message", "processing_duration_seconds")
View File
@@ -42,13 +42,20 @@ TOOL_DESCRIPTIONS = {
def upgrade() -> None:
conn = op.get_bind()
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("BEGIN"))
try:
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("COMMIT"))
except Exception as e:
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
View File
@@ -1,47 +0,0 @@
"""drop agent_search_metrics table
Revision ID: a1b2c3d4e5f7
Revises: 73e9983e5091
Create Date: 2026-01-17
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f7"
down_revision = "73e9983e5091"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_table("agent__search_metrics")
def downgrade() -> None:
op.create_table(
"agent__search_metrics",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("user_id", sa.UUID(), nullable=True),
sa.Column("persona_id", sa.Integer(), nullable=True),
sa.Column("agent_type", sa.String(), nullable=False),
sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
sa.Column("base_duration_s", sa.Float(), nullable=False),
sa.Column("full_duration_s", sa.Float(), nullable=False),
sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.PrimaryKeyConstraint("id"),
)
View File
@@ -7,6 +7,7 @@ Create Date: 2025-12-18 16:00:00.000000
"""
from alembic import op
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
import sqlalchemy as sa
@@ -18,7 +19,7 @@ depends_on = None
DEEP_RESEARCH_TOOL = {
"name": "ResearchAgent",
"name": RESEARCH_AGENT_DB_NAME,
"display_name": "Research Agent",
"description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
"in_code_tool_id": "ResearchAgent",
View File
@@ -70,66 +70,80 @@ BUILT_IN_TOOLS = [
def upgrade() -> None:
conn = op.get_bind()
# Get existing tools to check what already exists
existing_tools = conn.execute(
sa.text("SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL")
).fetchall()
existing_tool_ids = {row[0] for row in existing_tools}
# Start transaction
conn.execute(sa.text("BEGIN"))
# Insert or update built-in tools
for tool in BUILT_IN_TOOLS:
in_code_id = tool["in_code_tool_id"]
try:
# Get existing tools to check what already exists
existing_tools = conn.execute(
sa.text(
"SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL"
)
).fetchall()
existing_tool_ids = {row[0] for row in existing_tools}
# Handle historical rename: InternetSearchTool -> WebSearchTool
if (
in_code_id == "WebSearchTool"
and "WebSearchTool" not in existing_tool_ids
and "InternetSearchTool" in existing_tool_ids
):
# Rename the existing InternetSearchTool row in place and update fields
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description,
in_code_tool_id = :in_code_tool_id
WHERE in_code_tool_id = 'InternetSearchTool'
"""
),
tool,
)
# Keep the local view of existing ids in sync to avoid duplicate insert
existing_tool_ids.discard("InternetSearchTool")
existing_tool_ids.add("WebSearchTool")
continue
# Insert or update built-in tools
for tool in BUILT_IN_TOOLS:
in_code_id = tool["in_code_tool_id"]
if in_code_id in existing_tool_ids:
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
tool,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id)
VALUES (:name, :display_name, :description, :in_code_tool_id)
"""
),
tool,
)
# Handle historical rename: InternetSearchTool -> WebSearchTool
if (
in_code_id == "WebSearchTool"
and "WebSearchTool" not in existing_tool_ids
and "InternetSearchTool" in existing_tool_ids
):
# Rename the existing InternetSearchTool row in place and update fields
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description,
in_code_tool_id = :in_code_tool_id
WHERE in_code_tool_id = 'InternetSearchTool'
"""
),
tool,
)
# Keep the local view of existing ids in sync to avoid duplicate insert
existing_tool_ids.discard("InternetSearchTool")
existing_tool_ids.add("WebSearchTool")
continue
if in_code_id in existing_tool_ids:
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
tool,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id)
VALUES (:name, :display_name, :description, :in_code_tool_id)
"""
),
tool,
)
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
View File
@@ -1,64 +0,0 @@
"""sync_exa_api_key_to_content_provider
Revision ID: d1b637d7050a
Revises: d25168c2beee
Create Date: 2026-01-09 15:54:15.646249
"""
from alembic import op
from sqlalchemy import text
# revision identifiers, used by Alembic.
revision = "d1b637d7050a"
down_revision = "d25168c2beee"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Exa uses a shared API key between search and content providers.
# For existing Exa search providers with API keys, create the corresponding
# content provider if it doesn't exist yet.
connection = op.get_bind()
# Check if Exa search provider exists with an API key
result = connection.execute(
text(
"""
SELECT api_key FROM internet_search_provider
WHERE provider_type = 'exa' AND api_key IS NOT NULL
LIMIT 1
"""
)
)
row = result.fetchone()
if row:
api_key = row[0]
# Create Exa content provider with the shared key
connection.execute(
text(
"""
INSERT INTO internet_content_provider
(name, provider_type, api_key, is_active)
VALUES ('Exa', 'exa', :api_key, false)
ON CONFLICT (name) DO NOTHING
"""
),
{"api_key": api_key},
)
def downgrade() -> None:
# Remove the Exa content provider that was created by this migration
connection = op.get_bind()
connection.execute(
text(
"""
DELETE FROM internet_content_provider
WHERE provider_type = 'exa'
"""
)
)
View File
@@ -1,86 +0,0 @@
"""tool_name_consistency
Revision ID: d25168c2beee
Revises: 8405ca81cc83
Create Date: 2026-01-11 17:54:40.135777
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d25168c2beee"
down_revision = "8405ca81cc83"
branch_labels = None
depends_on = None
# Currently the seeded tools have the in_code_tool_id == name
CURRENT_TOOL_NAME_MAPPING = [
"SearchTool",
"WebSearchTool",
"ImageGenerationTool",
"PythonTool",
"OpenURLTool",
"KnowledgeGraphTool",
"ResearchAgent",
]
# Mapping of in_code_tool_id -> name
# These are the expected names that we want in the database
EXPECTED_TOOL_NAME_MAPPING = {
"SearchTool": "internal_search",
"WebSearchTool": "web_search",
"ImageGenerationTool": "generate_image",
"PythonTool": "python",
"OpenURLTool": "open_url",
"KnowledgeGraphTool": "run_kg_search",
"ResearchAgent": "research_agent",
}
def upgrade() -> None:
conn = op.get_bind()
# Mapping of in_code_tool_id to the NAME constant from each tool class
# These match the .name property of each tool implementation
tool_name_mapping = EXPECTED_TOOL_NAME_MAPPING
# Update the name column for each tool based on its in_code_tool_id
for in_code_tool_id, expected_name in tool_name_mapping.items():
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :expected_name
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"expected_name": expected_name,
"in_code_tool_id": in_code_tool_id,
},
)
def downgrade() -> None:
conn = op.get_bind()
# Reverse the migration by setting name back to in_code_tool_id
# This matches the original pattern where name was the class name
for in_code_tool_id in CURRENT_TOOL_NAME_MAPPING:
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :current_name
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"current_name": in_code_tool_id,
"in_code_tool_id": in_code_tool_id,
},
)
View File
@@ -1,31 +0,0 @@
"""add chat_background to user
Revision ID: fb80bdd256de
Revises: 8b5ce697290e
Create Date: 2026-01-16 16:15:59.222617
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fb80bdd256de"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column(
"chat_background",
sa.String(),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("user", "chat_background")
View File
@@ -109,6 +109,7 @@ CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS = float(
STRIPE_SECRET_KEY = os.environ.get("STRIPE_SECRET_KEY")
STRIPE_PRICE_ID = os.environ.get("STRIPE_PRICE")
# JWT Public Key URL
JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)
@@ -128,8 +129,3 @@ MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")
HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")
GATED_TENANTS_KEY = "gated_tenants"
# License enforcement - when True, blocks API access for gated/expired licenses
LICENSE_ENFORCEMENT_ENABLED = (
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "").lower() == "true"
)
View File
@@ -3,42 +3,30 @@ from uuid import UUID
from sqlalchemy.orm import Session
from onyx.configs.constants import NotificationType
from onyx.db.models import Persona
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import PersonaSharedNotificationData
def update_persona_access(
def make_persona_private(
persona_id: int,
creator_user_id: UUID | None,
user_ids: list[UUID] | None,
group_ids: list[int] | None,
db_session: Session,
is_public: bool | None = None,
user_ids: list[UUID] | None = None,
group_ids: list[int] | None = None,
) -> None:
"""Updates the access settings for a persona including public status, user shares,
and group shares.
"""NOTE(rkuo): This function batches all updates into a single commit. If we don't
dedupe the inputs, the commit will exception."""
NOTE: This function batches all updates. If we don't dedupe the inputs,
the commit will exception.
NOTE: Callers are responsible for committing."""
if is_public is not None:
persona = db_session.query(Persona).filter(Persona.id == persona_id).first()
if persona:
persona.is_public = is_public
# NOTE: For user-ids and group-ids, `None` means "leave unchanged", `[]` means "clear all shares",
# and a non-empty list means "replace with these shares".
if user_ids is not None:
db_session.query(Persona__User).filter(
Persona__User.persona_id == persona_id
).delete(synchronize_session="fetch")
db_session.query(Persona__User).filter(
Persona__User.persona_id == persona_id
).delete(synchronize_session="fetch")
db_session.query(Persona__UserGroup).filter(
Persona__UserGroup.persona_id == persona_id
).delete(synchronize_session="fetch")
if user_ids:
user_ids_set = set(user_ids)
for user_id in user_ids_set:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_id))
@@ -53,13 +41,11 @@ def update_persona_access(
).model_dump(),
)
if group_ids is not None:
db_session.query(Persona__UserGroup).filter(
Persona__UserGroup.persona_id == persona_id
).delete(synchronize_session="fetch")
if group_ids:
group_ids_set = set(group_ids)
for group_id in group_ids_set:
db_session.add(
Persona__UserGroup(persona_id=persona_id, user_group_id=group_id)
)
db_session.commit()
View File
@@ -1,64 +0,0 @@
import uuid
from datetime import timedelta
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import SearchQuery
def create_search_query(
db_session: Session,
user_id: UUID,
query: str,
query_expansions: list[str] | None = None,
) -> SearchQuery:
"""Create and persist a `SearchQuery` row.
Notes:
- `SearchQuery.id` is a UUID PK without a server-side default, so we generate it.
- `created_at` is filled by the DB (server_default=now()).
"""
search_query = SearchQuery(
id=uuid.uuid4(),
user_id=user_id,
query=query,
query_expansions=query_expansions,
)
db_session.add(search_query)
db_session.commit()
db_session.refresh(search_query)
return search_query
def fetch_search_queries_for_user(
db_session: Session,
user_id: UUID,
filter_days: int | None = None,
limit: int | None = None,
) -> list[SearchQuery]:
"""Fetch `SearchQuery` rows for a user.
Args:
user_id: User UUID.
filter_days: Optional time filter. If provided, only rows created within
the last `filter_days` days are returned.
limit: Optional max number of rows to return.
"""
if filter_days is not None and filter_days <= 0:
raise ValueError("filter_days must be > 0")
stmt = select(SearchQuery).where(SearchQuery.user_id == user_id)
if filter_days is not None and filter_days > 0:
cutoff = get_db_current_time(db_session) - timedelta(days=filter_days)
stmt = stmt.where(SearchQuery.created_at >= cutoff)
stmt = stmt.order_by(SearchQuery.created_at.desc())
if limit is not None:
stmt = stmt.limit(limit)
return list(db_session.scalars(stmt).all())
View File
@@ -16,17 +16,16 @@ from ee.onyx.server.enterprise_settings.api import (
from ee.onyx.server.evals.api import router as evals_router
from ee.onyx.server.license.api import router as license_router
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
from ee.onyx.server.middleware.license_enforcement import (
add_license_enforcement_middleware,
)
from ee.onyx.server.middleware.tenant_tracking import (
add_api_server_tenant_id_middleware,
)
from ee.onyx.server.oauth.api import router as ee_oauth_router
from ee.onyx.server.query_and_chat.chat_backend import (
router as chat_router,
)
from ee.onyx.server.query_and_chat.query_backend import (
basic_router as ee_query_router,
)
from ee.onyx.server.query_and_chat.search_backend import router as search_router
from ee.onyx.server.query_history.api import router as query_history_router
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
from ee.onyx.server.seeding import seed_db
@@ -86,10 +85,6 @@ def get_application() -> FastAPI:
if MULTI_TENANT:
add_api_server_tenant_id_middleware(application, logger)
# Add license enforcement middleware (runs after tenant tracking)
# This blocks access when license is expired/gated
add_license_enforcement_middleware(application, logger)
if AUTH_TYPE == AuthType.CLOUD:
# For Google OAuth, refresh tokens are requested by:
# 1. Adding the right scopes
@@ -129,7 +124,7 @@ def get_application() -> FastAPI:
# EE only backend APIs
include_router_with_global_prefix_prepended(application, query_router)
include_router_with_global_prefix_prepended(application, ee_query_router)
include_router_with_global_prefix_prepended(application, search_router)
include_router_with_global_prefix_prepended(application, chat_router)
include_router_with_global_prefix_prepended(application, standard_answer_router)
include_router_with_global_prefix_prepended(application, ee_oauth_router)
include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)
View File
@@ -1,27 +0,0 @@
# Single message is likely most reliable and generally better for this task
# No final reminders at the end since the user query is expected to be short
# If it is not short, it should go into the chat flow so we do not need to account for this.
KEYWORD_EXPANSION_PROMPT = """
Generate a set of keyword-only queries to help find relevant documents for the provided query. \
These queries will be passed to a bm25-based keyword search engine. \
Provide a single query per line (where each query consists of one or more keywords). \
The queries must be purely keywords and not contain any filler natural language. \
Each query should have as few keywords as necessary to represent the user's search intent. \
If there are no useful expansions, simply return the original query with no additional keyword queries. \
CRITICAL: Do not include any additional formatting, comments, or anything aside from the keyword queries.
The user query is:
{user_query}
""".strip()
QUERY_TYPE_PROMPT = """
Determine if the provided query is better suited for a keyword search or a semantic search.
Respond with "keyword" or "semantic" literally and nothing else.
Do not provide any additional text or reasoning to your response.
CRITICAL: It must only be 1 single word - EITHER "keyword" or "semantic".
The user query is:
{user_query}
""".strip()

View File

@@ -1,42 +0,0 @@
# ruff: noqa: E501, W605 start
SEARCH_CLASS = "search"
CHAT_CLASS = "chat"
# Note that with many larger LLMs, the latency of running this prompt via third-party APIs can be as high as 2 seconds,
# which is too slow for many use cases.
SEARCH_CHAT_PROMPT = f"""
Determine if the following query is better suited for a search UI or a chat UI. Respond with "{SEARCH_CLASS}" or "{CHAT_CLASS}" literally and nothing else. \
Do not provide any additional text or reasoning to your response. CRITICAL, IT MUST ONLY BE 1 SINGLE WORD - EITHER "{SEARCH_CLASS}" or "{CHAT_CLASS}".
# Classification Guidelines:
## {SEARCH_CLASS}
- If the query consists entirely of keywords or the query doesn't require any answer from the AI
- If the query is a short statement that seems like a search query rather than a question
- If the query feels nonsensical or is a short phrase that possibly describes a document or information that could be found in an internal document
### Examples of {SEARCH_CLASS} queries:
- Find me the document that goes over the onboarding process for a new hire
- Pull requests since last week
- Sales Runbook AMEA Region
- Procurement process
- Retrieve the PRD for project X
## {CHAT_CLASS}
- If the query is asking a question that requires an answer rather than a document
- If the query is asking for a solution, suggestion, or general help
- If the query is seeking information that is on the web and likely not in a company internal document
- If the query should be answered without any context from additional documents or searches
### Examples of {CHAT_CLASS} queries:
- What led us to win the deal with company X? (seeking answer)
- Google Drive not sync-ing files to my computer (seeking solution)
- Review my email: <whatever the email is> (general help)
- Write me a script to... (general help)
- Cheap flights Europe to Tokyo (information likely found on the web, not internal)
# User Query:
{{user_query}}
REMEMBER TO ONLY RESPOND WITH "{SEARCH_CLASS}" OR "{CHAT_CLASS}" AND NOTHING ELSE.
""".strip()
# ruff: noqa: E501, W605 end
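Because SEARCH_CHAT_PROMPT is an f-string, SEARCH_CLASS and CHAT_CLASS are interpolated at import time while the doubled braces leave a literal {user_query} placeholder for the later .format() call (the same pattern classify_is_search_flow uses further down in this diff). A quick sketch, assuming the module imports cleanly:

from ee.onyx.prompts.search_flow_classification import SEARCH_CHAT_PROMPT

rendered = SEARCH_CHAT_PROMPT.format(user_query="Pull requests since last week")
assert '"search"' in rendered and "{user_query}" not in rendered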

View File

@@ -1,286 +0,0 @@
from collections.abc import Generator
from sqlalchemy.orm import Session
from ee.onyx.db.search import create_search_query
from ee.onyx.secondary_llm_flows.query_expansion import expand_keywords
from ee.onyx.server.query_and_chat.models import SearchDocWithContent
from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from ee.onyx.server.query_and_chat.streaming_models import LLMSelectedDocsPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchDocsPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchQueriesPacket
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkSearchRequest
from onyx.context.search.models import InferenceChunk
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.factory import get_default_llm
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
from onyx.tools.tool_implementations.search.search_utils import (
weighted_reciprocal_rank_fusion,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
# This is just a heuristic that also happens to work well for the UI/UX
# Users would not find it useful to see a huge list of suggested docs,
# but more than 1 is likely good since many questions may target more than 1 doc.
TARGET_NUM_SECTIONS_FOR_LLM_SELECTION = 3
def _run_single_search(
query: str,
filters: BaseFilters | None,
document_index: DocumentIndex,
user: User | None,
db_session: Session,
num_hits: int | None = None,
) -> list[InferenceChunk]:
"""Execute a single search query and return chunks."""
chunk_search_request = ChunkSearchRequest(
query=query,
user_selected_filters=filters,
limit=num_hits,
)
return search_pipeline(
chunk_search_request=chunk_search_request,
document_index=document_index,
user=user,
persona=None, # No persona for direct search
db_session=db_session,
)
def stream_search_query(
request: SendSearchQueryRequest,
user: User | None,
db_session: Session,
) -> Generator[
SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,
None,
None,
]:
"""
Core search function that yields streaming packets.
Used by both streaming and non-streaming endpoints.
"""
# Get document index
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
# Determine queries to execute
original_query = request.search_query
keyword_expansions: list[str] = []
if request.run_query_expansion:
try:
llm = get_default_llm()
keyword_expansions = expand_keywords(
user_query=original_query,
llm=llm,
)
if keyword_expansions:
logger.debug(
f"Query expansion generated {len(keyword_expansions)} keyword queries"
)
except Exception as e:
logger.warning(f"Query expansion failed: {e}; using original query only.")
keyword_expansions = []
# Build list of all executed queries for tracking
all_executed_queries = [original_query] + keyword_expansions
# TODO remove this check, user should not be None
if user is not None:
create_search_query(
db_session=db_session,
user_id=user.id,
query=request.search_query,
query_expansions=keyword_expansions if keyword_expansions else None,
)
# Execute search(es)
if not keyword_expansions:
# Single query (original only) - no threading needed
chunks = _run_single_search(
query=original_query,
filters=request.filters,
document_index=document_index,
user=user,
db_session=db_session,
num_hits=request.num_hits,
)
else:
# Multiple queries - run in parallel and merge with RRF
# First query is the original (semantic), rest are keyword expansions
search_functions = [
(
_run_single_search,
(
query,
request.filters,
document_index,
user,
db_session,
request.num_hits,
),
)
for query in all_executed_queries
]
# Run all searches in parallel
all_search_results: list[list[InferenceChunk]] = (
run_functions_tuples_in_parallel(
search_functions,
allow_failures=True,
)
)
# Separate original query results from keyword expansion results
# Note that in rare cases, the original query may have failed, so we may just be
# overweighting one set of keyword results; this should not be a big deal though.
original_result = all_search_results[0] if all_search_results else []
keyword_results = all_search_results[1:] if len(all_search_results) > 1 else []
# Build valid results and weights
# Original query (semantic): weight 2.0
# Keyword expansions: weight 1.0 each
valid_results: list[list[InferenceChunk]] = []
weights: list[float] = []
if original_result:
valid_results.append(original_result)
weights.append(2.0)
for keyword_result in keyword_results:
if keyword_result:
valid_results.append(keyword_result)
weights.append(1.0)
if not valid_results:
logger.warning("All parallel searches returned empty results")
chunks = []
else:
chunks = weighted_reciprocal_rank_fusion(
ranked_results=valid_results,
weights=weights,
id_extractor=lambda chunk: f"{chunk.document_id}_{chunk.chunk_id}",
)
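# Rough intuition for the fusion step (a sketch; the exact constants live inside
# weighted_reciprocal_rank_fusion): the fused score of a chunk is presumably of the form
#   score(chunk) = sum_i weight_i / (k + rank_i(chunk))
# over the result lists that contain it, so the original semantic query (weight 2.0)
# counts roughly twice as much as any single keyword expansion (weight 1.0).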
# Merge chunks into sections
sections = merge_individual_chunks(chunks)
# Truncate to the requested number of hits
sections = sections[: request.num_hits]
# Apply LLM document selection if requested
# num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
# The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it
# llm_selected_doc_ids will be:
# - None if LLM selection was not requested or failed
# - Empty list if LLM selection ran but selected nothing
# - List of doc IDs if LLM selection succeeded
run_llm_selection = (
request.num_docs_fed_to_llm_selection is not None
and request.num_docs_fed_to_llm_selection >= 1
)
llm_selected_doc_ids: list[str] | None = None
llm_selection_failed = False
if run_llm_selection and sections:
try:
llm = get_default_llm()
sections_to_evaluate = sections[: request.num_docs_fed_to_llm_selection]
selected_sections, _ = select_sections_for_expansion(
sections=sections_to_evaluate,
user_query=original_query,
llm=llm,
max_sections=TARGET_NUM_SECTIONS_FOR_LLM_SELECTION,
try_to_fill_to_max=True,
)
# Extract unique document IDs from selected sections (may be empty)
llm_selected_doc_ids = list(
dict.fromkeys(
section.center_chunk.document_id for section in selected_sections
)
)
logger.debug(
f"LLM document selection evaluated {len(sections_to_evaluate)} sections, "
f"selected {len(selected_sections)} sections with doc IDs: {llm_selected_doc_ids}"
)
except Exception as e:
# Allowing a blanket exception here as this step is not critical and the rest of the results are still valid
logger.warning(f"LLM document selection failed: {e}")
llm_selection_failed = True
elif run_llm_selection and not sections:
# LLM selection requested but no sections to evaluate
llm_selected_doc_ids = []
# Convert to SearchDocWithContent list, optionally including content
search_docs = SearchDocWithContent.from_inference_sections(
sections,
include_content=request.include_content,
is_internet=False,
)
# Yield queries packet
yield SearchQueriesPacket(all_executed_queries=all_executed_queries)
# Yield docs packet
yield SearchDocsPacket(search_docs=search_docs)
# Yield LLM selected docs packet if LLM selection was requested
# - llm_selected_doc_ids is None if selection failed
# - llm_selected_doc_ids is empty list if no docs were selected
# - llm_selected_doc_ids is list of IDs if docs were selected
if run_llm_selection:
yield LLMSelectedDocsPacket(
llm_selected_doc_ids=None if llm_selection_failed else llm_selected_doc_ids
)
def gather_search_stream(
packets: Generator[
SearchQueriesPacket
| SearchDocsPacket
| LLMSelectedDocsPacket
| SearchErrorPacket,
None,
None,
],
) -> SearchFullResponse:
"""
Aggregate all streaming packets into SearchFullResponse.
"""
all_executed_queries: list[str] = []
search_docs: list[SearchDocWithContent] = []
llm_selected_doc_ids: list[str] | None = None
error: str | None = None
for packet in packets:
if isinstance(packet, SearchQueriesPacket):
all_executed_queries = packet.all_executed_queries
elif isinstance(packet, SearchDocsPacket):
search_docs = packet.search_docs
elif isinstance(packet, LLMSelectedDocsPacket):
llm_selected_doc_ids = packet.llm_selected_doc_ids
elif isinstance(packet, SearchErrorPacket):
error = packet.error
return SearchFullResponse(
all_executed_queries=all_executed_queries,
search_docs=search_docs,
doc_selection_reasoning=None,
llm_selected_doc_ids=llm_selected_doc_ids,
error=error,
)

View File

@@ -1,92 +0,0 @@
import re
from ee.onyx.prompts.query_expansion import KEYWORD_EXPANSION_PROMPT
from onyx.llm.interfaces import LLM
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Pattern to remove common LLM artifacts: brackets, quotes, list markers, etc.
CLEANUP_PATTERN = re.compile(r'[\[\]"\'`]')
def _clean_keyword_line(line: str) -> str:
"""Clean a keyword line by removing common LLM artifacts.
Removes brackets, quotes, and other characters that LLMs may accidentally
include in their output.
"""
# Remove common artifacts
cleaned = CLEANUP_PATTERN.sub("", line)
# Remove leading list markers like "1.", "2.", "-", "*"
cleaned = re.sub(r"^\s*(?:\d+[\.\)]\s*|[-*]\s*)", "", cleaned)
return cleaned.strip()
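# Example of the cleanup above (derived from the two regexes):
#   '1. ["onboarding checklist"]'  ->  'onboarding checklist'
#   '- sales runbook AMEA'         ->  'sales runbook AMEA'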
def expand_keywords(
user_query: str,
llm: LLM,
) -> list[str]:
"""Expand a user query into multiple keyword-only queries for BM25 search.
Uses an LLM to generate keyword-based search queries that capture different
aspects of the user's search intent. Returns only the expanded queries,
not the original query.
Args:
user_query: The original search query from the user
llm: Language model to use for keyword expansion
Returns:
List of expanded keyword queries (excluding the original query).
Returns empty list if expansion fails or produces no useful expansions.
"""
messages: LanguageModelInput = [
UserMessage(content=KEYWORD_EXPANSION_PROMPT.format(user_query=user_query))
]
try:
response = llm.invoke(
prompt=messages,
reasoning_effort=ReasoningEffort.OFF,
# Limit output - we only expect a few short keyword queries
max_tokens=150,
)
content = llm_response_to_string(response).strip()
if not content:
logger.warning("Keyword expansion returned empty response.")
return []
# Parse response - each line is a separate keyword query
# Clean each line to remove LLM artifacts and drop empty lines
parsed_queries = []
for line in content.strip().split("\n"):
cleaned = _clean_keyword_line(line)
if cleaned:
parsed_queries.append(cleaned)
if not parsed_queries:
logger.warning("Keyword expansion parsing returned no queries.")
return []
# Filter out duplicates and queries that match the original
expanded_queries: list[str] = []
seen_lower: set[str] = {user_query.lower()}
for query in parsed_queries:
query_lower = query.lower()
if query_lower not in seen_lower:
seen_lower.add(query_lower)
expanded_queries.append(query)
logger.debug(f"Keyword expansion generated {len(expanded_queries)} queries")
return expanded_queries
except Exception as e:
logger.warning(f"Keyword expansion failed: {e}")
return []

View File

@@ -1,50 +0,0 @@
from ee.onyx.prompts.search_flow_classification import CHAT_CLASS
from ee.onyx.prompts.search_flow_classification import SEARCH_CHAT_PROMPT
from ee.onyx.prompts.search_flow_classification import SEARCH_CLASS
from onyx.llm.interfaces import LLM
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
@log_function_time(print_only=True)
def classify_is_search_flow(
query: str,
llm: LLM,
) -> bool:
messages: LanguageModelInput = [
UserMessage(content=SEARCH_CHAT_PROMPT.format(user_query=query))
]
response = llm.invoke(
prompt=messages,
reasoning_effort=ReasoningEffort.OFF,
# Nothing can happen in the UI until this call finishes so we need to be aggressive with the timeout
timeout_override=2,
# Well more than necessary, but this ensures completion in case the model classifies correctly but
# then ends up rambling
max_tokens=20,
)
content = llm_response_to_string(response).strip().lower()
if not content:
logger.warning(
"Search flow classification returned empty response; defaulting to chat flow."
)
return False
# Prefer chat if both appear.
if CHAT_CLASS in content:
return False
if SEARCH_CLASS in content:
return True
logger.warning(
"Search flow classification returned unexpected response; defaulting to chat flow. Response=%r",
content,
)
return False

View File

@@ -19,9 +19,9 @@ from ee.onyx.db.analytics import fetch_query_analytics
from ee.onyx.db.analytics import user_can_view_assistant_stats
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.server.utils import PUBLIC_API_TAGS
router = APIRouter(prefix="/analytics", tags=PUBLIC_API_TAGS)

View File

@@ -10,8 +10,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
("/enterprise-settings/logo", {"GET"}),
("/enterprise-settings/logotype", {"GET"}),
("/enterprise-settings/custom-analytics-script", {"GET"}),
# Stripe publishable key is safe to expose publicly
("/tenants/stripe-publishable-key", {"GET"}),
]

View File

@@ -1,102 +0,0 @@
"""Middleware to enforce license status application-wide."""
import logging
from collections.abc import Awaitable
from collections.abc import Callable
from fastapi import FastAPI
from fastapi import Request
from fastapi import Response
from fastapi.responses import JSONResponse
from redis.exceptions import RedisError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.server.tenants.product_gating import is_tenant_gated
from onyx.server.settings.models import ApplicationStatus
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
# Paths that are ALWAYS accessible, even when license is expired/gated.
# These enable users to:
# /auth - Log in/out (users can't fix billing if locked out of auth)
# /license - Fetch, upload, or check license status
# /health - Health checks for load balancers/orchestrators
# /me - Basic user info needed for UI rendering
# /settings, /enterprise-settings - View app status and branding
# /tenants/billing-* - Manage subscription to resolve gating
ALLOWED_PATH_PREFIXES = {
"/auth",
"/license",
"/health",
"/me",
"/settings",
"/enterprise-settings",
"/tenants/billing-information",
"/tenants/create-customer-portal-session",
"/tenants/create-subscription-session",
}
def _is_path_allowed(path: str) -> bool:
"""Check if path is in allowlist (prefix match)."""
return any(path.startswith(prefix) for prefix in ALLOWED_PATH_PREFIXES)
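# Example (prefix match, applied after the middleware below strips a leading "/api"):
#   "/api/license/upload"    -> "/license/upload"    -> allowed even when gated
#   "/api/chat/send-message" -> "/chat/send-message" -> blocked with 402 when gated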
def add_license_enforcement_middleware(
app: FastAPI, logger: logging.LoggerAdapter
) -> None:
logger.info("License enforcement middleware registered")
@app.middleware("http")
async def enforce_license(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
"""Block requests when license is expired/gated."""
if not LICENSE_ENFORCEMENT_ENABLED:
return await call_next(request)
path = request.url.path
if path.startswith("/api"):
path = path[4:]
if _is_path_allowed(path):
return await call_next(request)
is_gated = False
tenant_id = get_current_tenant_id()
if MULTI_TENANT:
try:
is_gated = is_tenant_gated(tenant_id)
except RedisError as e:
logger.warning(f"Failed to check tenant gating status: {e}")
# Fail open - don't block users due to Redis connectivity issues
is_gated = False
else:
try:
metadata = get_cached_license_metadata(tenant_id)
if metadata:
if metadata.status == ApplicationStatus.GATED_ACCESS:
is_gated = True
else:
# No license metadata = gated for self-hosted EE
is_gated = True
except RedisError as e:
logger.warning(f"Failed to check license metadata: {e}")
# Fail open - don't block users due to Redis connectivity issues
is_gated = False
if is_gated:
logger.info(f"Blocking request for gated tenant: {tenant_id}, path={path}")
return JSONResponse(
status_code=402,
content={
"detail": {
"error": "license_expired",
"message": "Your subscription has expired. Please update your billing.",
}
},
)
return await call_next(request)

View File

@@ -0,0 +1,214 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session
from ee.onyx.server.query_and_chat.models import BasicCreateChatMessageRequest
from ee.onyx.server.query_and_chat.models import (
BasicCreateChatMessageWithHistoryRequest,
)
from onyx.auth.users import current_user
from onyx.chat.chat_utils import create_chat_history_chain
from onyx.chat.models import ChatBasicResponse
from onyx.chat.process_message import gather_stream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.constants import MessageType
from onyx.context.search.models import OptionalSearchSetting
from onyx.context.search.models import RetrievalDetails
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_or_create_root_message
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.llm.factory import get_llm_for_persona
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/chat")
@router.post("/send-message-simple-api")
def handle_simplified_chat_message(
chat_message_req: BasicCreateChatMessageRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> ChatBasicResponse:
"""This is a Non-Streaming version that only gives back a minimal set of information"""
logger.notice(f"Received new simple api chat message: {chat_message_req.message}")
if not chat_message_req.message:
raise HTTPException(status_code=400, detail="Empty chat message is invalid")
# Handle chat session creation if chat_session_id is not provided
if chat_message_req.chat_session_id is None:
if chat_message_req.persona_id is None:
raise HTTPException(
status_code=400,
detail="Either chat_session_id or persona_id must be provided",
)
# Create a new chat session with the provided persona_id
try:
new_chat_session = create_chat_session(
db_session=db_session,
description="", # Leave empty for simple API
user_id=user.id if user else None,
persona_id=chat_message_req.persona_id,
)
chat_session_id = new_chat_session.id
except Exception as e:
logger.exception(e)
raise HTTPException(status_code=400, detail="Invalid Persona provided.")
else:
chat_session_id = chat_message_req.chat_session_id
try:
parent_message = create_chat_history_chain(
chat_session_id=chat_session_id, db_session=db_session
)[-1]
except Exception:
parent_message = get_or_create_root_message(
chat_session_id=chat_session_id, db_session=db_session
)
if (
chat_message_req.retrieval_options is None
and chat_message_req.search_doc_ids is None
):
retrieval_options: RetrievalDetails | None = RetrievalDetails(
run_search=OptionalSearchSetting.ALWAYS,
real_time=False,
)
else:
retrieval_options = chat_message_req.retrieval_options
full_chat_msg_info = CreateChatMessageRequest(
chat_session_id=chat_session_id,
parent_message_id=parent_message.id,
message=chat_message_req.message,
file_descriptors=[],
search_doc_ids=chat_message_req.search_doc_ids,
retrieval_options=retrieval_options,
# Simple API does not support reranking, hide complexity from user
rerank_settings=None,
query_override=chat_message_req.query_override,
# Currently only applies to search flow not chat
chunks_above=0,
chunks_below=0,
full_doc=chat_message_req.full_doc,
structured_response_format=chat_message_req.structured_response_format,
)
packets = stream_chat_message_objects(
new_msg_req=full_chat_msg_info,
user=user,
db_session=db_session,
)
return gather_stream(packets)
@router.post("/send-message-simple-with-history")
def handle_send_message_simple_with_history(
req: BasicCreateChatMessageWithHistoryRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> ChatBasicResponse:
"""This is a Non-Streaming version that only gives back a minimal set of information.
Takes in chat history maintained by the caller
and does query rephrasing similar to answer-with-quote."""
if len(req.messages) == 0:
raise HTTPException(status_code=400, detail="Messages cannot be zero length")
# This is a sanity check to make sure the chat history is valid
# It must start with a user message and alternate between user and assistant
expected_role = MessageType.USER
for msg in req.messages:
if not msg.message:
raise HTTPException(
status_code=400, detail="One or more chat messages were empty"
)
if msg.role != expected_role:
raise HTTPException(
status_code=400,
detail="Message roles must start and end with MessageType.USER and alternate in-between.",
)
if expected_role == MessageType.USER:
expected_role = MessageType.ASSISTANT
else:
expected_role = MessageType.USER
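# e.g. a valid history is [USER, ASSISTANT, USER], where the final USER message becomes
# the new query below; [USER, USER] or a history starting with ASSISTANT fails the check above.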
query = req.messages[-1].message
msg_history = req.messages[:-1]
logger.notice(f"Received new simple with history chat message: {query}")
user_id = user.id if user is not None else None
chat_session = create_chat_session(
db_session=db_session,
description="handle_send_message_simple_with_history",
user_id=user_id,
persona_id=req.persona_id,
)
llm = get_llm_for_persona(persona=chat_session.persona, user=user)
llm_tokenizer = get_tokenizer(
model_name=llm.config.model_name,
provider_type=llm.config.model_provider,
)
# Every chat Session begins with an empty root message
root_message = get_or_create_root_message(
chat_session_id=chat_session.id, db_session=db_session
)
chat_message = root_message
for msg in msg_history:
chat_message = create_new_chat_message(
chat_session_id=chat_session.id,
parent_message=chat_message,
message=msg.message,
token_count=len(llm_tokenizer.encode(msg.message)),
message_type=msg.role,
db_session=db_session,
commit=False,
)
db_session.commit()
if req.retrieval_options is None and req.search_doc_ids is None:
retrieval_options: RetrievalDetails | None = RetrievalDetails(
run_search=OptionalSearchSetting.ALWAYS,
real_time=False,
)
else:
retrieval_options = req.retrieval_options
full_chat_msg_info = CreateChatMessageRequest(
chat_session_id=chat_session.id,
parent_message_id=chat_message.id,
message=query,
file_descriptors=[],
search_doc_ids=req.search_doc_ids,
retrieval_options=retrieval_options,
# Simple API does not support reranking, hide complexity from user
rerank_settings=None,
query_override=None,
chunks_above=0,
chunks_below=0,
full_doc=req.full_doc,
structured_response_format=req.structured_response_format,
)
packets = stream_chat_message_objects(
new_msg_req=full_chat_msg_info,
user=user,
db_session=db_session,
)
return gather_stream(packets)

View File

@@ -1,12 +1,18 @@
from collections.abc import Sequence
from datetime import datetime
from collections import OrderedDict
from typing import Literal
from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
from pydantic import model_validator
from onyx.chat.models import ThreadMessage
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDoc
from onyx.context.search.models import BasicChunkRequest
from onyx.context.search.models import ChunkContext
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import RetrievalDetails
from onyx.server.manage.models import StandardAnswer
@@ -19,89 +25,119 @@ class StandardAnswerResponse(BaseModel):
standard_answers: list[StandardAnswer] = Field(default_factory=list)
class SearchFlowClassificationRequest(BaseModel):
user_query: str
class DocumentSearchRequest(BasicChunkRequest):
user_selected_filters: BaseFilters | None = None
class SearchFlowClassificationResponse(BaseModel):
is_search_flow: bool
class DocumentSearchResponse(BaseModel):
top_documents: list[InferenceChunk]
class SendSearchQueryRequest(BaseModel):
search_query: str
filters: BaseFilters | None = None
num_docs_fed_to_llm_selection: int | None = None
run_query_expansion: bool = False
num_hits: int = 50
class BasicCreateChatMessageRequest(ChunkContext):
"""If a chat_session_id is not provided, a persona_id must be provided to automatically create a new chat session
Note, for simplicity this option only allows for a single linear chain of messages
"""
include_content: bool = False
stream: bool = False
chat_session_id: UUID | None = None
# Optional persona_id to create a new chat session if chat_session_id is not provided
persona_id: int | None = None
# New message contents
message: str
# Defaults to using retrieval with no additional filters
retrieval_options: RetrievalDetails | None = None
# Allows the caller to specify the exact search query they want to use
# will disable Query Rewording if specified
query_override: str | None = None
# If search_doc_ids provided, then retrieval options are unused
search_doc_ids: list[int] | None = None
# only works if using an OpenAI model. See the following for more details:
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
@model_validator(mode="after")
def validate_chat_session_or_persona(self) -> "BasicCreateChatMessageRequest":
if self.chat_session_id is None and self.persona_id is None:
raise ValueError("Either chat_session_id or persona_id must be provided")
return self
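# e.g. BasicCreateChatMessageRequest(message="hi") raises a validation error, while
# BasicCreateChatMessageRequest(message="hi", persona_id=1) is accepted and a new chat
# session is created downstream in handle_simplified_chat_message.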
class SearchDocWithContent(SearchDoc):
# Allows None because content inclusion is determined by a flag, but the code in the
# search path always uses this type
content: str | None
class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
# Last element is the new query. All previous elements are historical context
messages: list[ThreadMessage]
persona_id: int
retrieval_options: RetrievalDetails | None = None
query_override: str | None = None
skip_rerank: bool | None = None
# If search_doc_ids provided, then retrieval options are unused
search_doc_ids: list[int] | None = None
# only works if using an OpenAI model. See the following for more details:
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
@classmethod
def from_inference_sections(
cls,
sections: Sequence[InferenceSection],
include_content: bool = False,
is_internet: bool = False,
) -> list["SearchDocWithContent"]:
"""Convert InferenceSections to SearchDocWithContent objects.
Args:
sections: Sequence of InferenceSection objects
include_content: If True, populate content field with combined_content
is_internet: Whether these are internet search results
class SimpleDoc(BaseModel):
id: str
semantic_identifier: str
link: str | None
blurb: str
match_highlights: list[str]
source_type: DocumentSource
metadata: dict | None
Returns:
List of SearchDocWithContent with optional content
class AgentSubQuestion(BaseModel):
sub_question: str
document_ids: list[str]
class AgentAnswer(BaseModel):
answer: str
answer_type: Literal["agent_sub_answer", "agent_level_answer"]
class AgentSubQuery(BaseModel):
sub_query: str
query_id: int
@staticmethod
def make_dict_by_level_and_question_index(
original_dict: dict[tuple[int, int, int], "AgentSubQuery"],
) -> dict[int, dict[int, list["AgentSubQuery"]]]:
"""Takes a dict of tuple(level, question num, query_id) to sub queries.
returns a dict of level to dict[question num to list of query_id's]
Ordering is asc for readability.
"""
if not sections:
return []
# In this function, when we sort int | None, we deliberately push None to the end
return [
cls(
document_id=(chunk := section.center_chunk).document_id,
chunk_ind=chunk.chunk_id,
semantic_identifier=chunk.semantic_identifier or "Unknown",
link=chunk.source_links[0] if chunk.source_links else None,
blurb=chunk.blurb,
source_type=chunk.source_type,
boost=chunk.boost,
hidden=chunk.hidden,
metadata=chunk.metadata,
score=chunk.score,
match_highlights=chunk.match_highlights,
updated_at=chunk.updated_at,
primary_owners=chunk.primary_owners,
secondary_owners=chunk.secondary_owners,
is_internet=is_internet,
content=section.combined_content if include_content else None,
# map entries to the level_question_dict
level_question_dict: dict[int, dict[int, list["AgentSubQuery"]]] = {}
for k1, obj in original_dict.items():
level = k1[0]
question = k1[1]
if level not in level_question_dict:
level_question_dict[level] = {}
if question not in level_question_dict[level]:
level_question_dict[level][question] = []
level_question_dict[level][question].append(obj)
# sort each query_id list and question_index
for key1, obj1 in level_question_dict.items():
for key2, value2 in obj1.items():
# sort the query_id list of each question_index
level_question_dict[key1][key2] = sorted(
value2, key=lambda o: o.query_id
)
# sort the question_index dict of level
level_question_dict[key1] = OrderedDict(
sorted(level_question_dict[key1].items(), key=lambda x: (x is None, x))
)
for section in sections
]
class SearchFullResponse(BaseModel):
all_executed_queries: list[str]
search_docs: list[SearchDocWithContent]
# Reasoning tokens output by the LLM for the document selection
doc_selection_reasoning: str | None = None
# This is a list of document ids that are in the search_docs list
llm_selected_doc_ids: list[str] | None = None
# Error message if the search failed partway through
error: str | None = None
class SearchQueryResponse(BaseModel):
query: str
query_expansions: list[str] | None
created_at: datetime
class SearchHistoryResponse(BaseModel):
search_queries: list[SearchQueryResponse]
# sort the top dict of levels
sorted_dict = OrderedDict(
sorted(level_question_dict.items(), key=lambda x: (x is None, x))
)
return sorted_dict

View File

@@ -1,170 +0,0 @@
from collections.abc import Generator
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from ee.onyx.db.search import fetch_search_queries_for_user
from ee.onyx.search.process_search_query import gather_search_stream
from ee.onyx.search.process_search_query import stream_search_query
from ee.onyx.secondary_llm_flows.search_flow_classification import (
classify_is_search_flow,
)
from ee.onyx.server.query_and_chat.models import SearchFlowClassificationRequest
from ee.onyx.server.query_and_chat.models import SearchFlowClassificationResponse
from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SearchHistoryResponse
from ee.onyx.server.query_and_chat.models import SearchQueryResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User
from onyx.llm.factory import get_default_llm
from onyx.server.usage_limits import check_llm_cost_limit_for_provider
from onyx.server.utils import get_json_line
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter(prefix="/search")
@router.post("/search-flow-classification")
def search_flow_classification(
request: SearchFlowClassificationRequest,
# This is added just to ensure this endpoint isn't spammed by non-authorized users since there's an LLM call underneath it
_: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchFlowClassificationResponse:
query = request.user_query
# Heuristic: if the user is typing a lot of text, it's unlikely they're looking for a specific document.
# Most likely something needs to be done with the included text, so we just classify it as a chat flow.
if len(query) > 200:
return SearchFlowClassificationResponse(is_search_flow=False)
llm = get_default_llm()
check_llm_cost_limit_for_provider(
db_session=db_session,
tenant_id=get_current_tenant_id(),
llm_provider_api_key=llm.config.api_key,
)
try:
is_search_flow = classify_is_search_flow(query=query, llm=llm)
except Exception as e:
logger.exception(
"Search flow classification failed; defaulting to chat flow",
exc_info=e,
)
is_search_flow = False
return SearchFlowClassificationResponse(is_search_flow=is_search_flow)
@router.post("/send-search-message", response_model=None)
def handle_send_search_message(
request: SendSearchQueryRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse | SearchFullResponse:
"""
Execute a search query with optional streaming.
When stream=True: Returns StreamingResponse with SSE
When stream=False: Returns SearchFullResponse
"""
logger.debug(f"Received search query: {request.search_query}")
# Non-streaming path
if not request.stream:
try:
packets = stream_search_query(request, user, db_session)
return gather_search_stream(packets)
except NotImplementedError as e:
return SearchFullResponse(
all_executed_queries=[],
search_docs=[],
error=str(e),
)
# Streaming path
def stream_generator() -> Generator[str, None, None]:
try:
with get_session_with_current_tenant() as streaming_db_session:
for packet in stream_search_query(request, user, streaming_db_session):
yield get_json_line(packet.model_dump())
except NotImplementedError as e:
yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
except HTTPException:
raise
except Exception as e:
logger.exception("Error in search streaming")
yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
return StreamingResponse(stream_generator(), media_type="text/event-stream")
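# Client-side consumption sketch (assumption: get_json_line emits one JSON object per
# line, so the stream can be read as newline-delimited JSON; base_url/payload are placeholders):
#   with httpx.stream("POST", f"{base_url}/search/send-search-message", json=payload) as resp:
#       for line in resp.iter_lines():
#           if line:
#               packet = json.loads(line)  # one of the Search*Packet payloads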
@router.get("/search-history")
def get_search_history(
limit: int = 100,
filter_days: int | None = None,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchHistoryResponse:
"""
Fetch past search queries for the authenticated user.
Args:
limit: Maximum number of queries to return (default 100)
filter_days: Only return queries from the last N days (optional)
Returns:
SearchHistoryResponse with list of search queries, ordered by most recent first.
"""
# Validate limit
if limit <= 0:
raise HTTPException(
status_code=400,
detail="limit must be greater than 0",
)
if limit > 1000:
raise HTTPException(
status_code=400,
detail="limit must be at most 1000",
)
# Validate filter_days
if filter_days is not None and filter_days <= 0:
raise HTTPException(
status_code=400,
detail="filter_days must be greater than 0",
)
# TODO(yuhong) remove this
if user is None:
# Return empty list for unauthenticated users
return SearchHistoryResponse(search_queries=[])
search_queries = fetch_search_queries_for_user(
db_session=db_session,
user_id=user.id,
filter_days=filter_days,
limit=limit,
)
return SearchHistoryResponse(
search_queries=[
SearchQueryResponse(
query=sq.query,
query_expansions=sq.query_expansions,
created_at=sq.created_at,
)
for sq in search_queries
]
)

View File

@@ -1,35 +0,0 @@
from typing import Literal
from pydantic import BaseModel
from pydantic import ConfigDict
from ee.onyx.server.query_and_chat.models import SearchDocWithContent
class SearchQueriesPacket(BaseModel):
model_config = ConfigDict(frozen=True)
type: Literal["search_queries"] = "search_queries"
all_executed_queries: list[str]
class SearchDocsPacket(BaseModel):
model_config = ConfigDict(frozen=True)
type: Literal["search_docs"] = "search_docs"
search_docs: list[SearchDocWithContent]
class SearchErrorPacket(BaseModel):
model_config = ConfigDict(frozen=True)
type: Literal["search_error"] = "search_error"
error: str
class LLMSelectedDocsPacket(BaseModel):
model_config = ConfigDict(frozen=True)
type: Literal["llm_selected_docs"] = "llm_selected_docs"
# None if LLM selection failed, empty list if no docs selected, list of IDs otherwise
llm_selected_doc_ids: list[str] | None

View File

@@ -32,7 +32,6 @@ from onyx.configs.constants import MessageType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import QAFeedbackType
from onyx.configs.constants import QueryHistoryType
from onyx.configs.constants import SessionType
@@ -49,6 +48,7 @@ from onyx.file_store.file_store import get_default_file_store
from onyx.server.documents.models import PaginatedReturn
from onyx.server.query_and_chat.models import ChatSessionDetails
from onyx.server.query_and_chat.models import ChatSessionsResponse
from onyx.server.utils import PUBLIC_API_TAGS
from onyx.utils.threadpool_concurrency import parallel_yield
from shared_configs.contextvars import get_current_tenant_id

View File

@@ -1,54 +0,0 @@
"""EE Settings API - provides license-aware settings override."""
from redis.exceptions import RedisError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
# Statuses that indicate a billing/license problem - propagate these to settings
_GATED_STATUSES = frozenset(
{
ApplicationStatus.GATED_ACCESS,
ApplicationStatus.GRACE_PERIOD,
ApplicationStatus.PAYMENT_REMINDER,
}
)
def apply_license_status_to_settings(settings: Settings) -> Settings:
"""EE version: checks license status for self-hosted deployments.
For self-hosted, looks up license metadata and overrides application_status
if the license is missing or indicates a problem (expired, grace period, etc.).
For multi-tenant (cloud), the settings already have the correct status
from the control plane, so no override is needed.
If LICENSE_ENFORCEMENT_ENABLED is false, settings are returned unchanged,
allowing the product to function normally without license checks.
"""
if not LICENSE_ENFORCEMENT_ENABLED:
return settings
if MULTI_TENANT:
return settings
tenant_id = get_current_tenant_id()
try:
metadata = get_cached_license_metadata(tenant_id)
if metadata and metadata.status in _GATED_STATUSES:
settings.application_status = metadata.status
elif not metadata:
# No license = gated access for self-hosted EE
settings.application_status = ApplicationStatus.GATED_ACCESS
except RedisError as e:
logger.warning(f"Failed to check license metadata for settings: {e}")
return settings

View File

@@ -1,14 +1,10 @@
"""Tenant-specific usage limit overrides from the control plane (EE version)."""
import time
import requests
from ee.onyx.server.tenants.access import generate_data_plane_token
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.server.tenant_usage_limits import TenantUsageLimitOverrides
from onyx.server.usage_limits import NO_LIMIT
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -16,12 +12,9 @@ logger = setup_logger()
# In-memory storage for tenant overrides (populated at startup)
_tenant_usage_limit_overrides: dict[str, TenantUsageLimitOverrides] | None = None
_last_fetch_time: float = 0.0
_FETCH_INTERVAL = 60 * 60 * 24 # 24 hours
_ERROR_FETCH_INTERVAL = 30 * 60 # 30 minutes (if the last fetch failed)
def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides] | None:
def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides]:
"""
Fetch tenant-specific usage limit overrides from the control plane.
@@ -52,52 +45,33 @@ def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides] | None
f"Failed to parse usage limit overrides for tenant {tenant_id}: {e}"
)
return (
result or None
) # if empty dictionary, something went wrong and we shouldn't enforce limits
return result
except requests.exceptions.RequestException as e:
logger.warning(f"Failed to fetch usage limit overrides from control plane: {e}")
return None
return {}
except Exception as e:
logger.error(f"Error parsing usage limit overrides: {e}")
return None
return {}
def load_usage_limit_overrides() -> None:
def load_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides]:
"""
Load tenant usage limit overrides from the control plane.
Called at server startup to populate the in-memory cache.
"""
global _tenant_usage_limit_overrides
global _last_fetch_time
logger.info("Loading tenant usage limit overrides from control plane...")
overrides = fetch_usage_limit_overrides()
_last_fetch_time = time.time()
# use the new result if it exists, otherwise use the old result
# (prevents us from updating to a failed fetch result)
_tenant_usage_limit_overrides = overrides or _tenant_usage_limit_overrides
_tenant_usage_limit_overrides = overrides
if overrides:
logger.info(f"Loaded usage limit overrides for {len(overrides)} tenants")
else:
logger.info("No tenant-specific usage limit overrides found")
def unlimited(tenant_id: str) -> TenantUsageLimitOverrides:
return TenantUsageLimitOverrides(
tenant_id=tenant_id,
llm_cost_cents_trial=NO_LIMIT,
llm_cost_cents_paid=NO_LIMIT,
chunks_indexed_trial=NO_LIMIT,
chunks_indexed_paid=NO_LIMIT,
api_calls_trial=NO_LIMIT,
api_calls_paid=NO_LIMIT,
non_streaming_calls_trial=NO_LIMIT,
non_streaming_calls_paid=NO_LIMIT,
)
return overrides
def get_tenant_usage_limit_overrides(
@@ -112,22 +86,7 @@ def get_tenant_usage_limit_overrides(
Returns:
TenantUsageLimitOverrides if the tenant has overrides, None otherwise.
"""
if DEV_MODE: # in dev mode, we return unlimited limits for all tenants
return unlimited(tenant_id)
global _tenant_usage_limit_overrides
time_since = time.time() - _last_fetch_time
if (
_tenant_usage_limit_overrides is None and time_since > _ERROR_FETCH_INTERVAL
) or (time_since > _FETCH_INTERVAL):
logger.debug(
f"Last fetch time: {_last_fetch_time}, time since last fetch: {time_since}"
)
load_usage_limit_overrides()
# If we have failed to fetch from the control plane or we're in dev mode, don't usage limit anyone.
if _tenant_usage_limit_overrides is None or DEV_MODE:
return unlimited(tenant_id)
if _tenant_usage_limit_overrides is None:
_tenant_usage_limit_overrides = load_usage_limit_overrides()
return _tenant_usage_limit_overrides.get(tenant_id)

View File

@@ -1,9 +1,9 @@
from typing import cast
from typing import Literal
import requests
import stripe
from ee.onyx.configs.app_configs import STRIPE_PRICE_ID
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import BillingInformation
@@ -16,21 +16,15 @@ stripe.api_key = STRIPE_SECRET_KEY
logger = setup_logger()
def fetch_stripe_checkout_session(
tenant_id: str,
billing_period: Literal["monthly", "annual"] = "monthly",
) -> str:
def fetch_stripe_checkout_session(tenant_id: str) -> str:
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"{CONTROL_PLANE_API_BASE_URL}/create-checkout-session"
payload = {
"tenant_id": tenant_id,
"billing_period": billing_period,
}
response = requests.post(url, headers=headers, json=payload)
params = {"tenant_id": tenant_id}
response = requests.post(url, headers=headers, params=params)
response.raise_for_status()
return response.json()["sessionId"]
@@ -76,46 +70,24 @@ def fetch_billing_information(
return BillingInformation(**response_data)
def fetch_customer_portal_session(tenant_id: str, return_url: str | None = None) -> str:
"""
Fetch a Stripe customer portal session URL from the control plane.
NOTE: This is currently only used for multi-tenant (cloud) deployments.
Self-hosted proxy endpoints will be added in a future phase.
"""
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"{CONTROL_PLANE_API_BASE_URL}/create-customer-portal-session"
payload = {"tenant_id": tenant_id}
if return_url:
payload["return_url"] = return_url
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
return response.json()["url"]
def register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription:
"""
Update the number of seats for a tenant's subscription.
Preserves the existing price (monthly, annual, or grandfathered).
Send a request to the control service to register the number of users for a tenant.
"""
if not STRIPE_PRICE_ID:
raise Exception("STRIPE_PRICE_ID is not set")
response = fetch_tenant_stripe_information(tenant_id)
stripe_subscription_id = cast(str, response.get("stripe_subscription_id"))
subscription = stripe.Subscription.retrieve(stripe_subscription_id)
subscription_item = subscription["items"]["data"][0]
# Use existing price to preserve the customer's current plan
current_price_id = subscription_item.price.id
updated_subscription = stripe.Subscription.modify(
stripe_subscription_id,
items=[
{
"id": subscription_item.id,
"price": current_price_id,
"id": subscription["items"]["data"][0].id,
"price": STRIPE_PRICE_ID,
"quantity": number_of_users,
}
],

View File

@@ -1,41 +1,33 @@
import asyncio
import httpx
import stripe
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from ee.onyx.auth.users import current_admin_user
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import control_plane_dep
from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.billing import fetch_customer_portal_session
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import StripePublishableKeyResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import overwrite_full_gated_set
from ee.onyx.server.tenants.product_gating import store_product_gating
from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
stripe.api_key = STRIPE_SECRET_KEY
logger = setup_logger()
router = APIRouter(prefix="/tenants")
# Cache for Stripe publishable key to avoid hitting S3 on every request
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()
@router.post("/product-gating")
def gate_product(
@@ -90,17 +82,21 @@ async def billing_information(
async def create_customer_portal_session(
_: User = Depends(current_admin_user),
) -> dict:
"""
Create a Stripe customer portal session via the control plane.
NOTE: This is currently only used for multi-tenant (cloud) deployments.
Self-hosted proxy endpoints will be added in a future phase.
"""
tenant_id = get_current_tenant_id()
return_url = f"{WEB_DOMAIN}/admin/billing"
try:
portal_url = fetch_customer_portal_session(tenant_id, return_url)
return {"url": portal_url}
stripe_info = fetch_tenant_stripe_information(tenant_id)
stripe_customer_id = stripe_info.get("stripe_customer_id")
if not stripe_customer_id:
raise HTTPException(status_code=400, detail="Stripe customer ID not found")
logger.info(stripe_customer_id)
portal_session = stripe.billing_portal.Session.create(
customer=stripe_customer_id,
return_url=f"{WEB_DOMAIN}/admin/billing",
)
logger.info(portal_session)
return {"url": portal_session.url}
except Exception as e:
logger.exception("Failed to create customer portal session")
raise HTTPException(status_code=500, detail=str(e))
@@ -108,82 +104,15 @@ async def create_customer_portal_session(
@router.post("/create-subscription-session")
async def create_subscription_session(
request: CreateSubscriptionSessionRequest | None = None,
_: User = Depends(current_admin_user),
) -> SubscriptionSessionResponse:
try:
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if not tenant_id:
raise HTTPException(status_code=400, detail="Tenant ID not found")
billing_period = request.billing_period if request else "monthly"
session_id = fetch_stripe_checkout_session(tenant_id, billing_period)
session_id = fetch_stripe_checkout_session(tenant_id)
return SubscriptionSessionResponse(sessionId=session_id)
except Exception as e:
logger.exception("Failed to create subscription session")
logger.exception("Failed to create resubscription session")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/stripe-publishable-key")
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
"""
Fetch the Stripe publishable key.
Priority: env var override (for testing) > S3 bucket (production).
This endpoint is public (no auth required) since publishable keys are safe to expose.
The key is cached in memory to avoid hitting S3 on every request.
"""
global _stripe_publishable_key_cache
# Fast path: return cached value without lock
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Use lock to prevent concurrent S3 requests
async with _stripe_key_lock:
# Double-check after acquiring lock (another request may have populated cache)
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Check for env var override first (for local testing with pk_test_* keys)
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
)
try:
async with httpx.AsyncClient() as client:
response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
response.raise_for_status()
key = response.text.strip()
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
)

View File

@@ -1,5 +1,4 @@
from datetime import datetime
from typing import Literal
from pydantic import BaseModel
@@ -74,12 +73,6 @@ class SubscriptionSessionResponse(BaseModel):
sessionId: str
class CreateSubscriptionSessionRequest(BaseModel):
"""Request to create a subscription checkout session."""
billing_period: Literal["monthly", "annual"] = "monthly"
class TenantByDomainResponse(BaseModel):
tenant_id: str
number_of_users: int
@@ -105,7 +98,3 @@ class PendingUserSnapshot(BaseModel):
class ApproveUserRequest(BaseModel):
email: str
class StripePublishableKeyResponse(BaseModel):
publishable_key: str

View File

@@ -65,9 +65,3 @@ def get_gated_tenants() -> set[str]:
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
def is_tenant_gated(tenant_id: str) -> bool:
"""Fast O(1) check if tenant is in gated set (multi-tenant only)."""
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
return bool(redis_client.sismember(GATED_TENANTS_KEY, tenant_id))

View File

@@ -9,7 +9,6 @@ from ee.onyx.db.token_limit import fetch_user_group_token_rate_limits_for_user
from ee.onyx.db.token_limit import insert_user_group_token_rate_limit
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.token_limit import fetch_all_user_token_rate_limits
@@ -17,6 +16,7 @@ from onyx.db.token_limit import insert_user_token_rate_limit
from onyx.server.query_and_chat.token_limit import any_rate_limit_exists
from onyx.server.token_rate_limits.models import TokenRateLimitArgs
from onyx.server.token_rate_limits.models import TokenRateLimitDisplay
from onyx.server.utils import PUBLIC_API_TAGS
router = APIRouter(prefix="/admin/token-rate-limits", tags=PUBLIC_API_TAGS)

View File

@@ -1,5 +1,8 @@
"""EE Usage limits - trial detection via billing information."""
from datetime import datetime
from datetime import timezone
from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
@@ -28,7 +31,13 @@ def is_tenant_on_trial(tenant_id: str) -> bool:
return True
if isinstance(billing_info, BillingInformation):
return billing_info.status == "trialing"
# Check if trial is active
if billing_info.trial_end is not None:
now = datetime.now(timezone.utc)
# Trial active if trial_end is in the future
# and subscription status indicates trialing
if billing_info.trial_end > now and billing_info.status == "trialing":
return True
return False
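
A minimal sketch of the timezone-aware trial check shown above, isolated from the billing models; the function name and example values are illustrative, only the "trialing" status string mirrors the diff:

from datetime import datetime, timedelta, timezone

def trial_is_active(trial_end: datetime | None, status: str) -> bool:
    # Both datetimes must be timezone-aware; comparing a naive trial_end to
    # datetime.now(timezone.utc) would raise a TypeError.
    if trial_end is None:
        return False
    return status == "trialing" and trial_end > datetime.now(timezone.utc)

# Example: a trial ending tomorrow with status "trialing" is still active.
# trial_is_active(datetime.now(timezone.utc) + timedelta(days=1), "trialing") -> True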

View File

@@ -18,10 +18,10 @@ from ee.onyx.server.user_group.models import UserGroupCreate
from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.models import UserRole
from onyx.server.utils import PUBLIC_API_TAGS
from onyx.utils.logger import setup_logger
logger = setup_logger()

View File

@@ -5,7 +5,6 @@ import json
import os
from datetime import datetime
from datetime import timezone
from pathlib import Path
from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.primitives import hashes
@@ -20,27 +19,21 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
# Path to the license public key file
_LICENSE_PUBLIC_KEY_PATH = (
Path(__file__).parent.parent.parent.parent / "keys" / "license_public_key.pem"
)
# RSA-4096 Public Key for license verification
# Load from environment variable - key is generated on the control plane
# In production, inject via Kubernetes secrets or secrets manager
LICENSE_PUBLIC_KEY_PEM = os.environ.get("LICENSE_PUBLIC_KEY_PEM", "")
def _get_public_key() -> RSAPublicKey:
"""Load the public key from file, with env var override."""
# Allow env var override for flexibility
key_pem = os.environ.get("LICENSE_PUBLIC_KEY_PEM")
if not key_pem:
# Read from file
if not _LICENSE_PUBLIC_KEY_PATH.exists():
raise ValueError(
f"License public key not found at {_LICENSE_PUBLIC_KEY_PATH}. "
"License verification requires the control plane public key."
)
key_pem = _LICENSE_PUBLIC_KEY_PATH.read_text()
key = serialization.load_pem_public_key(key_pem.encode())
"""Load the public key from environment variable."""
if not LICENSE_PUBLIC_KEY_PEM:
raise ValueError(
"LICENSE_PUBLIC_KEY_PEM environment variable not set. "
"License verification requires the control plane public key."
)
key = serialization.load_pem_public_key(LICENSE_PUBLIC_KEY_PEM.encode())
if not isinstance(key, RSAPublicKey):
raise ValueError("Expected RSA public key")
return key
@@ -60,21 +53,17 @@ def verify_license_signature(license_data: str) -> LicensePayload:
ValueError: If license data is invalid or signature verification fails
"""
try:
# Decode the license data
decoded = json.loads(base64.b64decode(license_data))
# Parse into LicenseData to validate structure
license_obj = LicenseData(**decoded)
# IMPORTANT: Use the ORIGINAL payload JSON for signature verification,
# not re-serialized through Pydantic. Pydantic may format fields differently
# (e.g., datetime "+00:00" vs "Z") which would break signature verification.
original_payload = decoded.get("payload", {})
payload_json = json.dumps(original_payload, sort_keys=True)
payload_json = json.dumps(
license_obj.payload.model_dump(mode="json"), sort_keys=True
)
signature_bytes = base64.b64decode(license_obj.signature)
# Verify signature using PSS padding (modern standard)
public_key = _get_public_key()
public_key.verify(
signature_bytes,
payload_json.encode(),
@@ -88,18 +77,16 @@ def verify_license_signature(license_data: str) -> LicensePayload:
return license_obj.payload
except InvalidSignature:
logger.error("[verify_license] FAILED: Signature verification failed")
logger.error("License signature verification failed")
raise ValueError("Invalid license signature")
except json.JSONDecodeError as e:
logger.error(f"[verify_license] FAILED: JSON decode error: {e}")
except json.JSONDecodeError:
logger.error("Failed to decode license JSON")
raise ValueError("Invalid license format: not valid JSON")
except (ValueError, KeyError, TypeError) as e:
logger.error(
f"[verify_license] FAILED: Validation error: {type(e).__name__}: {e}"
)
raise ValueError(f"Invalid license format: {type(e).__name__}: {e}")
logger.error(f"License data validation error: {type(e).__name__}")
raise ValueError(f"Invalid license format: {type(e).__name__}")
except Exception:
logger.exception("[verify_license] FAILED: Unexpected error")
logger.exception("Unexpected error during license verification")
raise ValueError("License verification failed: unexpected error")
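
To make the flow above concrete, a self-contained sign/verify roundtrip sketch using the cryptography library; the RSA-PSS + SHA-256 parameters and the payload fields are assumptions for illustration, and a freshly generated keypair stands in for the control-plane key:

import base64
import json

from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.asymmetric import padding, rsa

# Stand-in for the control-plane keypair (the app only ever sees the public half).
private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
public_key = private_key.public_key()

pss = padding.PSS(mgf=padding.MGF1(hashes.SHA256()), salt_length=padding.PSS.MAX_LENGTH)

# Control-plane side: sign a canonical (sort_keys=True) JSON payload and wrap it.
payload = {"tenant_id": "acme", "seats": 25}
payload_json = json.dumps(payload, sort_keys=True)
signature = private_key.sign(payload_json.encode(), pss, hashes.SHA256())
license_data = base64.b64encode(
    json.dumps(
        {"payload": payload, "signature": base64.b64encode(signature).decode()}
    ).encode()
).decode()

# App side, mirroring verify_license_signature: decode, re-serialize the payload
# with sort_keys=True, then verify; cryptography raises InvalidSignature on mismatch.
decoded = json.loads(base64.b64decode(license_data))
public_key.verify(
    base64.b64decode(decoded["signature"]),
    json.dumps(decoded["payload"], sort_keys=True).encode(),
    pss,
    hashes.SHA256(),
)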

View File

@@ -1,14 +0,0 @@
-----BEGIN PUBLIC KEY-----
MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA5DpchQujdxjCwpc4/RQP
Hej6rc3SS/5ENCXL0I8NAfMogel0fqG6PKRhonyEh/Bt3P4q18y8vYzAShwf4b6Q
aS0WwshbvnkjyWlsK0BY4HLBKPkTpes7kaz8MwmPZDeelvGJ7SNv3FvyJR4QsoSQ
GSoB5iTH7hi63TjzdxtckkXoNG+GdVd/koxVDUv2uWcAoWIFTTcbKWyuq2SS/5Sf
xdVaIArqfAhLpnNbnM9OS7lZ1xP+29ZXpHxDoeluz35tJLMNBYn9u0y+puo1kW1E
TOGizlAq5kmEMsTJ55e9ZuyIV3gZAUaUKe8CxYJPkOGt0Gj6e1jHoHZCBJmaq97Y
stKj//84HNBzajaryEZuEfRecJ94ANEjkD8u9cGmW+9VxRe5544zWguP5WMT/nv1
0Q+jkOBW2hkY5SS0Rug4cblxiB7bDymWkaX6+sC0VWd5g6WXp36EuP2T0v3mYuHU
GDEiWbD44ToREPVwE/M07ny8qhLo/HYk2l8DKFt83hXe7ePBnyQdcsrVbQWOO1na
j43OkoU5gOFyOkrk2RmmtCjA8jSnw+tGCTpRaRcshqoWC1MjZyU+8/kDteXNkmv9
/B5VxzYSyX+abl7yAu5wLiUPW8l+mOazzWu0nPkmiA160ArxnRyxbGnmp4dUIrt5
azYku4tQYLSsSabfhcpeiCsCAwEAAQ==
-----END PUBLIC KEY-----

View File

@@ -97,14 +97,10 @@ def get_access_for_documents(
def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to.
This is meant to be used downstream to filter out documents that the user
does not have access to. The user should have access to a document if at
least one entry in the document's ACL matches one entry in the returned set.
NOTE: These strings must be formatted in the same way as the output of
DocumentAccess::to_acl.
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL
matches one entry in the returned set.
"""
if user:
return {prefix_user_email(user.email), PUBLIC_DOC_PAT}

View File

@@ -105,8 +105,6 @@ class DocExternalAccess:
)
# TODO(andrei): First refactor this into a pydantic model, then get rid of
# duplicate fields.
@dataclass(frozen=True, init=False)
class DocumentAccess(ExternalAccess):
# User emails for Onyx users, None indicates admin
@@ -125,11 +123,9 @@ class DocumentAccess(ExternalAccess):
)
def to_acl(self) -> set[str]:
"""Converts the access state to a set of formatted ACL strings.
# the ACLs emitted by this function are prefixed by type
# to get the native objects, access the member variables directly
NOTE: When querying for documents, the supplied ACL filter strings must
be formatted in the same way as this function.
"""
acl_set: set[str] = set()
for user_email in self.user_emails:
if user_email:

View File

@@ -11,7 +11,6 @@ from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Protocol
from typing import Tuple
@@ -1457,9 +1456,6 @@ def get_default_admin_user_emails_() -> list[str]:
STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
STATE_TOKEN_LIFETIME_SECONDS = 3600
CSRF_TOKEN_KEY = "csrftoken"
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
class OAuth2AuthorizeResponse(BaseModel):
@@ -1467,24 +1463,18 @@ class OAuth2AuthorizeResponse(BaseModel):
def generate_state_token(
data: Dict[str, str],
secret: SecretType, # type: ignore[valid-type]
lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,
data: Dict[str, str], secret: SecretType, lifetime_seconds: int = 3600
) -> str:
data["aud"] = STATE_TOKEN_AUDIENCE
return generate_jwt(data, secret, lifetime_seconds)
def generate_csrf_token() -> str:
return secrets.token_urlsafe(32)
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
def create_onyx_oauth_router(
oauth_client: BaseOAuth2,
backend: AuthenticationBackend,
state_secret: SecretType, # type: ignore[valid-type]
state_secret: SecretType,
redirect_url: Optional[str] = None,
associate_by_email: bool = False,
is_verified_by_default: bool = False,
@@ -1504,17 +1494,10 @@ def get_oauth_router(
oauth_client: BaseOAuth2,
backend: AuthenticationBackend,
get_user_manager: UserManagerDependency[models.UP, models.ID],
state_secret: SecretType, # type: ignore[valid-type]
state_secret: SecretType,
redirect_url: Optional[str] = None,
associate_by_email: bool = False,
is_verified_by_default: bool = False,
*,
csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,
csrf_token_cookie_path: str = "/",
csrf_token_cookie_domain: Optional[str] = None,
csrf_token_cookie_secure: Optional[bool] = None,
csrf_token_cookie_httponly: bool = True,
csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
) -> APIRouter:
"""Generate a router with the OAuth routes."""
router = APIRouter()
@@ -1531,9 +1514,6 @@ def get_oauth_router(
route_name=callback_route_name,
)
if csrf_token_cookie_secure is None:
csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")
@router.get(
"/authorize",
name=f"oauth:{oauth_client.name}.{backend.name}.authorize",
@@ -1541,10 +1521,8 @@ def get_oauth_router(
)
async def authorize(
request: Request,
response: Response,
redirect: bool = Query(False),
scopes: List[str] = Query(None),
) -> Response | OAuth2AuthorizeResponse:
) -> OAuth2AuthorizeResponse:
referral_source = request.cookies.get("referral_source", None)
if redirect_url is not None:
@@ -1554,11 +1532,9 @@ def get_oauth_router(
next_url = request.query_params.get("next", "/")
csrf_token = generate_csrf_token()
state_data: Dict[str, str] = {
"next_url": next_url,
"referral_source": referral_source or "default_referral",
CSRF_TOKEN_KEY: csrf_token,
}
state = generate_state_token(state_data, state_secret)
@@ -1575,31 +1551,6 @@ def get_oauth_router(
authorization_url, {"access_type": "offline", "prompt": "consent"}
)
if redirect:
redirect_response = RedirectResponse(authorization_url, status_code=302)
redirect_response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return redirect_response
response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
@log_function_time(print_only=True)
@@ -1649,33 +1600,7 @@ def get_oauth_router(
try:
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
except jwt.DecodeError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
),
)
except jwt.ExpiredSignatureError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode,
"ACCESS_TOKEN_ALREADY_EXPIRED",
"ACCESS_TOKEN_ALREADY_EXPIRED",
),
)
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
if (
not cookie_csrf_token
or not state_csrf_token
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST)
next_url = state_data.get("next_url", "/")
referral_source = state_data.get("referral_source", None)
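
For orientation, a small sketch of the state-token roundtrip the routes above depend on, written against plain PyJWT; the HS256 algorithm, the secret, and the helper names are assumptions here, since the project goes through fastapi-users' generate_jwt/decode_jwt wrappers:

import time

import jwt  # PyJWT

SECRET = "not-a-real-secret"
AUDIENCE = "fastapi-users:oauth-state"

def make_state(data: dict[str, str], lifetime_seconds: int = 3600) -> str:
    payload = {**data, "aud": AUDIENCE, "exp": int(time.time()) + lifetime_seconds}
    return jwt.encode(payload, SECRET, algorithm="HS256")

def read_state(token: str) -> dict:
    # Raises jwt.DecodeError on a tampered or malformed token, which the callback
    # handler above converts into an HTTP 400 response; an expired token raises
    # jwt.ExpiredSignatureError instead.
    return jwt.decode(token, SECRET, algorithms=["HS256"], audience=AUDIENCE)

# Usage: read_state(make_state({"next_url": "/", "referral_source": "default_referral"}))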

View File

@@ -26,13 +26,10 @@ from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.background.celery.celery_utils import make_probe_path
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.document_index.opensearch.client import (
wait_for_opensearch_with_timeout,
)
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_connector import RedisConnector
@@ -519,17 +516,14 @@ def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
"""Waits for Vespa to become ready subject to a timeout.
Raises WorkerShutdown if the timeout is reached."""
if ENABLE_OPENSEARCH_FOR_ONYX:
return
if not wait_for_vespa_with_timeout():
msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
msg = "Vespa: Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
if not wait_for_opensearch_with_timeout():
msg = "[OpenSearch] Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
# File for validating worker liveness
class LivenessProbe(bootsteps.StartStopStep):

View File

@@ -124,7 +124,6 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.kg_processing",
"onyx.background.celery.tasks.monitoring",
"onyx.background.celery.tasks.user_file_processing",
"onyx.background.celery.tasks.llm_model_update",
# Light worker tasks
"onyx.background.celery.tasks.shared",
"onyx.background.celery.tasks.vespa",
@@ -134,7 +133,5 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.docprocessing",
# Docfetching worker tasks
"onyx.background.celery.tasks.docfetching",
# Sandbox cleanup tasks (isolated in build feature)
"onyx.server.features.build.sandbox.tasks",
]
)

View File

@@ -98,7 +98,5 @@ for bootstep in base_bootsteps:
celery_app.autodiscover_tasks(
[
"onyx.background.celery.tasks.pruning",
# Sandbox tasks (file sync, cleanup)
"onyx.server.features.build.sandbox.tasks",
]
)

View File

@@ -116,7 +116,5 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.connector_deletion",
"onyx.background.celery.tasks.doc_permission_syncing",
"onyx.background.celery.tasks.docprocessing",
# Sandbox cleanup tasks (isolated in build feature)
"onyx.server.features.build.sandbox.tasks",
]
)

View File

@@ -139,27 +139,6 @@ beat_task_templates: list[dict] = [
"queue": OnyxCeleryQueues.MONITORING,
},
},
# Sandbox cleanup tasks
{
"name": "cleanup-idle-sandboxes",
"task": OnyxCeleryTask.CLEANUP_IDLE_SANDBOXES,
"schedule": timedelta(minutes=1),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.SANDBOX,
},
},
{
"name": "cleanup-old-snapshots",
"task": OnyxCeleryTask.CLEANUP_OLD_SNAPSHOTS,
"schedule": timedelta(hours=24),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.SANDBOX,
},
},
]
if ENTERPRISE_EDITION_ENABLED:
@@ -195,7 +174,7 @@ if AUTO_LLM_CONFIG_URL:
"schedule": timedelta(seconds=AUTO_LLM_UPDATE_INTERVAL_SECONDS),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
"expires": AUTO_LLM_UPDATE_INTERVAL_SECONDS,
},
}
)

View File

@@ -87,7 +87,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.httpx.httpx_pool import HttpxPool
@@ -1436,7 +1436,7 @@ def _docprocessing_task(
callback=callback,
)
document_indices = get_all_document_indices(
document_index = get_default_document_index(
index_attempt.search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -1473,7 +1473,7 @@ def _docprocessing_task(
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_indices=document_indices,
document_index=document_index,
ignore_time_skip=True, # Documents are already filtered during extraction
db_session=db_session,
tenant_id=tenant_id,

View File

@@ -5,9 +5,6 @@ from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.llm.well_known_providers.auto_update_service import (
sync_llm_models_from_github,
)
@shared_task(
@@ -29,9 +26,24 @@ def check_for_auto_llm_updates(self: Task, *, tenant_id: str) -> bool | None:
return None
try:
# Import here to avoid circular imports
from onyx.llm.well_known_providers.auto_update_service import (
fetch_llm_recommendations_from_github,
)
from onyx.llm.well_known_providers.auto_update_service import (
sync_llm_models_from_github,
)
# Fetch config from GitHub
config = fetch_llm_recommendations_from_github()
if not config:
task_logger.warning("Failed to fetch GitHub config")
return None
# Sync to database
with get_session_with_current_tenant() as db_session:
results = sync_llm_models_from_github(db_session)
results = sync_llm_models_from_github(db_session, config)
if results:
task_logger.info(f"Auto mode sync results: {results}")

View File

@@ -25,7 +25,7 @@ from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.relationships import delete_document_references_from_kg
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
@@ -97,17 +97,13 @@ def document_by_cc_pair_cleanup_task(
action = "skip"
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates and deletion so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
active_search_settings.primary,
active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
count = get_document_connector_count(db_session, document_id)
if count == 1:
@@ -117,12 +113,11 @@ def document_by_cc_pair_cleanup_task(
chunk_count = fetch_chunk_count_for_document(document_id, db_session)
for retry_document_index in retry_document_indices:
_ = retry_document_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
_ = retry_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
delete_document_references_from_kg(
db_session=db_session,
@@ -160,18 +155,14 @@ def document_by_cc_pair_cleanup_task(
hidden=doc.hidden,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# there are still other cc_pair references to the doc, so just resync to Vespa
delete_document_by_connector_credential_pair__no_commit(

View File

@@ -32,7 +32,7 @@ from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_active_search_settings_list
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.file_store.file_store import get_default_file_store
@@ -244,8 +244,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
search_settings=current_search_settings,
)
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
current_search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -259,7 +258,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_indices=document_indices,
document_index=document_index,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -413,16 +412,12 @@ def process_single_user_file_delete(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(document_index)
index_name = active_search_settings.primary.index_name
selection = f"{index_name}.document_id=='{user_file_id}'"
@@ -443,12 +438,11 @@ def process_single_user_file_delete(
else:
chunk_count = user_file.chunk_count
for retry_document_index in retry_document_indices:
retry_document_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
retry_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
# 2) Delete the user-uploaded file content from filestore (blob + metadata)
file_store = get_default_file_store()
@@ -570,16 +564,12 @@ def process_single_user_file_project_sync(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if not user_file:
@@ -589,14 +579,13 @@ def process_single_user_file_project_sync(
return None
project_ids = [project.id for project in user_file.projects]
for retry_document_index in retry_document_indices:
retry_document_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
retry_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
task_logger.info(
f"process_single_user_file_project_sync - User file id={user_file_id}"

View File

@@ -49,7 +49,7 @@ from onyx.db.search_settings import get_active_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_document_set import RedisDocumentSet
@@ -70,8 +70,6 @@ logger = setup_logger()
# celery auto associates tasks created inside another task,
# which bloats the result metadata considerably. trail=False prevents this.
# TODO(andrei): Rename all these kinds of functions from *vespa* to a more
# generic *document_index*.
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
ignore_result=True,
@@ -467,17 +465,13 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
try:
with get_session_with_current_tenant() as db_session:
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
doc = get_document(document_id, db_session)
if not doc:
@@ -506,18 +500,14 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
# aggregated_boost_factor=doc.aggregated_boost_factor,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update db last. Worst case = we crash right before this and
# the sync might repeat again later

View File

@@ -31,20 +31,17 @@ from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import Document
from onyx.connectors.models import IndexAttemptMetadata
from onyx.connectors.models import TextSection
from onyx.db.connector import mark_ccpair_with_indexing_trigger
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_last_successful_attempt_poll_range_end
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.constants import CONNECTOR_VALIDATION_ERROR_MESSAGE_PREFIX
from onyx.db.document import mark_document_as_indexed_for_cc_pair__no_commit
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
from onyx.db.enums import ProcessingMode
from onyx.db.index_attempt import create_index_attempt_error
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import get_recent_completed_attempts_for_cc_pair
@@ -56,12 +53,7 @@ from onyx.db.models import IndexAttempt
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
from onyx.server.features.build.indexing.persistent_document_writer import (
get_persistent_document_writer,
)
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
from onyx.utils.variable_functionality import global_version
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR
@@ -375,7 +367,6 @@ def connector_document_extraction(
db_connector = index_attempt.connector_credential_pair.connector
db_credential = index_attempt.connector_credential_pair.credential
processing_mode = index_attempt.connector_credential_pair.processing_mode
is_primary = index_attempt.search_settings.status == IndexModelStatus.PRESENT
from_beginning = index_attempt.from_beginning
@@ -609,103 +600,34 @@ def connector_document_extraction(
logger.debug(f"Indexing batch of documents: {batch_description}")
memory_tracer.increment_and_maybe_trace()
# cc4a
if processing_mode == ProcessingMode.FILE_SYSTEM:
# File system only - write directly to persistent storage,
# skip chunking/embedding/Vespa but still track documents in DB
# Store documents in storage
batch_storage.store_batch(batch_num, doc_batch_cleaned)
with get_session_with_current_tenant() as db_session:
# Create metadata for the batch
index_attempt_metadata = IndexAttemptMetadata(
attempt_id=index_attempt_id,
connector_id=db_connector.id,
credential_id=db_credential.id,
request_id=make_randomized_onyx_request_id("FSI"),
structured_id=f"{tenant_id}:{cc_pair_id}:{index_attempt_id}:{batch_num}",
batch_num=batch_num,
)
# Create processing task data
processing_batch_data = {
"index_attempt_id": index_attempt_id,
"cc_pair_id": cc_pair_id,
"tenant_id": tenant_id,
"batch_num": batch_num, # 0-indexed
}
# Upsert documents to PostgreSQL (document table + cc_pair relationship)
# This is a subset of what docprocessing does - just DB tracking, no chunking/embedding
index_doc_batch_prepare(
documents=doc_batch_cleaned,
index_attempt_metadata=index_attempt_metadata,
db_session=db_session,
ignore_time_skip=True, # Documents already filtered during extraction
)
# Queue document processing task
app.send_task(
OnyxCeleryTask.DOCPROCESSING_TASK,
kwargs=processing_batch_data,
queue=OnyxCeleryQueues.DOCPROCESSING,
priority=docprocessing_priority,
)
# Mark documents as indexed for the CC pair
mark_document_as_indexed_for_cc_pair__no_commit(
connector_id=db_connector.id,
credential_id=db_credential.id,
document_ids=[doc.id for doc in doc_batch_cleaned],
db_session=db_session,
)
db_session.commit()
batch_num += 1
total_doc_batches_queued += 1
# Write documents to persistent file system
# Use creator_id for user-segregated storage paths (sandbox isolation)
creator_id = index_attempt.connector_credential_pair.creator_id
if creator_id is None:
raise ValueError(
f"ConnectorCredentialPair {index_attempt.connector_credential_pair.id} "
"must have a creator_id for persistent document storage"
)
user_id_str: str = str(creator_id)
writer = get_persistent_document_writer(
user_id=user_id_str,
tenant_id=tenant_id,
)
written_paths = writer.write_documents(doc_batch_cleaned)
# Update coordination directly (no docprocessing task)
with get_session_with_current_tenant() as db_session:
IndexingCoordination.update_batch_completion_and_docs(
db_session=db_session,
index_attempt_id=index_attempt_id,
total_docs_indexed=len(doc_batch_cleaned),
new_docs_indexed=len(doc_batch_cleaned),
total_chunks=0, # No chunks for file system mode
)
batch_num += 1
total_doc_batches_queued += 1
logger.info(
f"Wrote documents to file system: "
f"batch_num={batch_num} "
f"docs={len(written_paths)} "
f"attempt={index_attempt_id}"
)
else:
# REGULAR mode (default): Full pipeline - store and queue docprocessing
batch_storage.store_batch(batch_num, doc_batch_cleaned)
# Create processing task data
processing_batch_data = {
"index_attempt_id": index_attempt_id,
"cc_pair_id": cc_pair_id,
"tenant_id": tenant_id,
"batch_num": batch_num, # 0-indexed
}
# Queue document processing task
app.send_task(
OnyxCeleryTask.DOCPROCESSING_TASK,
kwargs=processing_batch_data,
queue=OnyxCeleryQueues.DOCPROCESSING,
priority=docprocessing_priority,
)
batch_num += 1
total_doc_batches_queued += 1
logger.info(
f"Queued document processing batch: "
f"batch_num={batch_num} "
f"docs={len(doc_batch_cleaned)} "
f"attempt={index_attempt_id}"
)
logger.info(
f"Queued document processing batch: "
f"batch_num={batch_num} "
f"docs={len(doc_batch_cleaned)} "
f"attempt={index_attempt_id}"
)
# Check checkpoint size periodically
CHECKPOINT_SIZE_CHECK_INTERVAL = 100
@@ -741,24 +663,6 @@ def connector_document_extraction(
total_batches=batch_num,
)
# Trigger file sync to user's sandbox (if running) - only for FILE_SYSTEM mode
# This syncs the newly written documents from S3 to any running sandbox pod
if processing_mode == ProcessingMode.FILE_SYSTEM:
creator_id = index_attempt.connector_credential_pair.creator_id
if creator_id:
app.send_task(
OnyxCeleryTask.SANDBOX_FILE_SYNC,
kwargs={
"user_id": str(creator_id),
"tenant_id": tenant_id,
},
queue=OnyxCeleryQueues.SANDBOX,
)
logger.info(
f"Triggered sandbox file sync for user {creator_id} "
f"after indexing complete"
)
except Exception as e:
logger.exception(
f"Document extraction failed: "

View File

@@ -1,57 +0,0 @@
from uuid import UUID
from redis.client import Redis
# Redis key prefixes for chat message processing
PREFIX = "chatprocessing"
FENCE_PREFIX = f"{PREFIX}_fence"
FENCE_TTL = 30 * 60 # 30 minutes
def _get_fence_key(chat_session_id: UUID) -> str:
"""
Generate the Redis key for a chat session processing a message.
Args:
chat_session_id: The UUID of the chat session
Returns:
The fence key string (tenant_id is automatically added by the Redis client)
"""
return f"{FENCE_PREFIX}_{chat_session_id}"
def set_processing_status(
chat_session_id: UUID, redis_client: Redis, value: bool
) -> None:
"""
Set or clear the fence for a chat session processing a message.
If the key exists, we are processing a message. If the key does not exist, we are not processing a message.
Args:
chat_session_id: The UUID of the chat session
redis_client: The Redis client to use
value: True to set the fence, False to clear it
"""
fence_key = _get_fence_key(chat_session_id)
if value:
redis_client.set(fence_key, 0, ex=FENCE_TTL)
else:
redis_client.delete(fence_key)
def is_chat_session_processing(chat_session_id: UUID, redis_client: Redis) -> bool:
"""
Check if the chat session is processing a message.
Args:
chat_session_id: The UUID of the chat session
redis_client: The Redis client to use
Returns:
True if the chat session is processing a message, False otherwise
"""
fence_key = _get_fence_key(chat_session_id)
return bool(redis_client.exists(fence_key))

View File

@@ -7,7 +7,6 @@ from typing import Any
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.emitter import Emitter
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
@@ -16,11 +15,6 @@ from onyx.tools.models import ToolCallInfo
from onyx.utils.threadpool_concurrency import run_in_background
from onyx.utils.threadpool_concurrency import wait_on_background
# Type alias for search doc deduplication key
# Simple key: just document_id (str)
# Full key: (document_id, chunk_ind, match_highlights)
SearchDocKey = str | tuple[str, int, tuple[str, ...]]
class ChatStateContainer:
"""Container for accumulating state during LLM loop execution.
@@ -45,13 +39,7 @@ class ChatStateContainer:
self.citation_to_doc: CitationMapping = {}
# True if this turn is a clarification question (deep research flow)
self.is_clarification: bool = False
# Tool processing duration (time before answer starts) in seconds
self.tool_processing_duration: float | None = None
# Note: LLM cost tracking is now handled in multi_llm.py
# Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
# Track which citation numbers were actually emitted during streaming
self._emitted_citations: set[int] = set()
def add_tool_call(self, tool_call: ToolCallInfo) -> None:
"""Add a tool call to the accumulated state."""
@@ -103,68 +91,9 @@ class ChatStateContainer:
with self._lock:
return self.is_clarification
def set_tool_processing_duration(self, duration: float | None) -> None:
"""Set the tool processing duration (time before answer starts)."""
with self._lock:
self.tool_processing_duration = duration
def get_tool_processing_duration(self) -> float | None:
"""Thread-safe getter for tool_processing_duration."""
with self._lock:
return self.tool_processing_duration
@staticmethod
def create_search_doc_key(
search_doc: SearchDoc, use_simple_key: bool = True
) -> SearchDocKey:
"""Create a unique key for a SearchDoc for deduplication.
Args:
search_doc: The SearchDoc to create a key for
use_simple_key: If True (default), use only document_id for deduplication.
If False, include chunk_ind and match_highlights so that the same
document/chunk with different highlights are stored separately.
"""
if use_simple_key:
return search_doc.document_id
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
def add_search_docs(
self, search_docs: list[SearchDoc], use_simple_key: bool = True
) -> None:
"""Add search docs to the accumulated collection with deduplication.
Args:
search_docs: List of SearchDoc objects to add
use_simple_key: If True (default), deduplicate by document_id only.
If False, deduplicate by document_id + chunk_ind + match_highlights.
"""
with self._lock:
for doc in search_docs:
key = self.create_search_doc_key(doc, use_simple_key)
if key not in self._all_search_docs:
self._all_search_docs[key] = doc
def get_all_search_docs(self) -> dict[SearchDocKey, SearchDoc]:
"""Thread-safe getter for all accumulated search docs (returns a copy)."""
with self._lock:
return self._all_search_docs.copy()
def add_emitted_citation(self, citation_num: int) -> None:
"""Add a citation number that was actually emitted during streaming."""
with self._lock:
self._emitted_citations.add(citation_num)
def get_emitted_citations(self) -> set[int]:
"""Thread-safe getter for emitted citations (returns a copy)."""
with self._lock:
return self._emitted_citations.copy()
def run_chat_loop_with_state_containers(
func: Callable[..., None],
completion_callback: Callable[[ChatStateContainer], None],
is_connected: Callable[[], bool],
emitter: Emitter,
state_container: ChatStateContainer,
@@ -267,12 +196,3 @@ def run_chat_loop_with_state_containers(
# Skip waiting if user disconnected to exit quickly.
if is_connected():
wait_on_background(thread)
try:
completion_callback(state_container)
except Exception as e:
emitter.emit(
Packet(
placement=Placement(turn_index=last_turn_index + 1),
obj=PacketException(type="error", exception=e),
)
)

View File

@@ -18,10 +18,12 @@ from onyx.background.celery.tasks.kg_processing.kg_indexing import (
from onyx.chat.models import ChatLoadedFile
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.models import ThreadMessage
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import MessageType
from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME
from onyx.context.search.enums import RecencyBiasSetting
from onyx.context.search.models import RerankingDetails
from onyx.context.search.models import RetrievalDetails
from onyx.db.chat import create_chat_session
from onyx.db.chat import get_chat_messages_by_session
from onyx.db.chat import get_or_create_root_message
@@ -46,10 +48,13 @@ from onyx.kg.models import KGException
from onyx.kg.setup.kg_default_entity_definitions import (
populate_missing_default_entity_types__commit,
)
from onyx.llm.override_models import LLMOverride
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.prompts.chat_prompts import ADDITIONAL_CONTEXT_PROMPT
from onyx.prompts.chat_prompts import TOOL_CALL_RESPONSE_CROSS_MESSAGE
from onyx.prompts.tool_prompts import TOOL_CALL_FAILURE_PROMPT
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_implementations.custom.custom_tool import (
@@ -98,6 +103,89 @@ def create_chat_session_from_request(
)
def prepare_chat_message_request(
message_text: str,
user: User | None,
persona_id: int | None,
# Does the question need to have a persona override
persona_override_config: PersonaOverrideConfig | None,
message_ts_to_respond_to: str | None,
retrieval_details: RetrievalDetails | None,
rerank_settings: RerankingDetails | None,
db_session: Session,
skip_gen_ai_answer_generation: bool = False,
llm_override: LLMOverride | None = None,
allowed_tool_ids: list[int] | None = None,
forced_tool_ids: list[int] | None = None,
) -> CreateChatMessageRequest:
# Typically used for one shot flows like SlackBot or non-chat API endpoint use cases
new_chat_session = create_chat_session(
db_session=db_session,
description=None,
user_id=user.id if user else None,
# If using an override, this id will be ignored later on
persona_id=persona_id or DEFAULT_PERSONA_ID,
onyxbot_flow=True,
slack_thread_id=message_ts_to_respond_to,
)
return CreateChatMessageRequest(
chat_session_id=new_chat_session.id,
parent_message_id=None, # It's a standalone chat session each time
message=message_text,
file_descriptors=[], # Currently SlackBot/answer api do not support files in the context
# Can always override the persona for the single query, if it's a normal persona
# then it will be treated the same
persona_override_config=persona_override_config,
search_doc_ids=None,
retrieval_options=retrieval_details,
rerank_settings=rerank_settings,
skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
llm_override=llm_override,
allowed_tool_ids=allowed_tool_ids,
forced_tool_ids=forced_tool_ids,
)
def combine_message_thread(
messages: list[ThreadMessage],
max_tokens: int | None,
llm_tokenizer: BaseTokenizer,
) -> str:
"""Used to create a single combined message context from threads"""
if not messages:
return ""
message_strs: list[str] = []
total_token_count = 0
for message in reversed(messages):
if message.role == MessageType.USER:
role_str = message.role.value.upper()
if message.sender:
role_str += " " + message.sender
else:
# Since other messages might have the user identifying information
# better to use Unknown for symmetry
role_str += " Unknown"
else:
role_str = message.role.value.upper()
msg_str = f"{role_str}:\n{message.message}"
message_token_count = len(llm_tokenizer.encode(msg_str))
if (
max_tokens is not None
and total_token_count + message_token_count > max_tokens
):
break
message_strs.insert(0, msg_str)
total_token_count += message_token_count
return "\n\n".join(message_strs)
def create_chat_history_chain(
chat_session_id: UUID,
db_session: Session,
@@ -159,6 +247,31 @@ def create_chat_history_chain(
return mainline_messages
def combine_message_chain(
messages: list[ChatMessage],
token_limit: int,
msg_limit: int | None = None,
) -> str:
"""Used for secondary LLM flows that require the chat history,"""
message_strs: list[str] = []
total_token_count = 0
if msg_limit is not None:
messages = messages[-msg_limit:]
for message in cast(list[ChatMessage], reversed(messages)):
message_token_count = message.token_count
if total_token_count + message_token_count > token_limit:
break
role = message.message_type.value.upper()
message_strs.insert(0, f"{role}:\n{message.message}")
total_token_count += message_token_count
return "\n\n".join(message_strs)
def reorganize_citations(
answer: str, citations: list[CitationInfo]
) -> tuple[str, list[CitationInfo]]:
@@ -299,7 +412,7 @@ def create_temporary_persona(
num_chunks=persona_config.num_chunks,
llm_relevance_filter=persona_config.llm_relevance_filter,
llm_filter_extraction=persona_config.llm_filter_extraction,
recency_bias=RecencyBiasSetting.BASE_DECAY,
recency_bias=persona_config.recency_bias,
llm_model_provider_override=persona_config.llm_model_provider_override,
llm_model_version_override=persona_config.llm_model_version_override,
)
@@ -469,71 +582,6 @@ def load_all_chat_files(
return files
def convert_chat_history_basic(
chat_history: list[ChatMessage],
token_counter: Callable[[str], int],
max_individual_message_tokens: int | None = None,
max_total_tokens: int | None = None,
) -> list[ChatMessageSimple]:
"""Convert ChatMessage history to ChatMessageSimple format with no tool calls or files included.
Args:
chat_history: List of ChatMessage objects to convert
token_counter: Function to count tokens in a message string
max_individual_message_tokens: If set, messages exceeding this number of tokens are dropped.
If None, no messages are dropped based on individual token count.
max_total_tokens: If set, maximum number of tokens allowed for the entire history.
If None, the history is not trimmed based on total token count.
Returns:
List of ChatMessageSimple objects
"""
# Defensive: treat a non-positive total budget as "no history".
if max_total_tokens is not None and max_total_tokens <= 0:
return []
# Convert only the core USER/ASSISTANT messages; omit files and tool calls.
converted: list[ChatMessageSimple] = []
for chat_message in chat_history:
if chat_message.message_type not in (MessageType.USER, MessageType.ASSISTANT):
continue
message = chat_message.message or ""
token_count = getattr(chat_message, "token_count", None)
if token_count is None:
token_count = token_counter(message)
# Drop any single message that would dominate the context window.
if (
max_individual_message_tokens is not None
and token_count > max_individual_message_tokens
):
continue
converted.append(
ChatMessageSimple(
message=message,
token_count=token_count,
message_type=chat_message.message_type,
image_files=None,
)
)
if max_total_tokens is None:
return converted
# Enforce a max total budget by keeping a contiguous suffix of the conversation.
trimmed_reversed: list[ChatMessageSimple] = []
total_tokens = 0
for msg in reversed(converted):
if total_tokens + msg.token_count > max_total_tokens:
break
trimmed_reversed.append(msg)
total_tokens += msg.token_count
return list(reversed(trimmed_reversed))
def convert_chat_history(
chat_history: list[ChatMessage],
files: list[ChatLoadedFile],

View File

@@ -4,15 +4,14 @@ Dynamic Citation Processor for LLM Responses
This module provides a citation processor that can:
- Accept citation number to SearchDoc mappings dynamically
- Process token streams from LLMs to extract citations
- Handle citations in three modes: REMOVE, KEEP_MARKERS, or HYPERLINK
- Emit CitationInfo objects for detected citations (in HYPERLINK mode)
- Track all seen citations regardless of mode
- Optionally replace citation markers with formatted markdown links
- Emit CitationInfo objects for detected citations (when replacing)
- Track all seen citations regardless of replacement mode
- Maintain a list of cited documents in order of first citation
"""
import re
from collections.abc import Generator
from enum import Enum
from typing import TypeAlias
from onyx.configs.chat_configs import STOP_STREAM_PAT
@@ -24,29 +23,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
class CitationMode(Enum):
"""Defines how citations should be handled in the output.
REMOVE: Citations are completely removed from output text.
No CitationInfo objects are emitted.
Use case: When you need to remove citations from the output if they are not shared with the user
(e.g. in discord bot, public slack bot).
KEEP_MARKERS: Original citation markers like [1], [2] are preserved unchanged.
No CitationInfo objects are emitted.
Use case: When you need to track citations in research agent and later process
them with collapse_citations() to renumber.
HYPERLINK: Citations are replaced with markdown links like [[1]](url).
CitationInfo objects are emitted for UI tracking.
Use case: Final reports shown to users with clickable links.
"""
REMOVE = "remove"
KEEP_MARKERS = "keep_markers"
HYPERLINK = "hyperlink"
CitationMapping: TypeAlias = dict[int, SearchDoc]
@@ -72,37 +48,29 @@ class DynamicCitationProcessor:
This processor is designed for multi-turn conversations where the citation
number to document mapping is provided externally. It processes streaming
tokens from an LLM, detects citations (e.g., [1], [2,3], [[4]]), and handles
them according to the configured CitationMode:
tokens from an LLM, detects citations (e.g., [1], [2,3], [[4]]), and based
on the `replace_citation_tokens` setting:
CitationMode.HYPERLINK (default):
When replace_citation_tokens=True (default):
1. Replaces citation markers with formatted markdown links (e.g., [[1]](url))
2. Emits CitationInfo objects for tracking
3. Maintains the order in which documents were first cited
Use case: Final reports shown to users with clickable links.
CitationMode.KEEP_MARKERS:
1. Preserves original citation markers like [1], [2] unchanged
When replace_citation_tokens=False:
1. Preserves original citation markers in the output text
2. Does NOT emit CitationInfo objects
3. Still tracks all seen citations via get_seen_citations()
Use case: When citations need later processing (e.g., renumbering).
CitationMode.REMOVE:
1. Removes citation markers entirely from the output text
2. Does NOT emit CitationInfo objects
3. Still tracks all seen citations via get_seen_citations()
Use case: Research agent intermediate reports.
Features:
- Accepts citation number → SearchDoc mapping via update_citation_mapping()
- Configurable citation mode at initialization
- Always tracks seen citations regardless of mode
- Configurable citation replacement behavior at initialization
- Always tracks seen citations regardless of replacement mode
- Holds back tokens that might be partial citations
- Maintains list of cited SearchDocs in order of first citation
- Handles unicode bracket variants (【】, )
- Skips citation processing inside code blocks
Example (HYPERLINK mode - default):
Example (with citation replacement - default):
processor = DynamicCitationProcessor()
# Set up citation mapping
@@ -119,8 +87,8 @@ class DynamicCitationProcessor:
# Get cited documents at the end
cited_docs = processor.get_cited_documents()
Example (KEEP_MARKERS mode):
processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
Example (without citation replacement):
processor = DynamicCitationProcessor(replace_citation_tokens=False)
processor.update_citation_mapping({1: search_doc1, 2: search_doc2})
# Process tokens from LLM
@@ -131,42 +99,26 @@ class DynamicCitationProcessor:
# Get all seen citations after processing
seen_citations = processor.get_seen_citations() # {1: search_doc1, ...}
Example (REMOVE mode):
processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
processor.update_citation_mapping({1: search_doc1, 2: search_doc2})
# Process tokens - citations are removed but tracked
for token in llm_stream:
for result in processor.process_token(token):
print(result) # Text without any citation markers
# Citations are still tracked
seen_citations = processor.get_seen_citations()
"""
def __init__(
self,
citation_mode: CitationMode = CitationMode.HYPERLINK,
replace_citation_tokens: bool = True,
stop_stream: str | None = STOP_STREAM_PAT,
):
"""
Initialize the citation processor.
Args:
citation_mode: How to handle citations in the output. One of:
- CitationMode.HYPERLINK (default): Replace [1] with [[1]](url)
and emit CitationInfo objects.
- CitationMode.KEEP_MARKERS: Keep original [1] markers unchanged,
no CitationInfo objects emitted.
- CitationMode.REMOVE: Remove citations entirely from output,
no CitationInfo objects emitted.
All modes track seen citations via get_seen_citations().
replace_citation_tokens: If True (default), citations like [1] are replaced
with formatted markdown links like [[1]](url) and CitationInfo objects
are emitted. If False, original citation text is preserved in output
and no CitationInfo objects are emitted. Regardless of this setting,
all seen citations are tracked and available via get_seen_citations().
stop_stream: Optional stop token pattern to halt processing early.
When this pattern is detected in the token stream, processing stops.
Defaults to STOP_STREAM_PAT from chat configs.
"""
# Citation mapping from citation number to SearchDoc
self.citation_to_doc: CitationMapping = {}
self.seen_citations: CitationMapping = {} # citation num -> SearchDoc
@@ -176,7 +128,7 @@ class DynamicCitationProcessor:
self.curr_segment = "" # tokens held for citation processing
self.hold = "" # tokens held for stop token processing
self.stop_stream = stop_stream
self.citation_mode = citation_mode
self.replace_citation_tokens = replace_citation_tokens
# Citation tracking
self.cited_documents_in_order: list[SearchDoc] = (
@@ -247,21 +199,19 @@ class DynamicCitationProcessor:
5. Handles stop tokens
6. Always tracks seen citations in self.seen_citations
Behavior depends on the `citation_mode` setting from __init__:
- HYPERLINK: Citations are replaced with [[n]](url) format and CitationInfo
Behavior depends on the `replace_citation_tokens` setting from __init__:
- If True: Citations are replaced with [[n]](url) format and CitationInfo
objects are yielded before each formatted citation
- KEEP_MARKERS: Original citation markers like [1] are preserved unchanged,
no CitationInfo objects are yielded
- REMOVE: Citations are removed entirely from output,
no CitationInfo objects are yielded
- If False: Original citation text (e.g., [1]) is preserved in output
and no CitationInfo objects are yielded
Args:
token: The next token from the LLM stream, or None to signal end of stream.
Pass None to flush any remaining buffered text at end of stream.
Yields:
str: Text chunks to display. Citation format depends on citation_mode.
CitationInfo: Citation metadata (only when citation_mode=HYPERLINK)
str: Text chunks to display. Citation format depends on replace_citation_tokens.
CitationInfo: Citation metadata (only when replace_citation_tokens=True)
"""
# None -> end of stream, flush remaining segment
if token is None:
@@ -349,17 +299,17 @@ class DynamicCitationProcessor:
if self.non_citation_count > 5:
self.recent_cited_documents.clear()
# Yield text before citation FIRST (preserve order)
if intermatch_str:
yield intermatch_str
# Process the citation (returns formatted citation text and CitationInfo objects)
# Always tracks seen citations regardless of citation_mode
# Always tracks seen citations regardless of the replace_citation_tokens setting
citation_text, citation_info_list = self._process_citation(
match, has_leading_space
match, has_leading_space, self.replace_citation_tokens
)
if self.citation_mode == CitationMode.HYPERLINK:
# HYPERLINK mode: Replace citations with markdown links [[n]](url)
# Yield text before citation FIRST (preserve order)
if intermatch_str:
yield intermatch_str
if self.replace_citation_tokens:
# Yield CitationInfo objects BEFORE the citation text
# This allows the frontend to receive citation metadata before the token
# that contains [[n]](link), enabling immediate rendering
@@ -368,34 +318,10 @@ class DynamicCitationProcessor:
# Then yield the formatted citation text
if citation_text:
yield citation_text
elif self.citation_mode == CitationMode.KEEP_MARKERS:
# KEEP_MARKERS mode: Preserve original citation markers unchanged
# Yield text before citation
if intermatch_str:
yield intermatch_str
# Yield the original citation marker as-is
else:
# When not replacing citation tokens, yield the original citation text unchanged
yield match.group()
else: # CitationMode.REMOVE
# REMOVE mode: Remove citations entirely from output
# This strips citation markers like [1], [2], 【1】 from the output text
# When removing citations, we need to handle spacing to avoid issues like:
# - "text [1] more" -> "text more" (double space)
# - "text [1]." -> "text ." (space before punctuation)
if intermatch_str:
remaining_text = self.curr_segment[match_span[1] :]
# Strip trailing space from intermatch if:
# 1. Remaining text starts with space (avoids double space)
# 2. Remaining text starts with punctuation (avoids space before punctuation)
if intermatch_str[-1].isspace() and remaining_text:
first_char = remaining_text[0]
# Check if next char is space or common punctuation
if first_char.isspace() or first_char in ".,;:!?)]}":
intermatch_str = intermatch_str.rstrip()
if intermatch_str:
yield intermatch_str
self.non_citation_count = 0
# Leftover text could be part of next citation
@@ -412,7 +338,7 @@ class DynamicCitationProcessor:
yield result
def _process_citation(
self, match: re.Match, has_leading_space: bool
self, match: re.Match, has_leading_space: bool, replace_tokens: bool = True
) -> tuple[str, list[CitationInfo]]:
"""
Process a single citation match and return formatted citation text and citation info objects.
@@ -423,28 +349,31 @@ class DynamicCitationProcessor:
This method always:
1. Extracts citation numbers from the match
2. Looks up the corresponding SearchDoc from the mapping
3. Tracks seen citations in self.seen_citations (regardless of citation_mode)
3. Tracks seen citations in self.seen_citations (regardless of replace_tokens)
When citation_mode is HYPERLINK:
When replace_tokens=True (controlled by self.replace_citation_tokens):
4. Creates formatted citation text as [[n]](url)
5. Creates CitationInfo objects for new citations
6. Handles deduplication of recently cited documents
When citation_mode is REMOVE or KEEP_MARKERS:
4. Returns empty string and empty list (caller handles output based on mode)
When replace_tokens=False:
4. Returns empty string and empty list (caller yields original match text)
Args:
match: Regex match object containing the citation pattern
has_leading_space: Whether the text immediately before this citation
ends with whitespace. Used to determine if a leading space should
be added to the formatted output.
replace_tokens: If True, return formatted text and CitationInfo objects.
If False, only track seen citations and return empty results.
This is passed from self.replace_citation_tokens by the caller.
Returns:
Tuple of (formatted_citation_text, citation_info_list):
- formatted_citation_text: Markdown-formatted citation text like
"[[1]](https://example.com)" or empty string if not in HYPERLINK mode
"[[1]](https://example.com)" or empty string if replace_tokens=False
- citation_info_list: List of CitationInfo objects for newly cited
documents, or empty list if not in HYPERLINK mode
documents, or empty list if replace_tokens=False
"""
citation_str: str = match.group() # e.g., '[1]', '[1, 2, 3]', '[[1]]', '【1】'
formatted = (
@@ -482,11 +411,11 @@ class DynamicCitationProcessor:
doc_id = search_doc.document_id
link = search_doc.link or ""
# Always track seen citations regardless of citation_mode setting
# Always track seen citations regardless of replace_tokens setting
self.seen_citations[num] = search_doc
# Only generate formatted citations and CitationInfo in HYPERLINK mode
if self.citation_mode != CitationMode.HYPERLINK:
# When not replacing citation tokens, skip the rest of the processing
if not replace_tokens:
continue
# Format the citation text as [[n]](link)
@@ -521,14 +450,14 @@ class DynamicCitationProcessor:
"""
Get the list of cited SearchDoc objects in the order they were first cited.
Note: This list is only populated when `citation_mode=HYPERLINK`.
When using REMOVE or KEEP_MARKERS mode, this will return an empty list.
Note: This list is only populated when `replace_citation_tokens=True`.
When `replace_citation_tokens=False`, this will return an empty list.
Use get_seen_citations() instead if you need to track citations without
emitting CitationInfo objects.
replacing them.
Returns:
List of SearchDoc objects in the order they were first cited.
Empty list if citation_mode is not HYPERLINK.
Empty list if replace_citation_tokens=False.
"""
return self.cited_documents_in_order
@@ -536,14 +465,14 @@ class DynamicCitationProcessor:
"""
Get the list of cited document IDs in the order they were first cited.
Note: This list is only populated when `citation_mode=HYPERLINK`.
When using REMOVE or KEEP_MARKERS mode, this will return an empty list.
Note: This list is only populated when `replace_citation_tokens=True`.
When `replace_citation_tokens=False`, this will return an empty list.
Use get_seen_citations() instead if you need to track citations without
emitting CitationInfo objects.
replacing them.
Returns:
List of document IDs (strings) in the order they were first cited.
Empty list if citation_mode is not HYPERLINK.
Empty list if replace_citation_tokens=False.
"""
return [doc.document_id for doc in self.cited_documents_in_order]
@@ -552,12 +481,12 @@ class DynamicCitationProcessor:
Get all seen citations as a mapping from citation number to SearchDoc.
This returns all citations that have been encountered during processing,
regardless of the `citation_mode` setting. Citations are tracked
regardless of the `replace_citation_tokens` setting. Citations are tracked
whenever they are parsed, making this useful for cases where you need to
know which citations appeared in the text without emitting CitationInfo objects.
know which citations appeared in the text without replacing them.
This is particularly useful when using REMOVE or KEEP_MARKERS mode, as
get_cited_documents() will be empty in those cases, but get_seen_citations()
This is particularly useful when `replace_citation_tokens=False`, as
get_cited_documents() will be empty in that case, but get_seen_citations()
will still contain all the citations that were found.
Returns:
@@ -572,13 +501,13 @@ class DynamicCitationProcessor:
"""
Get the number of unique documents that have been cited.
Note: This count is only updated when `citation_mode=HYPERLINK`.
When using REMOVE or KEEP_MARKERS mode, this will always return 0.
Note: This count is only updated when `replace_citation_tokens=True`.
When `replace_citation_tokens=False`, this will always return 0.
Use len(get_seen_citations()) instead if you need to count citations
without emitting CitationInfo objects.
without replacing them.
Returns:
Number of unique documents cited. 0 if citation_mode is not HYPERLINK.
Number of unique documents cited. 0 if replace_citation_tokens=False.
"""
return len(self.cited_document_ids)
@@ -590,9 +519,9 @@ class DynamicCitationProcessor:
CitationInfo objects for the same document when it's cited multiple times
in close succession. This method clears that tracker.
This is primarily useful when `citation_mode=HYPERLINK` to allow
This is primarily useful when `replace_citation_tokens=True` to allow
previously cited documents to emit CitationInfo objects again. Has no
effect when using REMOVE or KEEP_MARKERS mode.
effect when `replace_citation_tokens=False`.
The recent citation tracker is also automatically cleared when more than
5 non-citation characters are processed between citations.
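# Illustrative usage sketch (not part of this diff): driving the streaming API described
# above, assuming the constructor, process_token, and get_seen_citations signatures shown
# in this file. `citation_to_doc` and `handle_citation` are hypothetical placeholders.
from onyx.chat.citation_processor import DynamicCitationProcessor

processor = DynamicCitationProcessor(replace_citation_tokens=True)
processor.update_citation_mapping(citation_to_doc)  # e.g. {1: some SearchDoc}
for token in ["The answer", " is in the doc", " [1].", None]:  # None flushes buffered text
    for item in processor.process_token(token):
        if isinstance(item, str):
            print(item, end="")  # display text; citations arrive as [[1]](url)
        else:
            handle_citation(item)  # CitationInfo is yielded before its citation text
seen = processor.get_seen_citations()  # tracked regardless of replace_citation_tokens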

View File

@@ -53,50 +53,6 @@ def update_citation_processor_from_tool_response(
citation_processor.update_citation_mapping(citation_to_doc)
def extract_citation_order_from_text(text: str) -> list[int]:
"""Extract citation numbers from text in order of first appearance.
Parses citation patterns like [1], [1, 2], [[1]], 【1】 etc. and returns
the citation numbers in the order they first appear in the text.
Args:
text: The text containing citations
Returns:
List of citation numbers in order of first appearance (no duplicates)
"""
# Same pattern used in collapse_citations and DynamicCitationProcessor
# Group 2 captures the number in double bracket format: [[1]], 【【1】】
# Group 4 captures the numbers in single bracket format: [1], [1, 2]
citation_pattern = re.compile(
r"([\[【[]{2}(\d+)[\]】]]{2})|([\[【[]([\d]+(?: *, *\d+)*)[\]】]])"
)
seen: set[int] = set()
order: list[int] = []
for match in citation_pattern.finditer(text):
# Group 2 is for double bracket single number, group 4 is for single bracket
if match.group(2):
nums_str = match.group(2)
elif match.group(4):
nums_str = match.group(4)
else:
continue
for num_str in nums_str.split(","):
num_str = num_str.strip()
if num_str:
try:
num = int(num_str)
if num not in seen:
seen.add(num)
order.append(num)
except ValueError:
continue
return order
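# Illustrative example (not part of this diff) of the helper removed above, based on its
# docstring: duplicates are dropped and first-appearance order is preserved.
#     extract_citation_order_from_text("See [2] and [1], also [[2]] and 【3】.")
#     # -> [2, 1, 3]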
def collapse_citations(
answer_text: str,
existing_citation_mapping: CitationMapping,

View File

@@ -1,4 +1,3 @@
import time
from collections.abc import Callable
from sqlalchemy.orm import Session
@@ -6,11 +5,9 @@ from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_utils import create_tool_call_failure_messages
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.citation_processor import CitationMode
from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.citation_utils import update_citation_processor_from_tool_response
from onyx.chat.emitter import Emitter
from onyx.chat.llm_step import extract_tool_calls_from_response_text
from onyx.chat.llm_step import run_llm_step
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ExtractedProjectFiles
@@ -40,13 +37,11 @@ from onyx.tools.built_in_tools import CITEABLE_TOOLS_NAMES
from onyx.tools.built_in_tools import STOPPING_TOOLS_NAMES
from onyx.tools.interface import Tool
from onyx.tools.models import ToolCallInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
)
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tracing.framework.create import trace
@@ -55,78 +50,6 @@ from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
def _try_fallback_tool_extraction(
llm_step_result: LlmStepResult,
tool_choice: ToolChoiceOptions,
fallback_extraction_attempted: bool,
tool_defs: list[dict],
turn_index: int,
) -> tuple[LlmStepResult, bool]:
"""Attempt to extract tool calls from response text as a fallback.
This is a last resort fallback for low quality LLMs or those that don't have
tool calling from the serving layer. Also triggers if there's reasoning but
no answer and no tool calls.
Args:
llm_step_result: The result from the LLM step
tool_choice: The tool choice option used for this step
fallback_extraction_attempted: Whether fallback extraction was already attempted
tool_defs: List of tool definitions
turn_index: The current turn index for placement
Returns:
Tuple of (possibly updated LlmStepResult, whether fallback was attempted this call)
"""
if fallback_extraction_attempted:
return llm_step_result, False
no_tool_calls = (
not llm_step_result.tool_calls or len(llm_step_result.tool_calls) == 0
)
reasoning_but_no_answer_or_tools = (
llm_step_result.reasoning and not llm_step_result.answer and no_tool_calls
)
should_try_fallback = (
tool_choice == ToolChoiceOptions.REQUIRED and no_tool_calls
) or reasoning_but_no_answer_or_tools
if not should_try_fallback:
return llm_step_result, False
# Try to extract from answer first, then fall back to reasoning
extracted_tool_calls: list[ToolCallKickoff] = []
if llm_step_result.answer:
extracted_tool_calls = extract_tool_calls_from_response_text(
response_text=llm_step_result.answer,
tool_definitions=tool_defs,
placement=Placement(turn_index=turn_index),
)
if not extracted_tool_calls and llm_step_result.reasoning:
extracted_tool_calls = extract_tool_calls_from_response_text(
response_text=llm_step_result.reasoning,
tool_definitions=tool_defs,
placement=Placement(turn_index=turn_index),
)
if extracted_tool_calls:
logger.info(
f"Extracted {len(extracted_tool_calls)} tool call(s) from response text "
f"as fallback (tool_choice was REQUIRED but no tool calls returned)"
)
return (
LlmStepResult(
reasoning=llm_step_result.reasoning,
answer=llm_step_result.answer,
tool_calls=extracted_tool_calls,
),
True,
)
return llm_step_result, True
# Hardcoded opinionated value; might break down to something like:
# Cycle 1: Calls web_search for something
# Cycle 2: Calls open_url for some results
@@ -374,7 +297,6 @@ def run_llm_loop(
forced_tool_id: int | None = None,
user_identity: LLMUserIdentity | None = None,
chat_session_id: str | None = None,
include_citations: bool = True,
) -> None:
with trace(
"run_llm_loop",
@@ -391,17 +313,8 @@ def run_llm_loop(
initialize_litellm()
# Track processing start time for tool duration calculation
processing_start_time = time.monotonic()
# Initialize citation processor for handling citations dynamically
# When include_citations is True, use HYPERLINK mode to format citations as [[1]](url)
# When include_citations is False, use REMOVE mode to strip citations from output
citation_processor = DynamicCitationProcessor(
citation_mode=(
CitationMode.HYPERLINK if include_citations else CitationMode.REMOVE
)
)
citation_processor = DynamicCitationProcessor()
# Add project file citation mappings if project files are present
project_citation_mapping: CitationMapping = {}
@@ -431,7 +344,6 @@ def run_llm_loop(
ran_image_gen: bool = False
just_ran_web_search: bool = False
has_called_search_tool: bool = False
fallback_extraction_attempted: bool = False
citation_mapping: dict[int, str] = {} # Maps citation_num -> document_id/URL
default_base_system_prompt: str = get_default_base_system_prompt(db_session)
@@ -458,16 +370,12 @@ def run_llm_loop(
# The section below calculates the available tokens for history a bit more accurately
# now that project files are loaded in.
if persona and persona.replace_base_system_prompt:
if persona and persona.replace_base_system_prompt and persona.system_prompt:
# Handles the case where the user has checked the "Replace base system prompt" checkbox
system_prompt = (
ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
)
if persona.system_prompt
else None
system_prompt = ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
)
custom_agent_prompt_msg = None
else:
@@ -554,16 +462,10 @@ def run_llm_loop(
# This calls the LLM, yields packets (reasoning, answers, etc.) and returns the result
# It also pre-processes the tool calls in preparation for running them
tool_defs = [tool.tool_definition() for tool in final_tools]
# Calculate tool processing duration at this point
# This captures the time spent on tool calls before the answer starts streaming
tool_processing_duration = time.monotonic() - processing_start_time
llm_step_result, has_reasoned = run_llm_step(
emitter=emitter,
history=truncated_message_history,
tool_definitions=tool_defs,
tool_definitions=[tool.tool_definition() for tool in final_tools],
tool_choice=tool_choice,
llm=llm,
placement=Placement(turn_index=llm_cycle_count + reasoning_cycles),
@@ -574,24 +476,10 @@ def run_llm_loop(
# final set of documents immediately if desired.
final_documents=gathered_documents,
user_identity=user_identity,
tool_processing_duration=tool_processing_duration,
)
if has_reasoned:
reasoning_cycles += 1
# Fallback extraction for LLMs that don't support tool calling natively or are lower quality
# and might incorrectly output tool calls in other channels
llm_step_result, attempted = _try_fallback_tool_extraction(
llm_step_result=llm_step_result,
tool_choice=tool_choice,
fallback_extraction_attempted=fallback_extraction_attempted,
tool_defs=tool_defs,
turn_index=llm_cycle_count + reasoning_cycles,
)
if attempted:
# To prevent the case of excessive looping with bad models, we only allow one fallback attempt
fallback_extraction_attempted = True
# Save citation mapping after each LLM step for incremental state updates
state_container.set_citation_mapping(citation_processor.citation_to_doc)
@@ -617,7 +505,7 @@ def run_llm_loop(
# in-flight citations
# It could be cleaned up, but it's not super trivial or worthwhile right now
just_ran_web_search = False
parallel_tool_call_results = run_tool_calls(
tool_responses, citation_mapping = run_tool_calls(
tool_calls=tool_calls,
tools=final_tools,
message_history=truncated_message_history,
@@ -627,10 +515,7 @@ def run_llm_loop(
next_citation_num=citation_processor.get_next_citation_number(),
max_concurrent_tools=None,
skip_search_query_expansion=has_called_search_tool,
url_snippet_map=extract_url_snippet_map(gathered_documents or []),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = parallel_tool_call_results.updated_citation_mapping
# Failure case: give something reasonable to the LLM so it can try again
if tool_calls and not tool_responses:
@@ -666,15 +551,8 @@ def run_llm_loop(
# Extract search_docs if this is a search tool response
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
if gathered_documents:
gathered_documents.extend(search_docs)
else:
@@ -692,12 +570,6 @@ def run_llm_loop(
):
generated_images = tool_response.rich_response.generated_images
saved_response = (
tool_response.rich_response
if isinstance(tool_response.rich_response, str)
else tool_response.llm_facing_response
)
tool_call_info = ToolCallInfo(
parent_tool_call_id=None, # Top-level tool calls are attached to the chat message
turn_index=llm_cycle_count + reasoning_cycles,
@@ -707,8 +579,8 @@ def run_llm_loop(
tool_id=tool.id,
reasoning_tokens=llm_step_result.reasoning, # All tool calls from this loop share the same reasoning
tool_call_arguments=tool_call.tool_args,
tool_call_response=saved_response,
search_docs=displayed_docs or search_docs,
tool_call_response=tool_response.llm_facing_response,
search_docs=search_docs,
generated_images=generated_images,
)
# Add to state container for partial save support
@@ -763,12 +635,7 @@ def run_llm_loop(
should_cite_documents = True
if not llm_step_result or not llm_step_result.answer:
raise RuntimeError(
"The LLM did not return an answer. "
"Typically this is an issue with LLMs that do not support tool calling natively, "
"or the model serving API is not configured correctly. "
"This may also happen with models that are lower quality outputting invalid tool calls."
)
raise RuntimeError("LLM did not return an answer.")
emitter.emit(
Packet(

View File

@@ -1,6 +1,5 @@
import json
import time
import uuid
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Mapping
@@ -14,7 +13,6 @@ from onyx.chat.emitter import Emitter
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import ChatFileType
@@ -50,7 +48,6 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tracing.framework.create import generation_span
from onyx.utils.b64 import get_image_type_from_bytes
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import find_all_json_objects
logger = setup_logger()
@@ -139,11 +136,12 @@ def _format_message_history_for_logging(
separator = "================================================"
# Handle single ChatCompletionMessage - wrap in list for uniform processing
if isinstance(
message_history, (SystemMessage, UserMessage, AssistantMessage, ToolMessage)
):
message_history = [message_history]
# Handle string input
if isinstance(message_history, str):
formatted_lines.append("Message [string]:")
formatted_lines.append(separator)
formatted_lines.append(f"{message_history}")
return "\n".join(formatted_lines)
# Handle sequence of messages
for i, msg in enumerate(message_history):
@@ -213,8 +211,7 @@ def _update_tool_call_with_delta(
if index not in tool_calls_in_progress:
tool_calls_in_progress[index] = {
# Fallback ID in case the provider never sends one via deltas.
"id": f"fallback_{uuid.uuid4().hex}",
"id": None,
"name": None,
"arguments": "",
}
@@ -280,144 +277,6 @@ def _extract_tool_call_kickoffs(
return tool_calls
def extract_tool_calls_from_response_text(
response_text: str | None,
tool_definitions: list[dict],
placement: Placement,
) -> list[ToolCallKickoff]:
"""Extract tool calls from LLM response text by matching JSON against tool definitions.
This is a fallback mechanism for when the LLM was expected to return tool calls
but didn't use the proper tool call format. It searches for JSON objects in the
response text that match the structure of available tools.
Args:
response_text: The LLM's text response to search for tool calls
tool_definitions: List of tool definitions to match against
placement: Placement information for the tool calls
Returns:
List of ToolCallKickoff objects for any matched tool calls
"""
if not response_text or not tool_definitions:
return []
# Build a map of tool names to their definitions
tool_name_to_def: dict[str, dict] = {}
for tool_def in tool_definitions:
if tool_def.get("type") == "function" and "function" in tool_def:
func_def = tool_def["function"]
tool_name = func_def.get("name")
if tool_name:
tool_name_to_def[tool_name] = func_def
if not tool_name_to_def:
return []
# Find all JSON objects in the response text
json_objects = find_all_json_objects(response_text)
tool_calls: list[ToolCallKickoff] = []
tab_index = 0
for json_obj in json_objects:
matched_tool_call = _try_match_json_to_tool(json_obj, tool_name_to_def)
if matched_tool_call:
tool_name, tool_args = matched_tool_call
tool_calls.append(
ToolCallKickoff(
tool_call_id=f"extracted_{uuid.uuid4().hex[:8]}",
tool_name=tool_name,
tool_args=tool_args,
placement=Placement(
turn_index=placement.turn_index,
tab_index=tab_index,
sub_turn_index=placement.sub_turn_index,
),
)
)
tab_index += 1
logger.info(
f"Extracted {len(tool_calls)} tool call(s) from response text as fallback"
)
return tool_calls
def _try_match_json_to_tool(
json_obj: dict[str, Any],
tool_name_to_def: dict[str, dict],
) -> tuple[str, dict[str, Any]] | None:
"""Try to match a JSON object to a tool definition.
Supports several formats:
1. Direct tool call format: {"name": "tool_name", "arguments": {...}}
2. Function call format: {"function": {"name": "tool_name", "arguments": {...}}}
3. Tool name as key: {"tool_name": {...arguments...}}
4. Arguments matching a tool's parameter schema
Args:
json_obj: The JSON object to match
tool_name_to_def: Map of tool names to their function definitions
Returns:
Tuple of (tool_name, tool_args) if matched, None otherwise
"""
# Format 1: Direct tool call format {"name": "...", "arguments": {...}}
if "name" in json_obj and json_obj["name"] in tool_name_to_def:
tool_name = json_obj["name"]
arguments = json_obj.get("arguments", json_obj.get("parameters", {}))
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except json.JSONDecodeError:
arguments = {}
if isinstance(arguments, dict):
return (tool_name, arguments)
# Format 2: Function call format {"function": {"name": "...", "arguments": {...}}}
if "function" in json_obj and isinstance(json_obj["function"], dict):
func_obj = json_obj["function"]
if "name" in func_obj and func_obj["name"] in tool_name_to_def:
tool_name = func_obj["name"]
arguments = func_obj.get("arguments", func_obj.get("parameters", {}))
if isinstance(arguments, str):
try:
arguments = json.loads(arguments)
except json.JSONDecodeError:
arguments = {}
if isinstance(arguments, dict):
return (tool_name, arguments)
# Format 3: Tool name as key {"tool_name": {...arguments...}}
for tool_name in tool_name_to_def:
if tool_name in json_obj:
arguments = json_obj[tool_name]
if isinstance(arguments, dict):
return (tool_name, arguments)
# Format 4: Check if the JSON object matches a tool's parameter schema
for tool_name, func_def in tool_name_to_def.items():
params = func_def.get("parameters", {})
properties = params.get("properties", {})
required = params.get("required", [])
if not properties:
continue
# Check if all required parameters are present (empty required = all optional)
if all(req in json_obj for req in required):
# Check if any of the tool's properties are in the JSON object
matching_props = [prop for prop in properties if prop in json_obj]
if matching_props:
# Filter to only include known properties
filtered_args = {k: v for k, v in json_obj.items() if k in properties}
return (tool_name, filtered_args)
return None
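# Illustrative examples (not part of this diff) of JSON shapes the matcher above accepts,
# assuming a hypothetical tool named "run_search" whose schema requires a "query" property:
#     {"name": "run_search", "arguments": {"query": "onyx"}}                # Format 1
#     {"function": {"name": "run_search", "arguments": {"query": "onyx"}}}  # Format 2
#     {"run_search": {"query": "onyx"}}                                     # Format 3
#     {"query": "onyx"}                                                     # Format 4 (schema match)
# Each resolves to ("run_search", {"query": "onyx"}).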
def translate_history_to_llm_format(
history: list[ChatMessageSimple],
llm_config: LLMConfig,
@@ -433,7 +292,7 @@ def translate_history_to_llm_format(
for idx, msg in enumerate(history):
# if the message is being added to the history
if PROMPT_CACHE_CHAT_HISTORY and msg.message_type in [
if msg.message_type in [
MessageType.SYSTEM,
MessageType.USER,
MessageType.ASSISTANT,
@@ -622,7 +481,6 @@ def run_llm_step_pkt_generator(
# TODO: Temporary handling of nested tool calls with agents, figure out a better way to handle this
use_existing_tab_index: bool = False,
is_deep_research: bool = False,
tool_processing_duration: float | None = None,
) -> Generator[Packet, None, tuple[LlmStepResult, bool]]:
"""Run an LLM step and stream the response as packets.
NOTE: DO NOT TOUCH THIS FUNCTION BEFORE ASKING YUHONG, this is very finicky and
@@ -723,18 +581,6 @@ def run_llm_step_pkt_generator(
}
# Note: LLM cost tracking is now handled in multi_llm.py
delta = packet.choice.delta
# Weird behavior from some model providers; just log and ignore it for now
if (
delta.content is None
and delta.reasoning_content is None
and delta.tool_calls is None
):
logger.warning(
f"LLM packet is empty (no contents, reasoning or tool calls). Skipping: {packet}"
)
continue
if not first_action_recorded and _delta_has_action(delta):
span_generation.span_data.time_to_first_action_seconds = (
time.monotonic() - stream_start_time
@@ -823,12 +669,6 @@ def run_llm_step_pkt_generator(
reasoning_start = False
if not answer_start:
# Store tool processing duration in state container for save_chat
if state_container and tool_processing_duration is not None:
state_container.set_tool_processing_duration(
tool_processing_duration
)
yield Packet(
placement=Placement(
turn_index=turn_index,
@@ -837,7 +677,6 @@ def run_llm_step_pkt_generator(
),
obj=AgentResponseStart(
final_documents=final_documents,
tool_processing_duration_seconds=tool_processing_duration,
),
)
answer_start = True
@@ -868,11 +707,6 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(
result.citation_number
)
else:
# When citation_processor is None, use delta.content directly without modification
accumulated_answer += delta.content
@@ -999,9 +833,6 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(result.citation_number)
# Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
# Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)
@@ -1009,14 +840,14 @@ def run_llm_step_pkt_generator(
logger.debug(f"Accumulated reasoning: {accumulated_reasoning}")
logger.debug(f"Accumulated answer: {accumulated_answer}")
if tool_calls:
tool_calls_str = "\n".join(
f" - {tc.tool_name}: {json.dumps(tc.tool_args, indent=4)}"
for tc in tool_calls
)
logger.debug(f"Tool calls:\n{tool_calls_str}")
else:
logger.debug("Tool calls: []")
if tool_calls:
tool_calls_str = "\n".join(
f" - {tc.tool_name}: {json.dumps(tc.tool_args, indent=4)}"
for tc in tool_calls
)
logger.debug(f"Tool calls:\n{tool_calls_str}")
else:
logger.debug("Tool calls: []")
return (
LlmStepResult(
@@ -1046,7 +877,6 @@ def run_llm_step(
max_tokens: int | None = None,
use_existing_tab_index: bool = False,
is_deep_research: bool = False,
tool_processing_duration: float | None = None,
) -> tuple[LlmStepResult, bool]:
"""Wrapper around run_llm_step_pkt_generator that consumes packets and emits them.
@@ -1068,7 +898,6 @@ def run_llm_step(
max_tokens=max_tokens,
use_existing_tab_index=use_existing_tab_index,
is_deep_research=is_deep_research,
tool_processing_duration=tool_processing_duration,
)
while True:

View File

@@ -1,5 +1,6 @@
from collections.abc import Callable
from collections.abc import Iterator
from datetime import datetime
from enum import Enum
from typing import Any
from uuid import UUID
@@ -7,7 +8,10 @@ from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.context.search.enums import QueryFlow
from onyx.context.search.enums import RecencyBiasSetting
from onyx.context.search.enums import SearchType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import FileDescriptor
@@ -20,6 +24,25 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_implementations.custom.base_tool_types import ToolResultType
# First chunk of info for streaming QA
class QADocsResponse(BaseModel):
top_documents: list[SearchDoc]
rephrased_query: str | None = None
predicted_flow: QueryFlow | None
predicted_search: SearchType | None
applied_source_filters: list[DocumentSource] | None
applied_time_cutoff: datetime | None
recency_bias_multiplier: float
def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore
initial_dict = super().model_dump(mode="json", *args, **kwargs) # type: ignore
initial_dict["applied_time_cutoff"] = (
self.applied_time_cutoff.isoformat() if self.applied_time_cutoff else None
)
return initial_dict
class StreamStopReason(Enum):
CONTEXT_LENGTH = "context_length"
CANCELLED = "cancelled"
@@ -47,11 +70,22 @@ class UserKnowledgeFilePacket(BaseModel):
user_files: list[FileDescriptor]
class LLMRelevanceFilterResponse(BaseModel):
llm_selected_doc_indices: list[int]
class RelevanceAnalysis(BaseModel):
relevant: bool
content: str | None = None
class SectionRelevancePiece(RelevanceAnalysis):
"""LLM analysis mapped to an Inference Section"""
document_id: str
chunk_id: int # ID of the center chunk for a given inference section
class DocumentRelevance(BaseModel):
"""Contains all relevance information for a given search"""
@@ -82,6 +116,12 @@ class OnyxAnswer(BaseModel):
answer: str | None
class ThreadMessage(BaseModel):
message: str
sender: str | None = None
role: MessageType = MessageType.USER
class FileChatDisplay(BaseModel):
file_ids: list[str]
@@ -118,6 +158,7 @@ class PersonaOverrideConfig(BaseModel):
num_chunks: float | None = None
llm_relevance_filter: bool = False
llm_filter_extraction: bool = False
recency_bias: RecencyBiasSetting = RecencyBiasSetting.AUTO
llm_model_provider_override: str | None = None
llm_model_version_override: str | None = None

View File

@@ -4,15 +4,11 @@ An overview can be found in the README.md file in this directory.
"""
import re
import time
import traceback
from collections.abc import Callable
from uuid import UUID
from redis.client import Redis
from sqlalchemy.orm import Session
from onyx.chat.chat_processing_checker import set_processing_status
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import run_chat_loop_with_state_containers
from onyx.chat.chat_utils import convert_chat_history
@@ -39,19 +35,16 @@ from onyx.chat.save_chat import save_chat_turn
from onyx.chat.stop_signal_checker import is_connected as check_stop_signal
from onyx.chat.stop_signal_checker import reset_cancel_status
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.context.search.models import BaseFilters
from onyx.context.search.enums import OptionalSearchSetting
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_or_create_root_message
from onyx.db.chat import reserve_message_id
from onyx.db.memory import get_memories
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.projects import get_project_token_count
from onyx.db.projects import get_user_files_from_project
@@ -69,7 +62,6 @@ from onyx.onyxbot.slack.models import SlackContext
from onyx.redis.redis_pool import get_redis_client
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.server.query_and_chat.models import OptionalSearchSetting
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
@@ -86,30 +78,18 @@ from onyx.utils.logger import setup_logger
from onyx.utils.long_term_log import LongTermLogger
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.timing import log_function_time
from onyx.utils.variable_functionality import (
fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import noop_fallback
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
ERROR_TYPE_CANCELLED = "cancelled"
def _should_enable_slack_search(
    persona: Persona,
    filters: BaseFilters | None,
) -> bool:
    """Determine if Slack search should be enabled.
    Returns True if:
    - Source type filter exists and includes Slack, OR
    - Default persona with no source type filter
    """
    source_types = filters.source_type if filters else None
    return (source_types is not None and DocumentSource.SLACK in source_types) or (
        persona.id == DEFAULT_PERSONA_ID and source_types is None
    )
class ToolCallException(Exception):
    """Exception raised for errors during tool calls."""
    def __init__(self, message: str, tool_name: str | None = None):
        super().__init__(message)
        self.tool_name = tool_name
def _extract_project_file_texts_and_images(
@@ -300,7 +280,6 @@ def handle_stream_message_objects(
# on the `new_msg_req.message`. Currently, requires a state where the last message is a
litellm_additional_headers: dict[str, str] | None = None,
custom_tool_additional_headers: dict[str, str] | None = None,
mcp_headers: dict[str, str] | None = None,
bypass_acl: bool = False,
# Additional context that should be included in the chat history, for example:
# Slack threads where the conversation cannot be represented by a chain of User/Assistant
@@ -313,11 +292,8 @@ def handle_stream_message_objects(
external_state_container: ChatStateContainer | None = None,
) -> AnswerStream:
tenant_id = get_current_tenant_id()
processing_start_time = time.monotonic()
llm: LLM | None = None
chat_session: ChatSession | None = None
redis_client: Redis | None = None
user_id = user.id if user is not None else None
llm_user_identifier = (
@@ -363,24 +339,6 @@ def handle_stream_message_objects(
event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
)
# Track user message in PostHog for analytics
fetch_versioned_implementation_with_fallback(
module="onyx.utils.telemetry",
attribute="event_telemetry",
fallback=noop_fallback,
)(
distinct_id=user.email if user else tenant_id,
event="user_message_sent",
properties={
"origin": new_msg_req.origin.value,
"has_files": len(new_msg_req.file_descriptors) > 0,
"has_project": chat_session.project_id is not None,
"has_persona": persona is not None and persona.id != DEFAULT_PERSONA_ID,
"deep_research": new_msg_req.deep_research,
"tenant_id": tenant_id,
},
)
llm = get_llm_for_persona(
persona=persona,
user=user,
@@ -422,10 +380,7 @@ def handle_stream_message_objects(
if new_msg_req.parent_message_id == AUTO_PLACE_AFTER_LATEST_MESSAGE:
# Auto-place after the latest message in the chain
parent_message = chat_history[-1] if chat_history else root_message
elif (
new_msg_req.parent_message_id is None
or new_msg_req.parent_message_id == root_message.id
):
elif new_msg_req.parent_message_id is None:
# None = regeneration from root
parent_message = root_message
# Truncate history since we're starting from root
@@ -525,15 +480,11 @@ def handle_stream_message_objects(
),
bypass_acl=bypass_acl,
slack_context=slack_context,
enable_slack_search=_should_enable_slack_search(
persona, new_msg_req.internal_search_filters
),
),
custom_tool_config=CustomToolConfig(
chat_session_id=chat_session.id,
message_id=user_message.id if user_message else None,
additional_headers=custom_tool_additional_headers,
mcp_headers=mcp_headers,
),
allowed_tool_ids=new_msg_req.allowed_tool_ids,
search_usage_forcing_setting=project_search_config.search_usage,
@@ -585,28 +536,10 @@ def handle_stream_message_objects(
def check_is_connected() -> bool:
return check_stop_signal(chat_session.id, redis_client)
set_processing_status(
chat_session_id=chat_session.id,
redis_client=redis_client,
value=True,
)
# Use external state container if provided, otherwise create internal one
# External container allows non-streaming callers to access accumulated state
state_container = external_state_container or ChatStateContainer()
def llm_loop_completion_callback(
state_container: ChatStateContainer,
) -> None:
llm_loop_completion_handle(
state_container=state_container,
db_session=db_session,
chat_session_id=str(chat_session.id),
is_connected=check_is_connected,
assistant_message=assistant_response,
processing_start_time=processing_start_time,
)
# Run the LLM loop with explicit wrapper for stop signal handling
# The wrapper runs run_llm_loop in a background thread and polls every 300ms
# for stop signals. run_llm_loop itself doesn't know about stopping.
@@ -622,7 +555,6 @@ def handle_stream_message_objects(
yield from run_chat_loop_with_state_containers(
run_deep_research_llm_loop,
llm_loop_completion_callback,
is_connected=check_is_connected,
emitter=emitter,
state_container=state_container,
@@ -639,7 +571,6 @@ def handle_stream_message_objects(
else:
yield from run_chat_loop_with_state_containers(
run_llm_loop,
llm_loop_completion_callback,
is_connected=check_is_connected, # Not passed through to run_llm_loop
emitter=emitter,
state_container=state_container,
@@ -655,9 +586,53 @@ def handle_stream_message_objects(
forced_tool_id=forced_tool_id,
user_identity=user_identity,
chat_session_id=str(chat_session.id),
include_citations=new_msg_req.include_citations,
)
# Determine if stopped by user
completed_normally = check_is_connected()
if not completed_normally:
logger.debug(f"Chat session {chat_session.id} stopped by user")
# Build final answer based on completion status
if completed_normally:
if state_container.answer_tokens is None:
raise RuntimeError(
"LLM run completed normally but did not return an answer."
)
final_answer = state_container.answer_tokens
else:
# Stopped by user - append stop message
if state_container.answer_tokens:
final_answer = (
state_container.answer_tokens
+ " ... The generation was stopped by the user here."
)
else:
final_answer = "The generation was stopped by the user."
# Build citation_docs_info from accumulated citations in state container
citation_docs_info: list[CitationDocInfo] = []
seen_citation_nums: set[int] = set()
for citation_num, search_doc in state_container.citation_to_doc.items():
if citation_num not in seen_citation_nums:
seen_citation_nums.add(citation_num)
citation_docs_info.append(
CitationDocInfo(
search_doc=search_doc,
citation_number=citation_num,
)
)
save_chat_turn(
message_text=final_answer,
reasoning_tokens=state_container.reasoning_tokens,
citation_docs_info=citation_docs_info,
tool_calls=state_container.tool_calls,
db_session=db_session,
assistant_message=assistant_response,
is_clarification=state_container.is_clarification,
)
except ValueError as e:
logger.exception("Failed to process chat message.")
@@ -675,7 +650,15 @@ def handle_stream_message_objects(
error_msg = str(e)
stack_trace = traceback.format_exc()
if llm:
if isinstance(e, ToolCallException):
yield StreamingError(
error=error_msg,
stack_trace=stack_trace,
error_code="TOOL_CALL_FAILED",
is_retryable=True,
details={"tool_name": e.tool_name} if e.tool_name else None,
)
elif llm:
client_error_msg, error_code, is_retryable = litellm_exception_to_error_msg(
e, llm
)
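# Illustrative raise site (not part of this diff): a tool runner could surface a failure as
#     raise ToolCallException("search backend timed out", tool_name="run_search")
# which the branch above converts into a retryable StreamingError with
# error_code="TOOL_CALL_FAILED" and the tool name in `details`. "run_search" is hypothetical.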
@@ -707,58 +690,7 @@ def handle_stream_message_objects(
)
db_session.rollback()
finally:
try:
if redis_client is not None and chat_session is not None:
set_processing_status(
chat_session_id=chat_session.id,
redis_client=redis_client,
value=False,
)
except Exception:
logger.exception("Error in setting processing status")
def llm_loop_completion_handle(
state_container: ChatStateContainer,
is_connected: Callable[[], bool],
db_session: Session,
chat_session_id: str,
assistant_message: ChatMessage,
processing_start_time: float | None = None,
) -> None:
# Determine if stopped by user
completed_normally = is_connected()
# Build final answer based on completion status
if completed_normally:
if state_container.answer_tokens is None:
raise RuntimeError(
"LLM run completed normally but did not return an answer."
)
final_answer = state_container.answer_tokens
else:
# Stopped by user - append stop message
logger.debug(f"Chat session {chat_session_id} stopped by user")
if state_container.answer_tokens:
final_answer = (
state_container.answer_tokens
+ " ... \n\nGeneration was stopped by the user."
)
else:
final_answer = "The generation was stopped by the user."
save_chat_turn(
message_text=final_answer,
reasoning_tokens=state_container.reasoning_tokens,
citation_to_doc=state_container.citation_to_doc,
tool_calls=state_container.tool_calls,
all_search_docs=state_container.get_all_search_docs(),
db_session=db_session,
assistant_message=assistant_message,
is_clarification=state_container.is_clarification,
emitted_citations=state_container.get_emitted_citations(),
tool_processing_duration=state_container.get_tool_processing_duration(),
)
return
def stream_chat_message_objects(
@@ -807,8 +739,6 @@ def stream_chat_message_objects(
deep_research=new_msg_req.deep_research,
parent_message_id=new_msg_req.parent_message_id,
chat_session_id=new_msg_req.chat_session_id,
origin=new_msg_req.origin,
include_citations=new_msg_req.include_citations,
)
return handle_stream_message_objects(
new_msg_req=translated_new_msg_req,

View File

@@ -18,7 +18,6 @@ from onyx.prompts.prompt_utils import handle_onyx_date_awareness
from onyx.prompts.prompt_utils import replace_citation_guidance_tag
from onyx.prompts.tool_prompts import GENERATE_IMAGE_GUIDANCE
from onyx.prompts.tool_prompts import INTERNAL_SEARCH_GUIDANCE
from onyx.prompts.tool_prompts import MEMORY_GUIDANCE
from onyx.prompts.tool_prompts import OPEN_URLS_GUIDANCE
from onyx.prompts.tool_prompts import PYTHON_TOOL_GUIDANCE
from onyx.prompts.tool_prompts import TOOL_DESCRIPTION_SEARCH_GUIDANCE
@@ -29,7 +28,6 @@ from onyx.tools.interface import Tool
from onyx.tools.tool_implementations.images.image_generation_tool import (
ImageGenerationTool,
)
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
@@ -180,9 +178,8 @@ def build_system_prompt(
site_colon_disabled=WEB_SEARCH_SITE_DISABLED_GUIDANCE
)
+ OPEN_URLS_GUIDANCE
+ PYTHON_TOOL_GUIDANCE
+ GENERATE_IMAGE_GUIDANCE
+ MEMORY_GUIDANCE
+ PYTHON_TOOL_GUIDANCE
)
return system_prompt
@@ -196,7 +193,6 @@ def build_system_prompt(
has_generate_image = any(
isinstance(tool, ImageGenerationTool) for tool in tools
)
has_memory = any(isinstance(tool, MemoryTool) for tool in tools)
if has_web_search or has_internal_search or include_all_guidance:
system_prompt += TOOL_DESCRIPTION_SEARCH_GUIDANCE
@@ -226,7 +222,4 @@ def build_system_prompt(
if has_generate_image or include_all_guidance:
system_prompt += GENERATE_IMAGE_GUIDANCE
if has_memory or include_all_guidance:
system_prompt += MEMORY_GUIDANCE
return system_prompt

View File

@@ -2,9 +2,8 @@ import json
from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import SearchDocKey
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import add_search_docs_to_chat_message
from onyx.db.chat import add_search_docs_to_tool_call
@@ -20,6 +19,22 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def _create_search_doc_key(search_doc: SearchDoc) -> tuple[str, int, tuple[str, ...]]:
"""
Create a unique key for a SearchDoc that accounts for different versions of the same
document/chunk with different match_highlights.
Args:
search_doc: The SearchDoc pydantic model to create a key for
Returns:
A tuple of (document_id, chunk_ind, sorted match_highlights) that uniquely identifies
this specific version of the document
"""
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
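# Illustrative example (not part of this diff), assuming a SearchDoc with
# document_id="doc-42", chunk_ind=3, and match_highlights=["beta", "alpha"]:
#     _create_search_doc_key(doc)  # -> ("doc-42", 3, ("alpha", "beta"))
# Two hits on the same chunk with different highlights therefore get distinct keys.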
def _create_and_link_tool_calls(
tool_calls: list[ToolCallInfo],
assistant_message: ChatMessage,
@@ -139,48 +154,44 @@ def save_chat_turn(
message_text: str,
reasoning_tokens: str | None,
tool_calls: list[ToolCallInfo],
citation_to_doc: dict[int, SearchDoc],
all_search_docs: dict[SearchDocKey, SearchDoc],
citation_docs_info: list[CitationDocInfo],
db_session: Session,
assistant_message: ChatMessage,
is_clarification: bool = False,
emitted_citations: set[int] | None = None,
tool_processing_duration: float | None = None,
) -> None:
"""
Save a chat turn by populating the assistant_message and creating related entities.
This function:
1. Updates the ChatMessage with text, reasoning tokens, and token count
2. Creates DB SearchDoc entries from pre-deduplicated all_search_docs
3. Builds tool_call -> search_doc mapping for displayed docs
4. Builds citation mapping from citation_to_doc
5. Links all unique SearchDocs to the ChatMessage
2. Creates SearchDoc entries from ToolCall search_docs (for tool calls that returned documents)
3. Collects all unique SearchDocs from all tool calls and links them to ChatMessage
4. Builds citation mapping from citation_docs_info
5. Links all unique SearchDocs from tool calls to the ChatMessage
6. Creates ToolCall entries and links SearchDocs to them
7. Builds the citations mapping for the ChatMessage
Deduplication Logic:
- SearchDocs are deduplicated using (document_id, chunk_ind, match_highlights) as the key
- This ensures that the same document/chunk with different match_highlights (from different
queries) are stored as separate SearchDoc entries
- Each ToolCall and ChatMessage will map to the correct version of the SearchDoc that
matches its specific query highlights
Args:
message_text: The message content to save
reasoning_tokens: Optional reasoning tokens for the message
tool_calls: List of tool call information to create ToolCall entries (may include search_docs)
citation_to_doc: Mapping from citation number to SearchDoc for building citations
all_search_docs: Pre-deduplicated search docs from ChatStateContainer
citation_docs_info: List of citation document information for building citations mapping
db_session: Database session for persistence
assistant_message: The ChatMessage object to populate (should already exist in DB)
is_clarification: Whether this assistant message is a clarification question (deep research flow)
emitted_citations: Set of citation numbers that were actually emitted during streaming.
If provided, only citations in this set will be saved; others are filtered out.
tool_processing_duration: Duration of tool processing before answer starts (in seconds)
"""
# 1. Update ChatMessage with message content, reasoning tokens, and token count
assistant_message.message = message_text
assistant_message.reasoning_tokens = reasoning_tokens
assistant_message.is_clarification = is_clarification
# Use tool processing duration (captured when MESSAGE_START was emitted)
if tool_processing_duration is not None:
assistant_message.processing_duration_seconds = tool_processing_duration
# Calculate the token count using the default tokenizer; when storing, this should not
# use the LLM-specific one, so we use a system default tokenizer here.
default_tokenizer = get_tokenizer(None, None)
@@ -189,53 +200,53 @@ def save_chat_turn(
else:
assistant_message.token_count = 0
# 2. Create DB SearchDoc entries from pre-deduplicated all_search_docs
search_doc_key_to_id: dict[SearchDocKey, int] = {}
for key, search_doc_py in all_search_docs.items():
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[key] = db_search_doc.id
# 3. Build tool_call -> search_doc mapping (for displayed docs in each tool call)
# 2. Create SearchDoc entries from tool_calls
# Build mapping from SearchDoc to DB SearchDoc ID
# Use (document_id, chunk_ind, match_highlights) as key to avoid duplicates
# while ensuring different versions with different highlights are stored separately
search_doc_key_to_id: dict[tuple[str, int, tuple[str, ...]], int] = {}
tool_call_to_search_doc_ids: dict[str, list[int]] = {}
# Process tool calls and their search docs
for tool_call_info in tool_calls:
if tool_call_info.search_docs:
search_doc_ids_for_tool: list[int] = []
for search_doc_py in tool_call_info.search_docs:
key = ChatStateContainer.create_search_doc_key(search_doc_py)
if key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[key])
# Create a unique key for this SearchDoc version
search_doc_key = _create_search_doc_key(search_doc_py)
# Check if we've already created this exact SearchDoc version
if search_doc_key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[search_doc_key])
else:
# Displayed doc not in all_search_docs - create it
# This can happen if displayed_docs contains docs not in search_docs
# Create new DB SearchDoc entry
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[key] = db_search_doc.id
search_doc_key_to_id[search_doc_key] = db_search_doc.id
search_doc_ids_for_tool.append(db_search_doc.id)
tool_call_to_search_doc_ids[tool_call_info.tool_call_id] = list(
set(search_doc_ids_for_tool)
)
# Collect all search doc IDs for ChatMessage linking
all_search_doc_ids_set: set[int] = set(search_doc_key_to_id.values())
# 3. Collect all unique SearchDoc IDs from all tool calls to link to ChatMessage
# Use a set to deduplicate by ID (since we've already deduplicated by key above)
all_search_doc_ids_set: set[int] = set()
for search_doc_ids in tool_call_to_search_doc_ids.values():
all_search_doc_ids_set.update(search_doc_ids)
# 4. Build a citation mapping from the citation number to the saved DB SearchDoc ID
# Only include citations that were actually emitted during streaming
# 4. Build citation mapping from citation_docs_info
citation_number_to_search_doc_id: dict[int, int] = {}
for citation_num, search_doc_py in citation_to_doc.items():
# Skip citations that weren't actually emitted (if emitted_citations is provided)
if emitted_citations is not None and citation_num not in emitted_citations:
continue
for citation_doc_info in citation_docs_info:
# Extract SearchDoc pydantic model
search_doc_py = citation_doc_info.search_doc
# Create the unique key for this SearchDoc version
search_doc_key = ChatStateContainer.create_search_doc_key(search_doc_py)
search_doc_key = _create_search_doc_key(search_doc_py)
# Get the search doc ID (should already exist from processing tool_calls)
if search_doc_key in search_doc_key_to_id:
@@ -272,7 +283,10 @@ def save_chat_turn(
all_search_doc_ids_set.add(db_search_doc_id)
# Build mapping from citation number to search doc ID
citation_number_to_search_doc_id[citation_num] = db_search_doc_id
if citation_doc_info.citation_number is not None:
citation_number_to_search_doc_id[citation_doc_info.citation_number] = (
db_search_doc_id
)
# 5. Link all unique SearchDocs (from both tool calls and citations) to ChatMessage
final_search_doc_ids: list[int] = list(all_search_doc_ids_set)
@@ -292,10 +306,23 @@ def save_chat_turn(
tool_call_to_search_doc_ids=tool_call_to_search_doc_ids,
)
# 7. Build citations mapping - use the mapping we already built in step 4
assistant_message.citations = (
citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
)
# 7. Build citations mapping from citation_docs_info
# Any citation_doc_info with a citation_number appeared in the text and should be mapped
citations: dict[int, int] = {}
for citation_doc_info in citation_docs_info:
if citation_doc_info.citation_number is not None:
search_doc_id = citation_number_to_search_doc_id.get(
citation_doc_info.citation_number
)
if search_doc_id is not None:
citations[citation_doc_info.citation_number] = search_doc_id
else:
logger.warning(
f"Citation number {citation_doc_info.citation_number} found in citation_docs_info "
f"but no matching search doc ID in mapping"
)
assistant_message.citations = citations if citations else None
# Finally save the messages, tool calls, and docs
db_session.commit()

View File

@@ -22,14 +22,6 @@ APP_PORT = 8080
# prefix from requests directed towards the API server. In these cases, set this to `/api`
APP_API_PREFIX = os.environ.get("API_PREFIX", "")
# Certain services need to make HTTP requests to the API server, such as the MCP server and Discord bot
API_SERVER_PROTOCOL = os.environ.get("API_SERVER_PROTOCOL", "http")
API_SERVER_HOST = os.environ.get("API_SERVER_HOST", "127.0.0.1")
# This override allows self-hosting the MCP server with Onyx Cloud backend.
API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS = os.environ.get(
"API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS"
)
# Whether to send user metadata (user_id/email and session_id) to the LLM provider.
# Disabled by default.
SEND_USER_METADATA_TO_LLM_PROVIDER = (
@@ -207,23 +199,9 @@ OPENSEARCH_HOST = os.environ.get("OPENSEARCH_HOST") or "localhost"
OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 9200)
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
OPENSEARCH_ADMIN_PASSWORD = os.environ.get("OPENSEARCH_ADMIN_PASSWORD", "")
USING_AWS_MANAGED_OPENSEARCH = (
os.environ.get("USING_AWS_MANAGED_OPENSEARCH", "").lower() == "true"
)
# This is the "base" config for now; the idea is that, at least for our dev
# environments, we always want to dual-index into both OpenSearch and Vespa
# to stress test the new code paths. Only enable this if there is some instance
# of OpenSearch running for the relevant Onyx instance.
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "").lower() == "true"
)
# Given that the "base" config above is true, this controls whether we retrieve
# from OpenSearch or Vespa. We want to be able to quickly toggle this
# in the event we see issues with OpenSearch retrieval in our dev environments.
ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
ENABLE_OPENSEARCH_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_FOR_ONYX", "").lower() == "true"
)
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
@@ -590,7 +568,6 @@ JIRA_CONNECTOR_LABELS_TO_SKIP = [
JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024)
)
JIRA_SLIM_PAGE_SIZE = int(os.environ.get("JIRA_SLIM_PAGE_SIZE", 500))
GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME")
@@ -752,10 +729,6 @@ JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default
LOG_ONYX_MODEL_INTERACTIONS = (
os.environ.get("LOG_ONYX_MODEL_INTERACTIONS", "").lower() == "true"
)
PROMPT_CACHE_CHAT_HISTORY = (
os.environ.get("PROMPT_CACHE_CHAT_HISTORY", "").lower() == "true"
)
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
LOG_VESPA_TIMING_INFORMATION = (
@@ -876,7 +849,6 @@ AZURE_IMAGE_DEPLOYMENT_NAME = os.environ.get(
# configurable image model
IMAGE_MODEL_NAME = os.environ.get("IMAGE_MODEL_NAME", "gpt-image-1")
IMAGE_MODEL_PROVIDER = os.environ.get("IMAGE_MODEL_PROVIDER", "openai")
# Use managed Vespa (Vespa Cloud). If set, must also set VESPA_CLOUD_URL, VESPA_CLOUD_CERT_PATH and VESPA_CLOUD_KEY_PATH
MANAGED_VESPA = os.environ.get("MANAGED_VESPA", "").lower() == "true"
@@ -1023,36 +995,3 @@ COHERE_DEFAULT_API_KEY = os.environ.get("COHERE_DEFAULT_API_KEY")
VERTEXAI_DEFAULT_CREDENTIALS = os.environ.get("VERTEXAI_DEFAULT_CREDENTIALS")
VERTEXAI_DEFAULT_LOCATION = os.environ.get("VERTEXAI_DEFAULT_LOCATION", "global")
OPENROUTER_DEFAULT_API_KEY = os.environ.get("OPENROUTER_DEFAULT_API_KEY")
INSTANCE_TYPE = (
"managed"
if os.environ.get("IS_MANAGED_INSTANCE", "").lower() == "true"
else "cloud" if AUTH_TYPE == AuthType.CLOUD else "self_hosted"
)
## Discord Bot Configuration
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")
DISCORD_BOT_INVOKE_CHAR = os.environ.get("DISCORD_BOT_INVOKE_CHAR", "!")
## Stripe Configuration
# URL to fetch the Stripe publishable key from a public S3 bucket.
# Publishable keys are safe to expose publicly - they can only initialize
# Stripe.js and tokenize payment info, not make charges or access data.
STRIPE_PUBLISHABLE_KEY_URL = (
"https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt"
)
# Override for local testing with Stripe test keys (pk_test_*)
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")
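
A plausible way a server could resolve the key at runtime, sketched under assumptions (the resolve_publishable_key helper is not part of this diff): prefer the local override when set, otherwise fetch the public value from the S3 URL.

import os
import urllib.request

STRIPE_PUBLISHABLE_KEY_URL = (
    "https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt"
)
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")

def resolve_publishable_key() -> str:
    # A local/test override (e.g. a pk_test_* key) takes precedence.
    if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
        return STRIPE_PUBLISHABLE_KEY_OVERRIDE
    # Publishable keys are safe to fetch and expose; they cannot make charges.
    with urllib.request.urlopen(STRIPE_PUBLISHABLE_KEY_URL, timeout=5) as resp:
        return resp.read().decode("utf-8").strip()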
# Persistent Document Storage Configuration
# When enabled, indexed documents are written to the local filesystem in a hierarchical directory structure
PERSISTENT_DOCUMENT_STORAGE_ENABLED = (
os.environ.get("PERSISTENT_DOCUMENT_STORAGE_ENABLED", "").lower() == "true"
)
# Base directory path for persistent document storage (local filesystem)
# Example: /var/onyx/indexed-docs or /app/indexed-docs
PERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(
"PERSISTENT_DOCUMENT_STORAGE_PATH", "/app/indexed-docs"
)
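
As an illustration of what a hierarchical layout under the base path could look like (the actual directory scheme is not shown in this diff, so the path components below are assumptions):

import os
from pathlib import Path

PERSISTENT_DOCUMENT_STORAGE_PATH = os.environ.get(
    "PERSISTENT_DOCUMENT_STORAGE_PATH", "/app/indexed-docs"
)

def document_path(connector_id: int, document_id: str) -> Path:
    # Hypothetical layout: <base>/<connector_id>/<document_id>.json
    base = Path(PERSISTENT_DOCUMENT_STORAGE_PATH)
    return base / str(connector_id) / f"{document_id}.json"

print(document_path(42, "doc-abc"))  # /app/indexed-docs/42/doc-abc.json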

View File

@@ -1,5 +1,6 @@
import os
INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml"
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
NUM_RETURNED_HITS = 50
@@ -11,6 +12,9 @@ NUM_POSTPROCESSED_RESULTS = 20
# May be less depending on model
MAX_CHUNKS_FED_TO_CHAT = int(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 25)
# Maximum percentage of the context window to fill with selected sections
SELECTED_SECTIONS_MAX_WINDOW_PERCENTAGE = 0.8
# 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
# Capped in Vespa at 0.5
DOC_TIME_DECAY = float(
@@ -23,6 +27,11 @@ FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
# Currently only applies to search flow not chat
CONTEXT_CHUNKS_ABOVE = int(os.environ.get("CONTEXT_CHUNKS_ABOVE") or 1)
CONTEXT_CHUNKS_BELOW = int(os.environ.get("CONTEXT_CHUNKS_BELOW") or 1)
DISABLE_LLM_QUERY_REPHRASE = (
os.environ.get("DISABLE_LLM_QUERY_REPHRASE", "").lower() == "true"
)
# 1 edit per 20 characters, currently unused due to fuzzy match being too slow
QUOTE_ALLOWED_ERROR_PERCENT = 0.05
QA_TIMEOUT = int(os.environ.get("QA_TIMEOUT") or "60") # 60 seconds
# Weighting factor between Vector and Keyword Search, 1 for completely vector search
HYBRID_ALPHA = max(0, min(1, float(os.environ.get("HYBRID_ALPHA") or 0.5)))
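
A worked example of the weighting (a sketch only; the actual fusion happens inside the search backend and may normalize scores differently): with HYBRID_ALPHA at its default of 0.5, vector and keyword scores contribute equally.

def hybrid_score(vector_score: float, keyword_score: float, alpha: float = 0.5) -> float:
    # alpha = 1.0 -> pure vector search, alpha = 0.0 -> pure keyword search
    return alpha * vector_score + (1 - alpha) * keyword_score

print(hybrid_score(0.9, 0.4, alpha=0.5))  # 0.65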
@@ -37,6 +46,34 @@ TITLE_CONTENT_RATIO = max(
0, min(1, float(os.environ.get("TITLE_CONTENT_RATIO") or 0.10))
)
# A list of languages passed to the LLM to rephrase the query
# For example "English,French,Spanish", be sure to use the "," separator
# TODO these are not used, should probably reintroduce these
MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
LANGUAGE_HINT = "\n" + (
os.environ.get("LANGUAGE_HINT")
or "IMPORTANT: Respond in the same language as my query!"
)
LANGUAGE_CHAT_NAMING_HINT = (
os.environ.get("LANGUAGE_CHAT_NAMING_HINT")
or "The name of the conversation must be in the same language as the user query."
)
# Number of prompts each persona should have
NUM_PERSONA_PROMPTS = 4
NUM_PERSONA_PROMPT_GENERATION_CHUNKS = 5
# Agentic search takes significantly more tokens and therefore has much higher cost.
# This configuration allows users to get a search-only experience with instant results
# and no involvement from the LLM.
# Additionally, some LLM providers have strict rate limits which may prohibit
# sending many API requests at once (as is done in agentic search).
# Whether the LLM should evaluate all of the document chunks passed in for usefulness
# in relation to the user query
DISABLE_LLM_DOC_RELEVANCE = (
os.environ.get("DISABLE_LLM_DOC_RELEVANCE", "").lower() == "true"
)
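
A minimal sketch of what the toggle implies downstream (the relevance helper here is hypothetical, not the Onyx implementation): with the flag set, retrieved chunks are returned as-is, avoiding the per-chunk LLM calls that drive up cost and can hit provider rate limits.

import os

DISABLE_LLM_DOC_RELEVANCE = (
    os.environ.get("DISABLE_LLM_DOC_RELEVANCE", "").lower() == "true"
)

def llm_says_relevant(chunk: str, query: str) -> bool:
    # Stand-in for an LLM call judging the chunk's usefulness for the query.
    return query.lower() in chunk.lower()

def filter_chunks(chunks: list[str], query: str) -> list[str]:
    if DISABLE_LLM_DOC_RELEVANCE:
        return chunks  # search-only: instant results, no LLM involvement
    return [c for c in chunks if llm_says_relevant(c, query)]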
# Stops streaming answers back to the UI if this pattern is seen:
STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
@@ -49,6 +86,9 @@ HARD_DELETE_CHATS = os.environ.get("HARD_DELETE_CHATS", "").lower() == "true"
NUM_INTERNET_SEARCH_RESULTS = int(os.environ.get("NUM_INTERNET_SEARCH_RESULTS") or 10)
NUM_INTERNET_SEARCH_CHUNKS = int(os.environ.get("NUM_INTERNET_SEARCH_CHUNKS") or 50)
# Enable in-house model for detecting connector-based filtering in queries
ENABLE_CONNECTOR_CLASSIFIER = os.environ.get("ENABLE_CONNECTOR_CLASSIFIER", False)
VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2)
# Whether or not to use the semantic & keyword search expansions for Basic Search
@@ -56,3 +96,5 @@ USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH = (
os.environ.get("USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH", "false").lower()
== "true"
)
USE_DIV_CON_AGENT = os.environ.get("USE_DIV_CON_AGENT", "false").lower() == "true"

View File

@@ -7,7 +7,6 @@ from enum import Enum
ONYX_DEFAULT_APPLICATION_NAME = "Onyx"
ONYX_DISCORD_URL = "https://discord.gg/4NA5SbzrWb"
ONYX_UTM_SOURCE = "onyx_app"
SLACK_USER_TOKEN_PREFIX = "xoxp-"
SLACK_BOT_TOKEN_PREFIX = "xoxb-"
ONYX_EMAILABLE_LOGO_MAX_DIM = 512
@@ -23,9 +22,6 @@ PUBLIC_DOC_PAT = "PUBLIC"
ID_SEPARATOR = ":;:"
DEFAULT_BOOST = 0
# Tag for endpoints that should be included in the public API documentation
PUBLIC_API_TAGS: list[str | Enum] = ["public"]
# Cookies
FASTAPI_USERS_AUTH_COOKIE_NAME = (
"fastapiusersauth" # Currently a constant, but logic allows for configuration
@@ -93,7 +89,6 @@ SSL_CERT_FILE = "bundle.pem"
DANSWER_API_KEY_PREFIX = "API_KEY__"
DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = "onyxapikey.ai"
UNNAMED_KEY_PLACEHOLDER = "Unnamed"
DISCORD_SERVICE_API_KEY_NAME = "discord-bot-service"
# Key-Value store keys
KV_REINDEX_KEY = "needs_reindexing"
@@ -240,8 +235,6 @@ class NotificationType(str, Enum):
PERSONA_SHARED = "persona_shared"
TRIAL_ENDS_TWO_DAYS = "two_day_trial_ending" # 2 days left in trial
RELEASE_NOTES = "release_notes"
ASSISTANT_FILES_READY = "assistant_files_ready"
FEATURE_ANNOUNCEMENT = "feature_announcement"
class BlobType(str, Enum):
@@ -328,7 +321,6 @@ class FileOrigin(str, Enum):
PLAINTEXT_CACHE = "plaintext_cache"
OTHER = "other"
QUERY_HISTORY_CSV = "query_history_csv"
SANDBOX_SNAPSHOT = "sandbox_snapshot"
USER_FILE = "user_file"
@@ -346,7 +338,6 @@ class MilestoneRecordType(str, Enum):
MULTIPLE_ASSISTANTS = "multiple_assistants"
CREATED_ASSISTANT = "created_assistant"
CREATED_ONYX_BOT = "created_onyx_bot"
REQUESTED_CONNECTOR = "requested_connector"
class PostgresAdvisoryLocks(Enum):
@@ -386,9 +377,6 @@ class OnyxCeleryQueues:
# KG processing queue
KG_PROCESSING = "kg_processing"
# Sandbox processing queue
SANDBOX = "sandbox"
class OnyxRedisLocks:
PRIMARY_WORKER = "da_lock:primary_worker"
@@ -434,13 +422,6 @@ class OnyxRedisLocks:
USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
USER_FILE_DELETE_LOCK_PREFIX = "da_lock:user_file_delete"
# Release notes
RELEASE_NOTES_FETCH_LOCK = "da_lock:release_notes_fetch"
# Sandbox cleanup
CLEANUP_IDLE_SANDBOXES_BEAT_LOCK = "da_lock:cleanup_idle_sandboxes_beat"
CLEANUP_OLD_SNAPSHOTS_BEAT_LOCK = "da_lock:cleanup_old_snapshots_beat"
class OnyxRedisSignals:
BLOCK_VALIDATE_INDEXING_FENCES = "signal:block_validate_indexing_fences"
@@ -566,13 +547,6 @@ class OnyxCeleryTask:
CHECK_KG_PROCESSING_CLUSTERING_ONLY = "check_kg_processing_clustering_only"
KG_RESET_SOURCE_INDEX = "kg_reset_source_index"
# Sandbox cleanup
CLEANUP_IDLE_SANDBOXES = "cleanup_idle_sandboxes"
CLEANUP_OLD_SNAPSHOTS = "cleanup_old_snapshots"
# Sandbox file sync
SANDBOX_FILE_SYNC = "sandbox_file_sync"
# this needs to correspond to the matching entry in supervisord
ONYX_CELERY_BEAT_HEARTBEAT_KEY = "onyx:celery:beat:heartbeat"

View File

@@ -4,6 +4,8 @@ import os
# Onyx Slack Bot Configs
#####
ONYX_BOT_NUM_RETRIES = int(os.environ.get("ONYX_BOT_NUM_RETRIES", "5"))
# How much of the available input context can be used for thread context
MAX_THREAD_CONTEXT_PERCENTAGE = 512 * 2 / 3072
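
The constant works out to 1024 / 3072, i.e. roughly a third of the input window. A small sketch of how such a percentage could be turned into a token budget (the helper is an assumption, not the bot's actual logic):

MAX_THREAD_CONTEXT_PERCENTAGE = 512 * 2 / 3072  # = 1024 / 3072 ≈ 0.333

def thread_context_token_budget(model_input_tokens: int) -> int:
    # e.g. a 3072-token input window leaves ~1024 tokens for Slack thread context
    return int(model_input_tokens * MAX_THREAD_CONTEXT_PERCENTAGE)

print(thread_context_token_budget(3072))  # 1024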
# Number of docs to display in "Reference Documents"
ONYX_BOT_NUM_DOCS_TO_DISPLAY = int(os.environ.get("ONYX_BOT_NUM_DOCS_TO_DISPLAY", "5"))
# If the LLM fails to answer, Onyx can still show the "Reference Documents"
@@ -45,6 +47,10 @@ ONYX_BOT_MAX_WAIT_TIME = int(os.environ.get("ONYX_BOT_MAX_WAIT_TIME") or 180)
# Time (in minutes) after which a Slack message is sent to the user to remind them to give feedback.
# Set to 0 to disable it (default)
ONYX_BOT_FEEDBACK_REMINDER = int(os.environ.get("ONYX_BOT_FEEDBACK_REMINDER") or 0)
# Set to True to rephrase the Slack user's messages
ONYX_BOT_REPHRASE_MESSAGE = (
os.environ.get("ONYX_BOT_REPHRASE_MESSAGE", "").lower() == "true"
)
# ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD is the number of
# responses OnyxBot can send in a given time period.

Some files were not shown because too many files have changed in this diff.