Compare commits

...

75 Commits

Author SHA1 Message Date
Yuhong Sun
b1e92d8e8f k 2026-01-23 17:17:28 -08:00
Danelegend
0594fd17de chore(tests): add more packet tests (#7677) 2026-01-23 19:49:41 +00:00
Jamison Lahman
fded81dc28 chore(extensions): pull in chrome extension (#7703) 2026-01-23 10:17:05 -08:00
Danelegend
31db112de9 feat(url): Open url around snippet (#7488) 2026-01-23 17:02:38 +00:00
Jamison Lahman
a3e2da2c51 chore(vscode): add useful database operations (#7702) 2026-01-23 08:49:59 -08:00
Evan Lohn
f4d33bcc0d feat: basic user MCP action attaching (#7681) 2026-01-23 05:50:49 +00:00
Jamison Lahman
464d957494 chore(devtools): upgrade ods v0.4.0; vscode to restore seeded db (#7696) 2026-01-23 05:21:46 +00:00
Jamison Lahman
be12de9a44 chore(devtools): ods db restore --fetch-seeded (#7689) 2026-01-22 20:41:28 -08:00
Yuhong Sun
3e4a1f8a09 feat: Maintain correct docs on replay (#7683) 2026-01-22 19:24:10 -08:00
Raunak Bhagat
af9b7826ab fix: Remove cursor pointer from view-only field (#7688)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-01-23 02:47:08 +00:00
Danelegend
cb16eb13fc chore(tests): Mock LLM (#7590) 2026-01-23 01:48:54 +00:00
Jamison Lahman
20a73bdd2e chore(desktop): make artifact filename version-agnostic (#7679) 2026-01-22 15:15:52 -08:00
Justin Tahara
85cc2b99b7 fix(fastapi): Resolve CVE-2025-68481 (#7661) 2026-01-22 20:07:25 +00:00
Jamison Lahman
1208a3ee2b chore(fe): disable blur when there is not a custom background (#7673)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-01-22 11:26:16 -08:00
Justin Tahara
900fcef9dd feat(desktop): Domain Configuration (#7655) 2026-01-22 18:15:44 +00:00
Justin Tahara
d4ed25753b fix(ui): Coda Logo (#7656) 2026-01-22 10:10:02 -08:00
Justin Tahara
0ee58333b4 fix(ui): User Groups Connectors Fix (#7658) 2026-01-22 17:59:12 +00:00
Justin Tahara
11b7e0d571 fix(ui): First Connector Result (#7657) 2026-01-22 17:52:02 +00:00
acaprau
a35831f328 fix(opensearch): Release Onyx Helm Charts was failing (#7672) 2026-01-22 17:41:47 +00:00
Justin Tahara
048a6d5259 fix(ui): Fix Token Rate Limits Page (#7659) 2026-01-22 17:20:21 +00:00
Ciaran Sweet
e4bdb15910 docs: enhance send-chat-message docs to also show ChatFullResponse (#7430) 2026-01-22 16:48:26 +00:00
Jamison Lahman
3517d59286 chore(fe): add custom backgrounds to the settings page (#7668) 2026-01-21 21:32:56 -08:00
Jamison Lahman
4bc08e5d88 chore(fe): remove Text pseudo-element padding (#7665) 2026-01-21 19:50:42 -08:00
Yuhong Sun
4bd080cf62 chore: Redirect user to create account (#7654) 2026-01-22 02:44:58 +00:00
Raunak Bhagat
b0a8625ffc feat: Add confirmation modal for connector disconnect (#7637)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-22 02:08:19 +00:00
Yuhong Sun
f94baf6143 fix: DR Language Tuning (#7660) 2026-01-21 17:36:50 -08:00
Wenxi
9e1867638a feat: onyx discord bot - frontend (#7497) 2026-01-22 00:00:12 +00:00
Yuhong Sun
5b6d7c9f0d chore: Onboarding Image Generation (#7653) 2026-01-21 15:49:15 -08:00
Danelegend
e5dcf31f10 fix(image): Emit error to user (#7644) 2026-01-21 22:50:12 +00:00
Nikolas Garza
8ca06ef3e7 fix: deflake chat user journey test (#7646) 2026-01-21 22:33:30 +00:00
Justin Tahara
6897dbd610 feat(desktop): Properly Sign Mac App (#7608) 2026-01-21 22:17:45 +00:00
Evan Lohn
7f3cb77466 chore: remove prompt caching from chat history (#7636) 2026-01-21 21:35:11 +00:00
acaprau
267042a5aa fix(opensearch): Use the same method for getting title that the title embedding logic uses; small cleanup for content embedding (#7638) 2026-01-21 21:34:38 +00:00
Yuhong Sun
d02b3ae6ac chore: Remove default prompt shortcuts (#7639) 2026-01-21 21:28:53 +00:00
Yuhong Sun
683c3f7a7e fix: color mode and memories (#7642) 2026-01-21 13:29:33 -08:00
Nikolas Garza
008b4d2288 fix(slack): Extract person names and filter garbage in query expansion (#7632) 2026-01-21 21:09:50 +00:00
Jamison Lahman
8be261405a chore(deployments): fix region (#7640) 2026-01-21 13:14:42 -08:00
acaprau
61f2c48ebc feat(opensearch): Add helm charts (#7606) 2026-01-21 19:34:18 +00:00
acaprau
dbde2e6d6d chore(opensearch): Create OpenSearch docker compose, enabling test_opensearch_client.py to run in CI (#7611) 2026-01-21 18:41:23 +00:00
Raunak Bhagat
2860136214 feat: Refreshed user settings page (#7455)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 16:41:56 +00:00
Raunak Bhagat
49ec5994d3 refactor: Improve refresh-components with cleanup and truncation (#7622)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 00:29:25 -08:00
Raunak Bhagat
8d5fb67f0f feat: improve prompt shortcuts with uniqueness constraints and enhancements (#7619)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 07:31:35 +00:00
Raunak Bhagat
15d02f6e3c fix: Prevent description duplication in Modal header (#7609)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 04:32:22 +00:00
Jamison Lahman
e58974c419 chore(fe): move chatpage footer inside background element (#7618) 2026-01-21 04:21:49 +00:00
Yuhong Sun
6b66c07952 chore: Delete multilingual docker compose file (#7616) 2026-01-20 19:50:01 -08:00
Jamison Lahman
cae058a3ac chore(extensions): simplify and de-dupe NRFPage (#7607) 2026-01-21 03:42:19 +00:00
Nikolas Garza
aa3b21a191 fix: scroll to bottom when loading existing conversations (#7614) 2026-01-20 19:19:18 -08:00
Raunak Bhagat
7a07a78696 fix: Set width to fit for rightChildren section in LineItem (#7604)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 01:55:03 +00:00
Nikolas Garza
a8db236e37 feat(billing): fetch Stripe publishable key from S3 (#7595) 2026-01-21 01:32:57 +00:00
Raunak Bhagat
8a2e4ed36f fix: Fix flashing in progress-circle icon (#7605)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 01:03:52 +00:00
Evan Lohn
216f2c95a7 chore: add dialog description to modal (#7603) 2026-01-21 00:41:35 +00:00
Evan Lohn
67081efe08 fix: modal header in index attempt errors (#7601) 2026-01-21 00:37:23 +00:00
Yuhong Sun
9d40b8336f feat: Allow no system prompt (#7600) 2026-01-20 16:16:39 -08:00
Evan Lohn
23f0033302 chore: bg services launch.json (#7597) 2026-01-21 00:05:20 +00:00
Raunak Bhagat
9011b76eb0 refactor: Add new layout component (#7588)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 23:36:18 +00:00
Yuhong Sun
45e436bafc fix: prompt tunings (#7594) 2026-01-20 15:13:05 -08:00
Justin Tahara
010bc36d61 Revert "chore(deps): Bump fastapi-users from 14.0.1 to 15.0.2 in /backend/requirements" (#7593) 2026-01-20 14:44:21 -08:00
dependabot[bot]
468e488bdb chore(deps): bump docker/setup-buildx-action from 3.11.1 to 3.12.0 (#7527)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-20 22:36:39 +00:00
dependabot[bot]
9104c0ffce chore(deps): Bump fastapi-users from 14.0.1 to 15.0.2 in /backend/requirements (#6897)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: justin-tahara <justintahara@gmail.com>
2026-01-20 22:31:02 +00:00
Jamison Lahman
d36a6bd0b4 feat(fe): custom chat backgrounds (#7486)
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
2026-01-20 14:29:06 -08:00
Jamison Lahman
a3603c498c chore(deployments): fetch secrets from AWS (#7584) 2026-01-20 22:10:19 +00:00
Jamison Lahman
8f274e34c9 chore(blame): unignore checked in .vscode/ files (#7592) 2026-01-20 14:07:27 -08:00
Justin Tahara
5c256760ff fix(vertex ai): Extra Args for Opus 4.5 (#7586) 2026-01-20 14:07:14 -08:00
Nikolas Garza
258e1372b3 fix(billing): remove grandfathered pricing option when subscription lapses (#7583) 2026-01-20 21:55:37 +00:00
Yuhong Sun
83a543a265 chore: NLTK and stopwords (#7587) 2026-01-20 13:36:04 -08:00
Evan Lohn
f9719d199d fix: drive connector creation ui (#7578) 2026-01-20 21:10:06 +00:00
Raunak Bhagat
1c7bb6e56a fix: Input variant refactor (#7579)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 13:04:16 -08:00
acaprau
982ad7d329 feat(opensearch): Add dual document indices (#7539) 2026-01-20 20:53:24 +00:00
Jamison Lahman
f94292808b chore(vscode): launch.template.jsonc -> launch.json (#7440) 2026-01-20 20:32:46 +00:00
Justin Tahara
293553a2e2 fix(tests): Anthropic Prompt Caching Test (#7585) 2026-01-20 20:32:24 +00:00
Justin Tahara
ba906ae6fa chore(llm): Removing Claude Haiku 3.5 (#7577) 2026-01-20 19:06:14 +00:00
Raunak Bhagat
c84c7a354e refactor: refactor to use string-enum props instead of boolean props (#7575)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 18:59:54 +00:00
Jamison Lahman
2187b0dd82 chore(pre-commit): disallow large files (#7576) 2026-01-20 11:02:00 -08:00
acaprau
d88a417bf9 feat(opensearch): Formally disable secondary indices in the backend (#7541) 2026-01-20 18:21:47 +00:00
Jamison Lahman
f2d32b0b3b fix(fe): inline code text wraps (#7574) 2026-01-20 17:11:42 +00:00
291 changed files with 19678 additions and 5370 deletions

View File

@@ -8,7 +8,9 @@ on:
# Set restrictive default permissions for all jobs. Jobs that need more permissions
# should explicitly declare them.
permissions: {}
permissions:
# Required for OIDC authentication with AWS
id-token: write # zizmor: ignore[excessive-permissions]
env:
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
@@ -150,16 +152,30 @@ jobs:
if: always() && needs.check-version-tag.result == 'failure' && github.event_name != 'workflow_dispatch'
runs-on: ubuntu-slim
timeout-minutes: 10
environment: release
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: "• check-version-tag"
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
@@ -168,6 +184,7 @@ jobs:
needs: determine-builds
if: needs.determine-builds.outputs.build-desktop == 'true'
permissions:
id-token: write
contents: write
actions: read
strategy:
@@ -185,12 +202,33 @@ jobs:
runs-on: ${{ matrix.platform }}
timeout-minutes: 90
environment: release
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
with:
# NOTE: persist-credentials is needed for tauri-action to create GitHub releases.
persist-credentials: true # zizmor: ignore[artipacked]
- name: Configure AWS credentials
if: startsWith(matrix.platform, 'macos-')
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
if: startsWith(matrix.platform, 'macos-')
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
APPLE_ID, deploy/apple-id
APPLE_PASSWORD, deploy/apple-password
APPLE_CERTIFICATE, deploy/apple-certificate
APPLE_CERTIFICATE_PASSWORD, deploy/apple-certificate-password
KEYCHAIN_PASSWORD, deploy/keychain-password
APPLE_TEAM_ID, deploy/apple-team-id
parse-json-secrets: true
- name: install dependencies (ubuntu only)
if: startsWith(matrix.platform, 'ubuntu-')
run: |
@@ -285,15 +323,40 @@ jobs:
Write-Host "Versions set to: $VERSION"
- name: Import Apple Developer Certificate
if: startsWith(matrix.platform, 'macos-')
run: |
echo $APPLE_CERTIFICATE | base64 --decode > certificate.p12
security create-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
security default-keychain -s build.keychain
security unlock-keychain -p "$KEYCHAIN_PASSWORD" build.keychain
security set-keychain-settings -t 3600 -u build.keychain
security import certificate.p12 -k build.keychain -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign
security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$KEYCHAIN_PASSWORD" build.keychain
security find-identity -v -p codesigning build.keychain
- name: Verify Certificate
if: startsWith(matrix.platform, 'macos-')
run: |
CERT_INFO=$(security find-identity -v -p codesigning build.keychain | grep -E "(Developer ID Application|Apple Distribution|Apple Development)" | head -n 1)
CERT_ID=$(echo "$CERT_INFO" | awk -F'"' '{print $2}')
echo "CERT_ID=$CERT_ID" >> $GITHUB_ENV
echo "Certificate imported."
- uses: tauri-apps/tauri-action@73fb865345c54760d875b94642314f8c0c894afa # ratchet:tauri-apps/tauri-action@action-v0.6.1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
APPLE_ID: ${{ env.APPLE_ID }}
APPLE_PASSWORD: ${{ env.APPLE_PASSWORD }}
APPLE_SIGNING_IDENTITY: ${{ env.CERT_ID }}
APPLE_TEAM_ID: ${{ env.APPLE_TEAM_ID }}
with:
tagName: ${{ needs.determine-builds.outputs.is-test-run != 'true' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseName: ${{ needs.determine-builds.outputs.is-test-run != 'true' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseBody: "See the assets to download this version and install."
releaseDraft: true
prerelease: false
assetNamePattern: "[name]_[arch][ext]"
args: ${{ matrix.args }}
build-web-amd64:
@@ -305,6 +368,7 @@ jobs:
- run-id=${{ github.run_id }}-web-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -317,6 +381,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -326,13 +404,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -363,6 +441,7 @@ jobs:
- run-id=${{ github.run_id }}-web-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -375,6 +454,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -384,13 +477,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -423,19 +516,34 @@ jobs:
- run-id=${{ github.run_id }}-merge-web
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -471,6 +579,7 @@ jobs:
- run-id=${{ github.run_id }}-web-cloud-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -483,6 +592,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -492,13 +615,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -537,6 +660,7 @@ jobs:
- run-id=${{ github.run_id }}-web-cloud-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -549,6 +673,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -558,13 +696,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -605,19 +743,34 @@ jobs:
- run-id=${{ github.run_id }}-merge-web-cloud
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -650,6 +803,7 @@ jobs:
- run-id=${{ github.run_id }}-backend-amd64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -662,6 +816,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -671,13 +839,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -707,6 +875,7 @@ jobs:
- run-id=${{ github.run_id }}-backend-arm64
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -719,6 +888,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -728,13 +911,13 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -766,19 +949,34 @@ jobs:
- run-id=${{ github.run_id }}-merge-backend
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -815,6 +1013,7 @@ jobs:
- volume=40gb
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -827,6 +1026,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -836,15 +1049,15 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push AMD64
id: build
@@ -879,6 +1092,7 @@ jobs:
- volume=40gb
- extras=ecr-cache
timeout-minutes: 90
environment: release
outputs:
digest: ${{ steps.build.outputs.digest }}
env:
@@ -891,6 +1105,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
@@ -900,15 +1128,15 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Build and push ARM64
id: build
@@ -944,19 +1172,34 @@ jobs:
- run-id=${{ github.run_id }}-merge-model-server
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
username: ${{ env.DOCKER_USERNAME }}
password: ${{ env.DOCKER_TOKEN }}
- name: Docker meta
id: meta
@@ -994,11 +1237,26 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-web
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1014,8 +1272,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1034,11 +1292,26 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-web-cloud
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1054,8 +1327,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1074,6 +1347,7 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-backend
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
steps:
@@ -1084,6 +1358,20 @@ jobs:
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1100,8 +1388,8 @@ jobs:
-v ${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1121,11 +1409,26 @@ jobs:
- run-id=${{ github.run_id }}-trivy-scan-model-server
- extras=ecr-cache
timeout-minutes: 90
environment: release
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
DOCKER_USERNAME, deploy/docker-username
DOCKER_TOKEN, deploy/docker-token
parse-json-secrets: true
- name: Run Trivy vulnerability scanner
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
with:
@@ -1141,8 +1444,8 @@ jobs:
docker run --rm -v $HOME/.cache/trivy:/root/.cache/trivy \
-e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
-e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
-e TRIVY_USERNAME="${{ secrets.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ secrets.DOCKER_TOKEN }}" \
-e TRIVY_USERNAME="${{ env.DOCKER_USERNAME }}" \
-e TRIVY_PASSWORD="${{ env.DOCKER_TOKEN }}" \
aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
image \
--skip-version-check \
@@ -1170,12 +1473,26 @@ jobs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 90
environment: release
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708
with:
role-to-assume: ${{ secrets.AWS_OIDC_ROLE_ARN }}
aws-region: us-east-2
- name: Get AWS Secrets
uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802
with:
secret-ids: |
MONITOR_DEPLOYMENTS_WEBHOOK, deploy/monitor-deployments-webhook
parse-json-secrets: true
- name: Determine failed jobs
id: failed-jobs
shell: bash
@@ -1241,7 +1558,7 @@ jobs:
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
webhook-url: ${{ env.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: ${{ steps.failed-jobs.outputs.jobs }}
title: "🚨 Deployment Workflow Failed"
ref-name: ${{ github.ref_name }}

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -29,6 +29,7 @@ jobs:
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/

View File

@@ -94,7 +94,7 @@ jobs:
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -45,6 +45,9 @@ env:
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
# OpenSearch
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
@@ -125,11 +128,13 @@ jobs:
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker-compose.opensearch.yml \
up -d \
minio \
relational_db \
cache \
index \
opensearch \
code-interpreter
- name: Run migrations
@@ -158,7 +163,7 @@ jobs:
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
# Collect logs from each container
for container in $containers; do

View File

@@ -88,6 +88,7 @@ jobs:
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
@@ -180,6 +181,11 @@ jobs:
trap cleanup EXIT
# Run the actual installation with detailed logging
# Note that opensearch.enabled is true whereas others in this install
# are false. There is some work that needs to be done to get this
# entire step working in CI, enabling opensearch here is a small step
# in that direction. If this is causing issues, disabling it in this
# step should be ok in the short term.
echo "=== Starting ct install ==="
set +e
ct install --all \
@@ -187,6 +193,8 @@ jobs:
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=opensearch.enabled=true \
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \

View File

@@ -103,7 +103,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -163,7 +163,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -208,7 +208,7 @@ jobs:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit

View File

@@ -95,7 +95,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -155,7 +155,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -214,7 +214,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit

View File

@@ -85,7 +85,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -146,7 +146,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -207,7 +207,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/

View File

@@ -70,7 +70,7 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
- name: Build and load
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6

3
.gitignore vendored
View File

@@ -1,5 +1,8 @@
# editors
.vscode
!/.vscode/env_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.cursor

View File

@@ -74,6 +74,13 @@ repos:
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-added-large-files
name: Check for added large files
args: ["--maxkb=1500"]
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:

View File

@@ -1,5 +1,3 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
@@ -24,7 +22,7 @@
"Slack Bot",
"Celery primary",
"Celery light",
"Celery background",
"Celery heavy",
"Celery docfetching",
"Celery docprocessing",
"Celery beat"
@@ -579,6 +577,99 @@
"group": "3"
}
},
{
// Dummy entry used to label the group
"name": "--- Database ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "4",
"order": 0
}
},
{
"name": "Clean restore seeded database dump (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--clean",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Create database snapshot",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"dump",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore database snapshot (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--clean",
"--yes",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Upgrade database to head revision",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"upgrade"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -37,10 +37,6 @@ CVE-2023-50868
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193

View File

@@ -89,12 +89,6 @@ RUN uv pip install --system --no-cache-dir --upgrade \
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"

View File

@@ -0,0 +1,42 @@
"""add_unique_constraint_to_inputprompt_prompt_user_id
Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Enforce uniqueness of input-prompt names per owner.

    Adds a composite unique constraint on (prompt, user_id) for user-owned
    shortcuts, plus a partial unique index covering public prompts (rows
    where user_id IS NULL), because PostgreSQL unique constraints do not
    treat NULLs as equal and thus would not deduplicate public names.
    """
    op.create_unique_constraint(
        "uq_inputprompt_prompt_user_id",
        "inputprompt",
        ["prompt", "user_id"],
    )
    # NULL user_id rows (public prompts) escape the constraint above, so a
    # partial unique index is needed to keep public prompt names unique too.
    public_prompt_index_sql = """
        CREATE UNIQUE INDEX uq_inputprompt_prompt_public
        ON inputprompt (prompt)
        WHERE user_id IS NULL
        """
    op.execute(public_prompt_index_sql)
def downgrade() -> None:
    """Revert the per-owner uniqueness rules on input-prompt names."""
    # Undo in reverse order of creation: partial index first, then constraint.
    op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
    op.drop_constraint(
        "uq_inputprompt_prompt_user_id",
        "inputprompt",
        type_="unique",
    )

View File

@@ -0,0 +1,29 @@
"""remove default prompt shortcuts
Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Drop the pre-seeded default prompt shortcuts (rows with negative IDs)."""
    # Child rows in inputprompt__user reference inputprompt, so they must be
    # removed first to satisfy the foreign key constraint.
    delete_associations_sql = (
        "DELETE FROM inputprompt__user WHERE input_prompt_id IN "
        "(SELECT id FROM inputprompt WHERE id < 0)"
    )
    op.execute(delete_associations_sql)
    # Seeded default shortcuts are identifiable by their negative primary keys.
    op.execute("DELETE FROM inputprompt WHERE id < 0")
def downgrade() -> None:
    """No-op: the seeded default prompts are intentionally not restored."""
    return None

View File

@@ -0,0 +1,31 @@
"""add chat_background to user
Revision ID: fb80bdd256de
Revises: 8b5ce697290e
Create Date: 2026-01-16 16:15:59.222617
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fb80bdd256de"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the nullable chat_background column to the user table."""
    # Nullable so existing user rows need no backfill on upgrade.
    chat_background_column = sa.Column("chat_background", sa.String(), nullable=True)
    op.add_column("user", chat_background_column)
def downgrade() -> None:
    """Remove the chat_background column from the user table."""
    table_name, column_name = "user", "chat_background"
    op.drop_column(table_name, column_name)

View File

@@ -17,7 +17,8 @@ from onyx.context.search.models import InferenceChunk
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.db.models import User
from onyx.document_index.factory import get_current_primary_default_document_index
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.factory import get_default_llm
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
@@ -42,11 +43,13 @@ def _run_single_search(
document_index: DocumentIndex,
user: User | None,
db_session: Session,
num_hits: int | None = None,
) -> list[InferenceChunk]:
"""Execute a single search query and return chunks."""
chunk_search_request = ChunkSearchRequest(
query=query,
user_selected_filters=filters,
limit=num_hits,
)
return search_pipeline(
@@ -72,7 +75,9 @@ def stream_search_query(
Used by both streaming and non-streaming endpoints.
"""
# Get document index
document_index = get_current_primary_default_document_index(db_session)
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
# Determine queries to execute
original_query = request.search_query
@@ -114,6 +119,7 @@ def stream_search_query(
document_index=document_index,
user=user,
db_session=db_session,
num_hits=request.num_hits,
)
else:
# Multiple queries - run in parallel and merge with RRF
@@ -121,7 +127,14 @@ def stream_search_query(
search_functions = [
(
_run_single_search,
(query, request.filters, document_index, user, db_session),
(
query,
request.filters,
document_index,
user,
db_session,
request.num_hits,
),
)
for query in all_executed_queries
]
@@ -168,6 +181,9 @@ def stream_search_query(
# Merge chunks into sections
sections = merge_individual_chunks(chunks)
# Truncate to the requested number of hits
sections = sections[: request.num_hits]
# Apply LLM document selection if requested
# num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
# The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it

View File

@@ -10,6 +10,8 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
("/enterprise-settings/logo", {"GET"}),
("/enterprise-settings/logotype", {"GET"}),
("/enterprise-settings/custom-analytics-script", {"GET"}),
# Stripe publishable key is safe to expose publicly
("/tenants/stripe-publishable-key", {"GET"}),
]

View File

@@ -32,6 +32,7 @@ class SendSearchQueryRequest(BaseModel):
filters: BaseFilters | None = None
num_docs_fed_to_llm_selection: int | None = None
run_query_expansion: bool = False
num_hits: int = 50
include_content: bool = False
stream: bool = False

View File

@@ -1,3 +1,6 @@
import asyncio
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
@@ -12,11 +15,14 @@ from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import StripePublishableKeyResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import overwrite_full_gated_set
from ee.onyx.server.tenants.product_gating import store_product_gating
from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
@@ -26,6 +32,10 @@ logger = setup_logger()
router = APIRouter(prefix="/tenants")
# Cache for Stripe publishable key to avoid hitting S3 on every request
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()
@router.post("/product-gating")
def gate_product(
@@ -113,3 +123,67 @@ async def create_subscription_session(
except Exception as e:
logger.exception("Failed to create subscription session")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/stripe-publishable-key")
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
"""
Fetch the Stripe publishable key.
Priority: env var override (for testing) > S3 bucket (production).
This endpoint is public (no auth required) since publishable keys are safe to expose.
The key is cached in memory to avoid hitting S3 on every request.
"""
global _stripe_publishable_key_cache
# Fast path: return cached value without lock
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Use lock to prevent concurrent S3 requests
async with _stripe_key_lock:
# Double-check after acquiring lock (another request may have populated cache)
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Check for env var override first (for local testing with pk_test_* keys)
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
)
try:
async with httpx.AsyncClient() as client:
response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
response.raise_for_status()
key = response.text.strip()
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
)

View File

@@ -105,3 +105,7 @@ class PendingUserSnapshot(BaseModel):
class ApproveUserRequest(BaseModel):
email: str
class StripePublishableKeyResponse(BaseModel):
    """Response payload for the public Stripe publishable-key endpoint."""

    # Stripe publishable key ("pk_" prefix); safe to expose to clients.
    publishable_key: str

View File

@@ -11,6 +11,7 @@ from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Protocol
from typing import Tuple
@@ -1456,6 +1457,9 @@ def get_default_admin_user_emails_() -> list[str]:
STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
STATE_TOKEN_LIFETIME_SECONDS = 3600
CSRF_TOKEN_KEY = "csrftoken"
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
class OAuth2AuthorizeResponse(BaseModel):
@@ -1463,13 +1467,19 @@ class OAuth2AuthorizeResponse(BaseModel):
def generate_state_token(
data: Dict[str, str], secret: SecretType, lifetime_seconds: int = 3600
data: Dict[str, str],
secret: SecretType,
lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,
) -> str:
data["aud"] = STATE_TOKEN_AUDIENCE
return generate_jwt(data, secret, lifetime_seconds)
def generate_csrf_token() -> str:
    """Return a fresh URL-safe CSRF token backed by 32 bytes of entropy."""
    token = secrets.token_urlsafe(32)
    return token
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
def create_onyx_oauth_router(
oauth_client: BaseOAuth2,
@@ -1498,6 +1508,13 @@ def get_oauth_router(
redirect_url: Optional[str] = None,
associate_by_email: bool = False,
is_verified_by_default: bool = False,
*,
csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,
csrf_token_cookie_path: str = "/",
csrf_token_cookie_domain: Optional[str] = None,
csrf_token_cookie_secure: Optional[bool] = None,
csrf_token_cookie_httponly: bool = True,
csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
) -> APIRouter:
"""Generate a router with the OAuth routes."""
router = APIRouter()
@@ -1514,6 +1531,9 @@ def get_oauth_router(
route_name=callback_route_name,
)
if csrf_token_cookie_secure is None:
csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")
@router.get(
"/authorize",
name=f"oauth:{oauth_client.name}.{backend.name}.authorize",
@@ -1521,8 +1541,10 @@ def get_oauth_router(
)
async def authorize(
request: Request,
response: Response,
redirect: bool = Query(False),
scopes: List[str] = Query(None),
) -> OAuth2AuthorizeResponse:
) -> Response | OAuth2AuthorizeResponse:
referral_source = request.cookies.get("referral_source", None)
if redirect_url is not None:
@@ -1532,9 +1554,11 @@ def get_oauth_router(
next_url = request.query_params.get("next", "/")
csrf_token = generate_csrf_token()
state_data: Dict[str, str] = {
"next_url": next_url,
"referral_source": referral_source or "default_referral",
CSRF_TOKEN_KEY: csrf_token,
}
state = generate_state_token(state_data, state_secret)
@@ -1551,6 +1575,31 @@ def get_oauth_router(
authorization_url, {"access_type": "offline", "prompt": "consent"}
)
if redirect:
redirect_response = RedirectResponse(authorization_url, status_code=302)
redirect_response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return redirect_response
response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
@log_function_time(print_only=True)
@@ -1600,7 +1649,33 @@ def get_oauth_router(
try:
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
except jwt.DecodeError:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
),
)
except jwt.ExpiredSignatureError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode,
"ACCESS_TOKEN_ALREADY_EXPIRED",
"ACCESS_TOKEN_ALREADY_EXPIRED",
),
)
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
if (
not cookie_csrf_token
or not state_csrf_token
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
)
next_url = state_data.get("next_url", "/")
referral_source = state_data.get("referral_source", None)

View File

@@ -26,10 +26,13 @@ from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.background.celery.celery_utils import make_probe_path
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.document_index.opensearch.client import (
wait_for_opensearch_with_timeout,
)
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_connector import RedisConnector
@@ -516,15 +519,17 @@ def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
"""Waits for Vespa to become ready subject to a timeout.
Raises WorkerShutdown if the timeout is reached."""
if ENABLE_OPENSEARCH_FOR_ONYX:
# TODO(andrei): Do some similar liveness checking for OpenSearch.
return
if not wait_for_vespa_with_timeout():
msg = "Vespa: Readiness probe did not succeed within the timeout. Exiting..."
msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
if not wait_for_opensearch_with_timeout():
msg = "[OpenSearch] Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
# File for validating worker liveness
class LivenessProbe(bootsteps.StartStopStep):

View File

@@ -87,7 +87,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.httpx.httpx_pool import HttpxPool
@@ -1436,7 +1436,7 @@ def _docprocessing_task(
callback=callback,
)
document_index = get_default_document_index(
document_indices = get_all_document_indices(
index_attempt.search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -1473,7 +1473,7 @@ def _docprocessing_task(
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_index=document_index,
document_indices=document_indices,
ignore_time_skip=True, # Documents are already filtered during extraction
db_session=db_session,
tenant_id=tenant_id,

View File

@@ -25,7 +25,7 @@ from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.relationships import delete_document_references_from_kg
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
@@ -97,13 +97,17 @@ def document_by_cc_pair_cleanup_task(
action = "skip"
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
# This flow is for updates and deletion so we get all indices.
document_indices = get_all_document_indices(
active_search_settings.primary,
active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_index = RetryDocumentIndex(doc_index)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
count = get_document_connector_count(db_session, document_id)
if count == 1:
@@ -113,11 +117,12 @@ def document_by_cc_pair_cleanup_task(
chunk_count = fetch_chunk_count_for_document(document_id, db_session)
_ = retry_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
for retry_document_index in retry_document_indices:
_ = retry_document_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
delete_document_references_from_kg(
db_session=db_session,
@@ -155,14 +160,18 @@ def document_by_cc_pair_cleanup_task(
hidden=doc.hidden,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# there are still other cc_pair references to the doc, so just resync to Vespa
delete_document_by_connector_credential_pair__no_commit(

View File

@@ -32,7 +32,7 @@ from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_active_search_settings_list
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.file_store.file_store import get_default_file_store
@@ -244,7 +244,8 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
search_settings=current_search_settings,
)
document_index = get_default_document_index(
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
current_search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -258,7 +259,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_index=document_index,
document_indices=document_indices,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -412,12 +413,16 @@ def process_single_user_file_delete(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
document_index = get_default_document_index(
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_index = RetryDocumentIndex(document_index)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
index_name = active_search_settings.primary.index_name
selection = f"{index_name}.document_id=='{user_file_id}'"
@@ -438,11 +443,12 @@ def process_single_user_file_delete(
else:
chunk_count = user_file.chunk_count
retry_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
for retry_document_index in retry_document_indices:
retry_document_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
# 2) Delete the user-uploaded file content from filestore (blob + metadata)
file_store = get_default_file_store()
@@ -564,12 +570,16 @@ def process_single_user_file_project_sync(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_index = RetryDocumentIndex(doc_index)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if not user_file:
@@ -579,13 +589,14 @@ def process_single_user_file_project_sync(
return None
project_ids = [project.id for project in user_file.projects]
retry_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
for retry_document_index in retry_document_indices:
retry_document_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
task_logger.info(
f"process_single_user_file_project_sync - User file id={user_file_id}"

View File

@@ -49,7 +49,7 @@ from onyx.db.search_settings import get_active_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_document_set import RedisDocumentSet
@@ -70,6 +70,8 @@ logger = setup_logger()
# celery auto associates tasks created inside another task,
# which bloats the result metadata considerably. trail=False prevents this.
# TODO(andrei): Rename all these kinds of functions from *vespa* to a more
# generic *document_index*.
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
ignore_result=True,
@@ -465,13 +467,17 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
try:
with get_session_with_current_tenant() as db_session:
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_index = RetryDocumentIndex(doc_index)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
doc = get_document(document_id, db_session)
if not doc:
@@ -500,14 +506,18 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
# aggregated_boost_factor=doc.aggregated_boost_factor,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update db last. Worst case = we crash right before this and
# the sync might repeat again later

View File

@@ -7,6 +7,7 @@ from typing import Any
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.emitter import Emitter
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
@@ -15,6 +16,11 @@ from onyx.tools.models import ToolCallInfo
from onyx.utils.threadpool_concurrency import run_in_background
from onyx.utils.threadpool_concurrency import wait_on_background
# Type alias for search doc deduplication key
# Simple key: just document_id (str)
# Full key: (document_id, chunk_ind, match_highlights)
SearchDocKey = str | tuple[str, int, tuple[str, ...]]
class ChatStateContainer:
"""Container for accumulating state during LLM loop execution.
@@ -40,6 +46,10 @@ class ChatStateContainer:
# True if this turn is a clarification question (deep research flow)
self.is_clarification: bool = False
# Note: LLM cost tracking is now handled in multi_llm.py
# Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
# Track which citation numbers were actually emitted during streaming
self._emitted_citations: set[int] = set()
def add_tool_call(self, tool_call: ToolCallInfo) -> None:
"""Add a tool call to the accumulated state."""
@@ -91,6 +101,54 @@ class ChatStateContainer:
with self._lock:
return self.is_clarification
@staticmethod
def create_search_doc_key(
search_doc: SearchDoc, use_simple_key: bool = True
) -> SearchDocKey:
"""Create a unique key for a SearchDoc for deduplication.
Args:
search_doc: The SearchDoc to create a key for
use_simple_key: If True (default), use only document_id for deduplication.
If False, include chunk_ind and match_highlights so that the same
document/chunk with different highlights are stored separately.
"""
if use_simple_key:
return search_doc.document_id
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
def add_search_docs(
self, search_docs: list[SearchDoc], use_simple_key: bool = True
) -> None:
"""Add search docs to the accumulated collection with deduplication.
Args:
search_docs: List of SearchDoc objects to add
use_simple_key: If True (default), deduplicate by document_id only.
If False, deduplicate by document_id + chunk_ind + match_highlights.
"""
with self._lock:
for doc in search_docs:
key = self.create_search_doc_key(doc, use_simple_key)
if key not in self._all_search_docs:
self._all_search_docs[key] = doc
def get_all_search_docs(self) -> dict[SearchDocKey, SearchDoc]:
"""Thread-safe getter for all accumulated search docs (returns a copy)."""
with self._lock:
return self._all_search_docs.copy()
def add_emitted_citation(self, citation_num: int) -> None:
"""Add a citation number that was actually emitted during streaming."""
with self._lock:
self._emitted_citations.add(citation_num)
def get_emitted_citations(self) -> set[int]:
"""Thread-safe getter for emitted citations (returns a copy)."""
with self._lock:
return self._emitted_citations.copy()
def run_chat_loop_with_state_containers(
func: Callable[..., None],

View File

@@ -53,6 +53,50 @@ def update_citation_processor_from_tool_response(
citation_processor.update_citation_mapping(citation_to_doc)
def extract_citation_order_from_text(text: str) -> list[int]:
"""Extract citation numbers from text in order of first appearance.
Parses citation patterns like [1], [1, 2], [[1]], 【1】 etc. and returns
the citation numbers in the order they first appear in the text.
Args:
text: The text containing citations
Returns:
List of citation numbers in order of first appearance (no duplicates)
"""
# Same pattern used in collapse_citations and DynamicCitationProcessor
# Group 2 captures the number in double bracket format: [[1]], 【【1】】
# Group 4 captures the numbers in single bracket format: [1], [1, 2]
citation_pattern = re.compile(
r"([\[【[]{2}(\d+)[\]】]]{2})|([\[【[]([\d]+(?: *, *\d+)*)[\]】]])"
)
seen: set[int] = set()
order: list[int] = []
for match in citation_pattern.finditer(text):
# Group 2 is for double bracket single number, group 4 is for single bracket
if match.group(2):
nums_str = match.group(2)
elif match.group(4):
nums_str = match.group(4)
else:
continue
for num_str in nums_str.split(","):
num_str = num_str.strip()
if num_str:
try:
num = int(num_str)
if num not in seen:
seen.add(num)
order.append(num)
except ValueError:
continue
return order
def collapse_citations(
answer_text: str,
existing_citation_mapping: CitationMapping,

View File

@@ -45,6 +45,7 @@ from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
)
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tracing.framework.create import trace
@@ -453,12 +454,16 @@ def run_llm_loop(
# The section below calculates the available tokens for history a bit more accurately
# now that project files are loaded in.
if persona and persona.replace_base_system_prompt and persona.system_prompt:
if persona and persona.replace_base_system_prompt:
# Handles the case where user has checked off the "Replace base system prompt" checkbox
system_prompt = ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
system_prompt = (
ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
)
if persona.system_prompt
else None
)
custom_agent_prompt_msg = None
else:
@@ -612,6 +617,7 @@ def run_llm_loop(
next_citation_num=citation_processor.get_next_citation_number(),
max_concurrent_tools=None,
skip_search_query_expansion=has_called_search_tool,
url_snippet_map=extract_url_snippet_map(gathered_documents or []),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = parallel_tool_call_results.updated_citation_mapping
@@ -650,8 +656,15 @@ def run_llm_loop(
# Extract search_docs if this is a search tool response
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
if gathered_documents:
gathered_documents.extend(search_docs)
else:
@@ -685,7 +698,7 @@ def run_llm_loop(
reasoning_tokens=llm_step_result.reasoning, # All tool calls from this loop share the same reasoning
tool_call_arguments=tool_call.tool_args,
tool_call_response=saved_response,
search_docs=search_docs,
search_docs=displayed_docs or search_docs,
generated_images=generated_images,
)
# Add to state container for partial save support

View File

@@ -14,6 +14,7 @@ from onyx.chat.emitter import Emitter
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import ChatFileType
@@ -432,7 +433,7 @@ def translate_history_to_llm_format(
for idx, msg in enumerate(history):
# if the message is being added to the history
if msg.message_type in [
if PROMPT_CACHE_CHAT_HISTORY and msg.message_type in [
MessageType.SYSTEM,
MessageType.USER,
MessageType.ASSISTANT,
@@ -859,6 +860,11 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(
result.citation_number
)
else:
# When citation_processor is None, use delta.content directly without modification
accumulated_answer += delta.content
@@ -985,6 +991,9 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(result.citation_number)
# Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
# Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)

View File

@@ -42,7 +42,6 @@ from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_session_by_id
@@ -744,27 +743,16 @@ def llm_loop_completion_handle(
else:
final_answer = "The generation was stopped by the user."
# Build citation_docs_info from accumulated citations in state container
citation_docs_info: list[CitationDocInfo] = []
seen_citation_nums: set[int] = set()
for citation_num, search_doc in state_container.citation_to_doc.items():
if citation_num not in seen_citation_nums:
seen_citation_nums.add(citation_num)
citation_docs_info.append(
CitationDocInfo(
search_doc=search_doc,
citation_number=citation_num,
)
)
save_chat_turn(
message_text=final_answer,
reasoning_tokens=state_container.reasoning_tokens,
citation_docs_info=citation_docs_info,
citation_to_doc=state_container.citation_to_doc,
tool_calls=state_container.tool_calls,
all_search_docs=state_container.get_all_search_docs(),
db_session=db_session,
assistant_message=assistant_message,
is_clarification=state_container.is_clarification,
emitted_citations=state_container.get_emitted_citations(),
)

View File

@@ -2,8 +2,9 @@ import json
from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import SearchDocKey
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import add_search_docs_to_chat_message
from onyx.db.chat import add_search_docs_to_tool_call
@@ -19,22 +20,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def _create_search_doc_key(search_doc: SearchDoc) -> tuple[str, int, tuple[str, ...]]:
"""
Create a unique key for a SearchDoc that accounts for different versions of the same
document/chunk with different match_highlights.
Args:
search_doc: The SearchDoc pydantic model to create a key for
Returns:
A tuple of (document_id, chunk_ind, sorted match_highlights) that uniquely identifies
this specific version of the document
"""
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
def _create_and_link_tool_calls(
tool_calls: list[ToolCallInfo],
assistant_message: ChatMessage,
@@ -154,38 +139,36 @@ def save_chat_turn(
message_text: str,
reasoning_tokens: str | None,
tool_calls: list[ToolCallInfo],
citation_docs_info: list[CitationDocInfo],
citation_to_doc: dict[int, SearchDoc],
all_search_docs: dict[SearchDocKey, SearchDoc],
db_session: Session,
assistant_message: ChatMessage,
is_clarification: bool = False,
emitted_citations: set[int] | None = None,
) -> None:
"""
Save a chat turn by populating the assistant_message and creating related entities.
This function:
1. Updates the ChatMessage with text, reasoning tokens, and token count
2. Creates SearchDoc entries from ToolCall search_docs (for tool calls that returned documents)
3. Collects all unique SearchDocs from all tool calls and links them to ChatMessage
4. Builds citation mapping from citation_docs_info
5. Links all unique SearchDocs from tool calls to the ChatMessage
2. Creates DB SearchDoc entries from pre-deduplicated all_search_docs
3. Builds tool_call -> search_doc mapping for displayed docs
4. Builds citation mapping from citation_to_doc
5. Links all unique SearchDocs to the ChatMessage
6. Creates ToolCall entries and links SearchDocs to them
7. Builds the citations mapping for the ChatMessage
Deduplication Logic:
- SearchDocs are deduplicated using (document_id, chunk_ind, match_highlights) as the key
- This ensures that the same document/chunk with different match_highlights (from different
queries) are stored as separate SearchDoc entries
- Each ToolCall and ChatMessage will map to the correct version of the SearchDoc that
matches its specific query highlights
Args:
message_text: The message content to save
reasoning_tokens: Optional reasoning tokens for the message
tool_calls: List of tool call information to create ToolCall entries (may include search_docs)
citation_docs_info: List of citation document information for building citations mapping
citation_to_doc: Mapping from citation number to SearchDoc for building citations
all_search_docs: Pre-deduplicated search docs from ChatStateContainer
db_session: Database session for persistence
assistant_message: The ChatMessage object to populate (should already exist in DB)
is_clarification: Whether this assistant message is a clarification question (deep research flow)
emitted_citations: Set of citation numbers that were actually emitted during streaming.
If provided, only citations in this set will be saved; others are filtered out.
"""
# 1. Update ChatMessage with message content, reasoning tokens, and token count
assistant_message.message = message_text
@@ -200,53 +183,53 @@ def save_chat_turn(
else:
assistant_message.token_count = 0
# 2. Create SearchDoc entries from tool_calls
# Build mapping from SearchDoc to DB SearchDoc ID
# Use (document_id, chunk_ind, match_highlights) as key to avoid duplicates
# while ensuring different versions with different highlights are stored separately
search_doc_key_to_id: dict[tuple[str, int, tuple[str, ...]], int] = {}
tool_call_to_search_doc_ids: dict[str, list[int]] = {}
# 2. Create DB SearchDoc entries from pre-deduplicated all_search_docs
search_doc_key_to_id: dict[SearchDocKey, int] = {}
for key, search_doc_py in all_search_docs.items():
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[key] = db_search_doc.id
# Process tool calls and their search docs
# 3. Build tool_call -> search_doc mapping (for displayed docs in each tool call)
tool_call_to_search_doc_ids: dict[str, list[int]] = {}
for tool_call_info in tool_calls:
if tool_call_info.search_docs:
search_doc_ids_for_tool: list[int] = []
for search_doc_py in tool_call_info.search_docs:
# Create a unique key for this SearchDoc version
search_doc_key = _create_search_doc_key(search_doc_py)
# Check if we've already created this exact SearchDoc version
if search_doc_key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[search_doc_key])
key = ChatStateContainer.create_search_doc_key(search_doc_py)
if key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[key])
else:
# Create new DB SearchDoc entry
# Displayed doc not in all_search_docs - create it
# This can happen if displayed_docs contains docs not in search_docs
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[search_doc_key] = db_search_doc.id
search_doc_key_to_id[key] = db_search_doc.id
search_doc_ids_for_tool.append(db_search_doc.id)
tool_call_to_search_doc_ids[tool_call_info.tool_call_id] = list(
set(search_doc_ids_for_tool)
)
# 3. Collect all unique SearchDoc IDs from all tool calls to link to ChatMessage
# Use a set to deduplicate by ID (since we've already deduplicated by key above)
all_search_doc_ids_set: set[int] = set()
for search_doc_ids in tool_call_to_search_doc_ids.values():
all_search_doc_ids_set.update(search_doc_ids)
# Collect all search doc IDs for ChatMessage linking
all_search_doc_ids_set: set[int] = set(search_doc_key_to_id.values())
# 4. Build citation mapping from citation_docs_info
# 4. Build a citation mapping from the citation number to the saved DB SearchDoc ID
# Only include citations that were actually emitted during streaming
citation_number_to_search_doc_id: dict[int, int] = {}
for citation_doc_info in citation_docs_info:
# Extract SearchDoc pydantic model
search_doc_py = citation_doc_info.search_doc
for citation_num, search_doc_py in citation_to_doc.items():
# Skip citations that weren't actually emitted (if emitted_citations is provided)
if emitted_citations is not None and citation_num not in emitted_citations:
continue
# Create the unique key for this SearchDoc version
search_doc_key = _create_search_doc_key(search_doc_py)
search_doc_key = ChatStateContainer.create_search_doc_key(search_doc_py)
# Get the search doc ID (should already exist from processing tool_calls)
if search_doc_key in search_doc_key_to_id:
@@ -283,10 +266,7 @@ def save_chat_turn(
all_search_doc_ids_set.add(db_search_doc_id)
# Build mapping from citation number to search doc ID
if citation_doc_info.citation_number is not None:
citation_number_to_search_doc_id[citation_doc_info.citation_number] = (
db_search_doc_id
)
citation_number_to_search_doc_id[citation_num] = db_search_doc_id
# 5. Link all unique SearchDocs (from both tool calls and citations) to ChatMessage
final_search_doc_ids: list[int] = list(all_search_doc_ids_set)
@@ -306,23 +286,10 @@ def save_chat_turn(
tool_call_to_search_doc_ids=tool_call_to_search_doc_ids,
)
# 7. Build citations mapping from citation_docs_info
# Any citation_doc_info with a citation_number appeared in the text and should be mapped
citations: dict[int, int] = {}
for citation_doc_info in citation_docs_info:
if citation_doc_info.citation_number is not None:
search_doc_id = citation_number_to_search_doc_id.get(
citation_doc_info.citation_number
)
if search_doc_id is not None:
citations[citation_doc_info.citation_number] = search_doc_id
else:
logger.warning(
f"Citation number {citation_doc_info.citation_number} found in citation_docs_info "
f"but no matching search doc ID in mapping"
)
assistant_message.citations = citations if citations else None
# 7. Build citations mapping - use the mapping we already built in step 4
assistant_message.citations = (
citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
)
# Finally save the messages, tool calls, and docs
db_session.commit()

View File

@@ -208,8 +208,19 @@ OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 920
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
OPENSEARCH_ADMIN_PASSWORD = os.environ.get("OPENSEARCH_ADMIN_PASSWORD", "")
ENABLE_OPENSEARCH_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_FOR_ONYX", "").lower() == "true"
# This is the "base" config for now, the idea is that at least for our dev
# environments we always want to be dual indexing into both OpenSearch and Vespa
# to stress test the new codepaths. Only enable this if there is some instance
# of OpenSearch running for the relevant Onyx instance.
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "").lower() == "true"
)
# Given that the "base" config above is true, this enables whether we want to
# retrieve from OpenSearch or Vespa. We want to be able to quickly toggle this
# in the event we see issues with OpenSearch retrieval in our dev environments.
ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
)
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
@@ -738,6 +749,10 @@ JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default
LOG_ONYX_MODEL_INTERACTIONS = (
os.environ.get("LOG_ONYX_MODEL_INTERACTIONS", "").lower() == "true"
)
PROMPT_CACHE_CHAT_HISTORY = (
os.environ.get("PROMPT_CACHE_CHAT_HISTORY", "").lower() == "true"
)
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
LOG_VESPA_TIMING_INFORMATION = (
@@ -1016,3 +1031,14 @@ INSTANCE_TYPE = (
## Discord Bot Configuration
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")
DISCORD_BOT_INVOKE_CHAR = os.environ.get("DISCORD_BOT_INVOKE_CHAR", "!")
## Stripe Configuration
# URL to fetch the Stripe publishable key from a public S3 bucket.
# Publishable keys are safe to expose publicly - they can only initialize
# Stripe.js and tokenize payment info, not make charges or access data.
STRIPE_PUBLISHABLE_KEY_URL = (
"https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt"
)
# Override for local testing with Stripe test keys (pk_test_*)
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")

View File

@@ -1,6 +1,5 @@
import os
INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml"
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
NUM_RETURNED_HITS = 50

View File

@@ -15,6 +15,7 @@ from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.natural_language_processing.english_stopwords import ENGLISH_STOPWORDS_SET
from onyx.onyxbot.slack.models import ChannelType
from onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
@@ -113,7 +114,7 @@ def is_recency_query(query: str) -> bool:
if not has_recency_keyword:
return False
# Get combined stop words (NLTK + Slack-specific)
# Get combined stop words (English + Slack-specific)
all_stop_words = _get_combined_stop_words()
# Extract content words (excluding stop words)
@@ -488,7 +489,7 @@ def build_channel_override_query(channel_references: set[str], time_filter: str)
return f"__CHANNEL_OVERRIDE__ {channel_filter}{time_filter}"
# Slack-specific stop words (in addition to standard NLTK stop words)
# Slack-specific stop words (in addition to standard English stop words)
# These include Slack-specific terms and temporal/recency keywords
SLACK_SPECIFIC_STOP_WORDS = frozenset(
RECENCY_KEYWORDS
@@ -508,27 +509,16 @@ SLACK_SPECIFIC_STOP_WORDS = frozenset(
)
def _get_combined_stop_words() -> set[str]:
"""Get combined NLTK + Slack-specific stop words.
def _get_combined_stop_words() -> frozenset[str]:
"""Get combined English + Slack-specific stop words.
Returns a set of stop words for filtering content words.
Falls back to just Slack-specific stop words if NLTK is unavailable.
Returns a frozenset of stop words for filtering content words.
Note: Currently only supports English stop words. Non-English queries
may have suboptimal content word extraction. Future enhancement could
detect query language and load appropriate stop words.
"""
try:
from nltk.corpus import stopwords # type: ignore
# TODO: Support multiple languages - currently hardcoded to English
# Could detect language or allow configuration
nltk_stop_words = set(stopwords.words("english"))
except Exception:
# Fallback if NLTK not available
nltk_stop_words = set()
return nltk_stop_words | SLACK_SPECIFIC_STOP_WORDS
return ENGLISH_STOPWORDS_SET | SLACK_SPECIFIC_STOP_WORDS
def extract_content_words_from_recency_query(
@@ -536,7 +526,7 @@ def extract_content_words_from_recency_query(
) -> list[str]:
"""Extract meaningful content words from a recency query.
Filters out NLTK stop words, Slack-specific terms, channel references, and proper nouns.
Filters out English stop words, Slack-specific terms, channel references, and proper nouns.
Args:
query_text: The user's query text
@@ -545,7 +535,7 @@ def extract_content_words_from_recency_query(
Returns:
List of content words (up to MAX_CONTENT_WORDS)
"""
# Get combined stop words (NLTK + Slack-specific)
# Get combined stop words (English + Slack-specific)
all_stop_words = _get_combined_stop_words()
words = query_text.split()
@@ -567,6 +557,23 @@ def extract_content_words_from_recency_query(
return content_words_filtered[:MAX_CONTENT_WORDS]
def _is_valid_keyword_query(line: str) -> bool:
"""Check if a line looks like a valid keyword query vs explanatory text.
Returns False for lines that appear to be LLM explanations rather than keywords.
"""
# Reject lines that start with parentheses (explanatory notes)
if line.startswith("("):
return False
# Reject lines that are too long (likely sentences, not keywords)
# Keywords should be short - reject if > 50 chars or > 6 words
if len(line) > 50 or len(line.split()) > 6:
return False
return True
def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
"""Use LLM to expand query into multiple search variations.
@@ -589,10 +596,18 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
response_clean = _parse_llm_code_block_response(response)
# Split into lines and filter out empty lines
rephrased_queries = [
raw_queries = [
line.strip() for line in response_clean.split("\n") if line.strip()
]
# Filter out lines that look like explanatory text rather than keywords
rephrased_queries = [q for q in raw_queries if _is_valid_keyword_query(q)]
# Log if we filtered out garbage
if len(raw_queries) != len(rephrased_queries):
filtered_out = set(raw_queries) - set(rephrased_queries)
logger.warning(f"Filtered out non-keyword LLM responses: {filtered_out}")
# If no queries generated, use empty query
if not rephrased_queries:
logger.debug("No content keywords extracted from query expansion")

View File

@@ -144,10 +144,6 @@ class BasicChunkRequest(BaseModel):
# In case some queries favor recency more than other queries.
recency_bias_multiplier: float = 1.0
# Sometimes we may want to extract specific keywords from a more semantic query for
# a better keyword search.
query_keywords: list[str] | None = None # Not used currently
limit: int | None = None
offset: int | None = None # This one is not set currently
@@ -166,6 +162,8 @@ class ChunkIndexRequest(BasicChunkRequest):
# Calculated final filters
filters: IndexFilters
query_keywords: list[str] | None = None
class ContextExpansionType(str, Enum):
NOT_RELEVANT = "not_relevant"
@@ -372,6 +370,10 @@ class SearchDocsResponse(BaseModel):
# document id is the most staightforward way.
citation_mapping: dict[int, str]
# For cases where the frontend only needs to display a subset of the search docs
# The whole list is typically still needed for later steps but this set should be saved separately
displayed_docs: list[SearchDoc] | None = None
class SavedSearchDoc(SearchDoc):
db_doc_id: int
@@ -430,11 +432,6 @@ class SavedSearchDoc(SearchDoc):
return self_score < other_score
class CitationDocInfo(BaseModel):
search_doc: SearchDoc
citation_number: int | None
class SavedSearchDocWithContent(SavedSearchDoc):
"""Used for endpoints that need to return the actual contents of the retrieved
section in addition to the match_highlights."""

View File

@@ -19,6 +19,7 @@ from onyx.db.models import Persona
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.interfaces import LLM
from onyx.natural_language_processing.english_stopwords import strip_stopwords
from onyx.secondary_llm_flows.source_filter import extract_source_filter
from onyx.secondary_llm_flows.time_filter import extract_time_filter
from onyx.utils.logger import setup_logger
@@ -278,12 +279,16 @@ def search_pipeline(
bypass_acl=chunk_search_request.bypass_acl,
)
query_keywords = strip_stopwords(chunk_search_request.query)
query_request = ChunkIndexRequest(
query=chunk_search_request.query,
hybrid_alpha=chunk_search_request.hybrid_alpha,
recency_bias_multiplier=chunk_search_request.recency_bias_multiplier,
query_keywords=chunk_search_request.query_keywords,
query_keywords=query_keywords,
filters=filters,
limit=chunk_search_request.limit,
offset=chunk_search_request.offset,
)
retrieved_chunks = search_chunks(

View File

@@ -23,45 +23,6 @@ from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
def _dedupe_chunks(
    chunks: list[InferenceChunk],
) -> list[InferenceChunk]:
    """Collapse duplicate chunks keyed by (document_id, chunk_id).

    The first occurrence of a key wins unless a later duplicate has a strictly
    higher score; a missing score is treated as 0. Returns the surviving
    chunks (order follows dict insertion order of first occurrences).
    """
    best_by_key: dict[tuple[str, int], InferenceChunk] = {}
    for candidate in chunks:
        dedupe_key = (candidate.document_id, candidate.chunk_id)
        existing = best_by_key.get(dedupe_key)
        # Keep the existing chunk on ties — only a strictly better score replaces it.
        if existing is None or (existing.score or 0) < (candidate.score or 0):
            best_by_key[dedupe_key] = candidate
    return list(best_by_key.values())
def download_nltk_data() -> None:
    """Ensure the NLTK resources used by Onyx are present locally.

    Each required resource is checked via nltk.data.find and downloaded only
    when missing. Download failures are logged, never raised, so startup is
    best-effort here.
    """
    import nltk  # type: ignore[import-untyped]

    resources = {
        "stopwords": "corpora/stopwords",
        # "wordnet": "corpora/wordnet",  # Not in use
        "punkt_tab": "tokenizers/punkt_tab",
    }

    for name, lookup_path in resources.items():
        try:
            nltk.data.find(lookup_path)
        except LookupError:
            # Resource is missing — attempt a quiet download.
            try:
                logger.info(f"Downloading {name}...")
                nltk.download(name, quiet=True)
                logger.info(f"{name} downloaded successfully.")
            except Exception as e:
                logger.error(f"Failed to download {name}. Error: {e}")
        else:
            logger.info(f"{name} is already downloaded.")
def combine_retrieval_results(
chunk_sets: list[list[InferenceChunk]],
) -> list[InferenceChunk]:

View File

@@ -3,6 +3,8 @@ from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
@@ -18,45 +20,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def insert_input_prompt_if_not_exists(
    user: User | None,
    input_prompt_id: int | None,
    prompt: str,
    content: str,
    active: bool,
    is_public: bool,
    db_session: Session,
    commit: bool = True,
) -> InputPrompt:
    """Return an existing InputPrompt or create one when no match is found.

    Lookup is by primary key when ``input_prompt_id`` is provided; otherwise
    by prompt text scoped to the given user (or to ownerless prompts when
    ``user`` is None). A newly created prompt is forced public when there is
    no owning user. Commits only when ``commit`` is True.
    """
    if input_prompt_id is None:
        lookup = db_session.query(InputPrompt).filter(InputPrompt.prompt == prompt)
        # Scope the text lookup to this user's prompts, or to ownerless ones.
        lookup = (
            lookup.filter(InputPrompt.user_id == user.id)
            if user
            else lookup.filter(InputPrompt.user_id.is_(None))
        )
        existing = lookup.first()
    else:
        existing = db_session.query(InputPrompt).filter_by(id=input_prompt_id).first()

    if existing is not None:
        return existing

    created = InputPrompt(
        id=input_prompt_id,
        prompt=prompt,
        content=content,
        active=active,
        is_public=is_public or user is None,
        user_id=user.id if user else None,
    )
    db_session.add(created)
    if commit:
        db_session.commit()
    return created
def insert_input_prompt(
prompt: str,
content: str,
@@ -64,16 +27,41 @@ def insert_input_prompt(
user: User | None,
db_session: Session,
) -> InputPrompt:
input_prompt = InputPrompt(
user_id = user.id if user else None
# Use atomic INSERT ... ON CONFLICT DO NOTHING with RETURNING
# to avoid race conditions with the uniqueness check
stmt = pg_insert(InputPrompt).values(
prompt=prompt,
content=content,
active=True,
is_public=is_public,
user_id=user.id if user is not None else None,
user_id=user_id,
)
db_session.add(input_prompt)
db_session.commit()
# Use the appropriate constraint based on whether this is a user-owned or public prompt
if user_id is not None:
stmt = stmt.on_conflict_do_nothing(constraint="uq_inputprompt_prompt_user_id")
else:
# Partial unique indexes cannot be targeted by constraint name;
# must use index_elements + index_where
stmt = stmt.on_conflict_do_nothing(
index_elements=[InputPrompt.prompt],
index_where=InputPrompt.user_id.is_(None),
)
stmt = stmt.returning(InputPrompt)
result = db_session.execute(stmt)
input_prompt = result.scalar_one_or_none()
if input_prompt is None:
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
db_session.commit()
return input_prompt
@@ -98,23 +86,40 @@ def update_input_prompt(
input_prompt.content = content
input_prompt.active = active
db_session.commit()
try:
db_session.commit()
except IntegrityError:
db_session.rollback()
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
return input_prompt
def validate_user_prompt_authorization(
user: User | None, input_prompt: InputPrompt
) -> bool:
"""
Check if the user is authorized to modify the given input prompt.
Returns True only if the user owns the prompt.
Returns False for public prompts (only admins can modify those),
unless auth is disabled (then anyone can manage public prompts).
"""
prompt = InputPromptSnapshot.from_model(input_prompt=input_prompt)
if prompt.user_id is not None:
if user is None:
return False
# Public prompts cannot be modified via the user API (unless auth is disabled)
if prompt.is_public or prompt.user_id is None:
return AUTH_TYPE == AuthType.DISABLED
user_details = UserInfo.from_model(user)
if str(user_details.id) != str(prompt.user_id):
return False
return True
# User must be logged in
if user is None:
return False
# User must own the prompt
user_details = UserInfo.from_model(user)
return str(user_details.id) == str(prompt.user_id)
def remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> None:

View File

@@ -9,6 +9,9 @@ def get_memories(user: User | None, db_session: Session) -> list[str]:
if user is None:
return []
if not user.use_memories:
return []
user_info = [
f"User's name: {user.personal_name}" if user.personal_name else "",
f"User's role: {user.personal_role}" if user.personal_role else "",

View File

@@ -188,6 +188,7 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
nullable=True,
default=None,
)
chat_background: Mapped[str | None] = mapped_column(String, nullable=True)
# personalization fields are exposed via the chat user settings "Personalization" tab
personal_name: Mapped[str | None] = mapped_column(String, nullable=True)
personal_role: Mapped[str | None] = mapped_column(String, nullable=True)
@@ -3626,6 +3627,18 @@ class InputPrompt(Base):
ForeignKey("user.id", ondelete="CASCADE"), nullable=True
)
__table_args__ = (
# Unique constraint on (prompt, user_id) for user-owned prompts
UniqueConstraint("prompt", "user_id", name="uq_inputprompt_prompt_user_id"),
# Partial unique index for public prompts (user_id IS NULL)
Index(
"uq_inputprompt_prompt_public",
"prompt",
unique=True,
postgresql_where=text("user_id IS NULL"),
),
)
class InputPrompt__User(Base):
__tablename__ = "inputprompt__user"
@@ -3634,7 +3647,7 @@ class InputPrompt__User(Base):
ForeignKey("inputprompt.id"), primary_key=True
)
user_id: Mapped[UUID | None] = mapped_column(
ForeignKey("inputprompt.id"), primary_key=True
ForeignKey("user.id"), primary_key=True
)
disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

View File

@@ -20,7 +20,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.key_value_store.factory import get_kv_store
from onyx.utils.logger import setup_logger
@@ -80,39 +80,43 @@ def _perform_index_swap(
db_session=db_session,
)
# remove the old index from the vector db
document_index = get_default_document_index(new_search_settings, None)
# This flow is for checking and possibly creating an index so we get all
# indices.
document_indices = get_all_document_indices(new_search_settings, None, None)
WAIT_SECONDS = 5
success = False
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
try:
logger.notice(
f"Vespa index swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
)
for document_index in document_indices:
success = False
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
try:
logger.notice(
f"Document index {document_index.__class__.__name__} swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
)
logger.notice("Vespa index swap complete.")
success = True
break
except Exception:
logger.exception(
f"Vespa index swap did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
logger.notice("Document index swap complete.")
success = True
break
except Exception:
logger.exception(
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
f"The document index services may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not success:
logger.error(
f"Vespa index swap did not succeed. Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
)
return None
if not success:
logger.error(
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
f"Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
)
return None
return current_search_settings

View File

@@ -139,6 +139,20 @@ def update_user_theme_preference(
db_session.commit()
def update_user_chat_background(
user_id: UUID,
chat_background: str | None,
db_session: Session,
) -> None:
"""Update user's chat background setting."""
db_session.execute(
update(User)
.where(User.id == user_id) # type: ignore
.values(chat_background=chat_background)
)
db_session.commit()
def update_user_personalization(
user_id: UUID,
*,

View File

@@ -287,6 +287,7 @@ def run_deep_research_llm_loop(
token_count=100,
message_type=MessageType.USER,
)
truncated_message_history = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,

View File

@@ -2,13 +2,18 @@ from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
def generate_enriched_content_for_chunk(chunk: DocMetadataAwareIndexChunk) -> str:
def generate_enriched_content_for_chunk_text(chunk: DocMetadataAwareIndexChunk) -> str:
    """Concatenate title prefix, doc summary, content, chunk context and the
    keyword-form metadata suffix — the text stored for keyword (BM25) search."""
    return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_keyword}"
def generate_enriched_content_for_chunk_embedding(chunk: DocAwareChunk) -> str:
    """Concatenate title prefix, doc summary, content, chunk context and the
    semantic-form metadata suffix — the text fed to the embedder."""
    return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
def cleanup_content_for_chunks(
chunks: list[InferenceChunkUncleaned],
) -> list[InferenceChunk]:

View File

@@ -1,9 +1,8 @@
import httpx
from sqlalchemy.orm import Session
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchOldDocumentIndex,
@@ -17,17 +16,24 @@ def get_default_document_index(
secondary_search_settings: SearchSettings | None,
httpx_client: httpx.Client | None = None,
) -> DocumentIndex:
"""Primary index is the index that is used for querying/updating etc.
Secondary index is for when both the currently used index and the upcoming
index both need to be updated, updates are applied to both indices"""
"""Gets the default document index from env vars.
To be used for retrieval only. Indexing should be done through both indices
until Vespa is deprecated.
Pre-existing docstring for this function, although secondary indices are not
currently supported:
Primary index is the index that is used for querying/updating etc. Secondary
index is for when both the currently used index and the upcoming index both
need to be updated, updates are applied to both indices.
"""
secondary_index_name: str | None = None
secondary_large_chunks_enabled: bool | None = None
if secondary_search_settings:
secondary_index_name = secondary_search_settings.index_name
secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled
if ENABLE_OPENSEARCH_FOR_ONYX:
if ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX:
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
secondary_index_name=secondary_index_name,
@@ -47,12 +53,48 @@ def get_default_document_index(
)
def get_current_primary_default_document_index(db_session: Session) -> DocumentIndex:
def get_all_document_indices(
search_settings: SearchSettings,
secondary_search_settings: SearchSettings | None,
httpx_client: httpx.Client | None = None,
) -> list[DocumentIndex]:
"""Gets all document indices.
NOTE: Will only return an OpenSearch index interface if
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX is True. This is so we don't break flows
where we know it won't be enabled.
Used for indexing only. Until Vespa is deprecated we will index into both
document indices. Retrieval is done through only one index however.
Large chunks and secondary indices are not currently supported so we
hardcode appropriate values.
"""
TODO: Use redis to cache this or something
"""
search_settings = get_current_search_settings(db_session)
return get_default_document_index(
search_settings,
None,
vespa_document_index = VespaIndex(
index_name=search_settings.index_name,
secondary_index_name=(
secondary_search_settings.index_name if secondary_search_settings else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=(
secondary_search_settings.large_chunks_enabled
if secondary_search_settings
else None
),
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)
result: list[DocumentIndex] = [vespa_document_index]
if opensearch_document_index:
result.append(opensearch_document_index)
return result

View File

@@ -1,4 +1,5 @@
import logging
import time
from typing import Any
from typing import Generic
from typing import TypeVar
@@ -569,6 +570,9 @@ class OpenSearchClient:
def close(self) -> None:
"""Closes the client.
TODO(andrei): Can we have some way to auto close when the client no
longer has any references?
Raises:
Exception: There was an error closing the client.
"""
@@ -596,3 +600,55 @@ class OpenSearchClient:
)
hits_second_layer: list[Any] = hits_first_layer.get("hits", [])
return hits_second_layer
def wait_for_opensearch_with_timeout(
    wait_interval_s: int = 5,
    wait_limit_s: int = 60,
    client: OpenSearchClient | None = None,
) -> bool:
    """Waits for OpenSearch to become ready subject to a timeout.

    Will create a new dummy client if no client is provided. Will close this
    client at the end of the function. Will not close the client if it was
    supplied.

    Args:
        wait_interval_s: The interval in seconds to wait between checks.
            Defaults to 5.
        wait_limit_s: The total timeout in seconds to wait for OpenSearch to
            become ready. Defaults to 60.
        client: The OpenSearch client to use for pinging. If None, a new dummy
            client will be created. Defaults to None.

    Returns:
        True if OpenSearch is ready, False otherwise.
    """
    owns_client = client is None
    if owns_client:
        # NOTE: index_name does not matter because we are only using this object
        # to ping.
        # TODO(andrei): Make this better.
        client = OpenSearchClient(index_name="")
    try:
        start = time.monotonic()
        while not client.ping():
            elapsed = time.monotonic() - start
            if elapsed > wait_limit_s:
                logger.info(
                    f"[OpenSearch] Readiness probe did not succeed within the timeout "
                    f"({wait_limit_s} seconds)."
                )
                return False
            logger.info(
                f"[OpenSearch] Readiness probe ongoing. elapsed={elapsed:.1f} timeout={wait_limit_s:.1f}"
            )
            time.sleep(wait_interval_s)
        logger.info("[OpenSearch] Readiness probe succeeded. Continuing...")
        return True
    finally:
        # Only close the client we created; a caller-supplied client is theirs.
        if owns_client:
            client.close()

View File

@@ -17,7 +17,7 @@ from onyx.db.enums import EmbeddingPrecision
from onyx.db.models import DocumentSource
from onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk,
generate_enriched_content_for_chunk_text,
)
from onyx.document_index.interfaces import DocumentIndex as OldDocumentIndex
from onyx.document_index.interfaces import (
@@ -140,9 +140,12 @@ def _convert_onyx_chunk_to_opensearch_document(
return DocumentChunk(
document_id=chunk.source_document.id,
chunk_index=chunk.chunk_id,
title=chunk.source_document.title,
# Use get_title_for_document_index to match the logic used when creating
# the title_embedding in the embedder. This method falls back to
# semantic_identifier when title is None (but not empty string).
title=chunk.source_document.get_title_for_document_index(),
title_vector=chunk.title_embedding,
content=generate_enriched_content_for_chunk(chunk),
content=generate_enriched_content_for_chunk_text(chunk),
content_vector=chunk.embeddings.full_embedding,
source_type=chunk.source_document.source.value,
metadata_list=chunk.source_document.get_metadata_str_attributes(),
@@ -421,6 +424,24 @@ class OpenSearchDocumentIndex(DocumentIndex):
def verify_and_create_index_if_necessary(
self, embedding_dim: int, embedding_precision: EmbeddingPrecision
) -> None:
"""Verifies and creates the index if necessary.
Also puts the desired search pipeline state, creating the pipelines if
they do not exist and updating them otherwise.
Args:
embedding_dim: Vector dimensionality for the vector similarity part
of the search.
embedding_precision: Precision of the values of the vectors for the
similarity part of the search.
Raises:
RuntimeError: There was an error verifying or creating the index or
search pipelines.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if necessary."
)
expected_mappings = DocumentSchema.get_document_schema(
embedding_dim, self._tenant_state.multitenant
)
@@ -450,6 +471,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
chunks: list[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
) -> list[DocumentInsertionRecord]:
logger.debug(
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks for index {self._index_name}."
)
# Set of doc IDs.
unique_docs_to_be_indexed: set[str] = set()
document_indexing_results: list[DocumentInsertionRecord] = []
@@ -494,6 +518,8 @@ class OpenSearchDocumentIndex(DocumentIndex):
def delete(self, document_id: str, chunk_count: int | None = None) -> int:
"""Deletes all chunks for a given document.
Does nothing if the specified document ID does not exist.
TODO(andrei): Make this method require supplying source type.
TODO(andrei): Consider implementing this method to delete on document
chunk IDs vs querying for matching document chunks.
@@ -510,6 +536,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
Returns:
The number of chunks successfully deleted.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Deleting document {document_id} from index {self._index_name}."
)
query_body = DocumentQuery.delete_from_document_id_query(
document_id=document_id,
tenant_state=self._tenant_state,
@@ -523,6 +552,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
) -> None:
"""Updates some set of chunks.
NOTE: Will raise if the specified document chunks do not exist.
NOTE: Requires document chunk count be known; will raise if it is not.
NOTE: Each update request must have some field to update; if not it is
assumed there is a bug in the caller and this will raise.
@@ -539,6 +569,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
RuntimeError: Failed to update some or all of the chunks for the
specified documents.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index {self._index_name}."
)
for update_request in update_requests:
properties_to_update: dict[str, Any] = dict()
# TODO(andrei): Nit but consider if we can use DocumentChunk
@@ -604,6 +637,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
TODO(andrei): Consider implementing this method to retrieve on document
chunk IDs vs querying for matching document chunks.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index {self._index_name}."
)
results: list[InferenceChunk] = []
for chunk_request in chunk_requests:
search_hits: list[SearchHit[DocumentChunk]] = []
@@ -643,6 +679,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
num_to_retrieve: int,
offset: int = 0,
) -> list[InferenceChunk]:
logger.debug(
f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}."
)
query_body = DocumentQuery.get_hybrid_search_query(
query_text=query,
query_vector=query_embedding,

View File

@@ -17,7 +17,7 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
get_experts_stores_representations,
)
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk,
generate_enriched_content_for_chunk_text,
)
from onyx.document_index.document_index_utils import get_uuid_from_chunk
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old
@@ -186,7 +186,7 @@ def _index_vespa_chunk(
# For the BM25 index, the keyword suffix is used, the vector is already generated with the more
# natural language representation of the metadata section
CONTENT: remove_invalid_unicode_chars(
generate_enriched_content_for_chunk(chunk)
generate_enriched_content_for_chunk_text(chunk)
),
# This duplication of `content` is needed for keyword highlighting
# Note that it's not exactly the same as the actual content

View File

@@ -7,6 +7,9 @@ from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import DocumentFailure
from onyx.db.models import SearchSettings
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk_embedding,
)
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocAwareChunk
@@ -126,7 +129,7 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
if chunk.large_chunk_reference_ids:
large_chunks_present = True
chunk_text = (
f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
generate_enriched_content_for_chunk_embedding(chunk)
) or chunk.source_document.get_title_for_document_index()
if not chunk_text:

View File

@@ -37,6 +37,7 @@ from onyx.document_index.document_index_utils import (
get_multipass_config,
)
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import DocumentMetadata
from onyx.document_index.interfaces import IndexBatchParams
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
@@ -163,7 +164,7 @@ def index_doc_batch_with_handler(
*,
chunker: Chunker,
embedder: IndexingEmbedder,
document_index: DocumentIndex,
document_indices: list[DocumentIndex],
document_batch: list[Document],
request_id: str | None,
tenant_id: str,
@@ -176,7 +177,7 @@ def index_doc_batch_with_handler(
index_pipeline_result = index_doc_batch(
chunker=chunker,
embedder=embedder,
document_index=document_index,
document_indices=document_indices,
document_batch=document_batch,
request_id=request_id,
tenant_id=tenant_id,
@@ -627,7 +628,7 @@ def index_doc_batch(
document_batch: list[Document],
chunker: Chunker,
embedder: IndexingEmbedder,
document_index: DocumentIndex,
document_indices: list[DocumentIndex],
request_id: str | None,
tenant_id: str,
adapter: IndexingBatchAdapter,
@@ -743,47 +744,57 @@ def index_doc_batch(
short_descriptor_log = str(short_descriptor_list)[:1024]
logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
# A document will not be spread across different batches, so all the
# documents with chunks in this set, are fully represented by the chunks
# in this set
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
for document_index in document_indices:
# A document will not be spread across different batches, so all the
# documents with chunks in this set, are fully represented by the chunks
# in this set
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
all_returned_doc_ids = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
all_returned_doc_ids: set[str] = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
                f"This occurred for document index {document_index.__class__.__name__}"
)
# We treat the first document index we got as the primary one used
# for reporting the state of indexing.
if primary_doc_idx_insertion_records is None:
primary_doc_idx_insertion_records = insertion_records
if primary_doc_idx_vector_db_write_failures is None:
primary_doc_idx_vector_db_write_failures = vector_db_write_failures
adapter.post_index(
context=context,
@@ -792,11 +803,15 @@ def index_doc_batch(
result=result,
)
assert primary_doc_idx_insertion_records is not None
assert primary_doc_idx_vector_db_write_failures is not None
return IndexingPipelineResult(
new_docs=len([r for r in insertion_records if not r.already_existed]),
new_docs=len(
[r for r in primary_doc_idx_insertion_records if not r.already_existed]
),
total_docs=len(filtered_documents),
total_chunks=len(chunks_with_embeddings),
failures=vector_db_write_failures + embedding_failures,
failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
)
@@ -805,7 +820,7 @@ def run_indexing_pipeline(
document_batch: list[Document],
request_id: str | None,
embedder: IndexingEmbedder,
document_index: DocumentIndex,
document_indices: list[DocumentIndex],
db_session: Session,
tenant_id: str,
adapter: IndexingBatchAdapter,
@@ -846,7 +861,7 @@ def run_indexing_pipeline(
return index_doc_batch_with_handler(
chunker=chunker,
embedder=embedder,
document_index=document_index,
document_indices=document_indices,
document_batch=document_batch,
request_id=request_id,
tenant_id=tenant_id,

View File

@@ -41,6 +41,11 @@ alphanum_regex = re.compile(r"[^a-z0-9]+")
rem_email_regex = re.compile(r"(?<=\S)@([a-z0-9-]+)\.([a-z]{2,6})$")
def _ngrams(sequence: str, n: int) -> list[tuple[str, ...]]:
"""Generate n-grams from a sequence."""
return [tuple(sequence[i : i + n]) for i in range(len(sequence) - n + 1)]
def _clean_name(entity_name: str) -> str:
"""
Clean an entity string by removing non-alphanumeric characters and email addresses.
@@ -58,8 +63,6 @@ def _normalize_one_entity(
attributes: dict[str, str],
allowed_docs_temp_view_name: str | None = None,
) -> str | None:
from nltk import ngrams # type: ignore
"""
Matches a single entity to the best matching entity of the same type.
"""
@@ -150,16 +153,16 @@ def _normalize_one_entity(
# step 2: do a weighted ngram analysis and damerau levenshtein distance to rerank
n1, n2, n3 = (
set(ngrams(cleaned_entity, 1)),
set(ngrams(cleaned_entity, 2)),
set(ngrams(cleaned_entity, 3)),
set(_ngrams(cleaned_entity, 1)),
set(_ngrams(cleaned_entity, 2)),
set(_ngrams(cleaned_entity, 3)),
)
for i, (candidate_id_name, candidate_name, _) in enumerate(candidates):
cleaned_candidate = _clean_name(candidate_name)
h_n1, h_n2, h_n3 = (
set(ngrams(cleaned_candidate, 1)),
set(ngrams(cleaned_candidate, 2)),
set(ngrams(cleaned_candidate, 3)),
set(_ngrams(cleaned_candidate, 1)),
set(_ngrams(cleaned_candidate, 2)),
set(_ngrams(cleaned_candidate, 3)),
)
# compute ngram overlap, renormalize scores if the names are too short for larger ngrams

View File

@@ -54,11 +54,6 @@
"model_vendor": "amazon",
"model_version": "v1:0"
},
"anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -1465,11 +1460,6 @@
"model_vendor": "mistral",
"model_version": "v0:1"
},
"bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"chat-bison": {
"display_name": "Chat Bison",
"model_vendor": "google",
@@ -1500,16 +1490,6 @@
"model_vendor": "openai",
"model_version": "latest"
},
"claude-3-5-haiku-20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"claude-3-5-haiku-latest": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "latest"
},
"claude-3-5-sonnet-20240620": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -1715,11 +1695,6 @@
"model_vendor": "amazon",
"model_version": "v1:0"
},
"eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -3251,15 +3226,6 @@
"model_vendor": "anthropic",
"model_version": "latest"
},
"openrouter/anthropic/claude-3-5-haiku": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic"
},
"openrouter/anthropic/claude-3-5-haiku-20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"openrouter/anthropic/claude-3-haiku": {
"display_name": "Claude Haiku 3",
"model_vendor": "anthropic"
@@ -3774,11 +3740,6 @@
"model_vendor": "amazon",
"model_version": "1:0"
},
"us.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -3899,15 +3860,6 @@
"model_vendor": "twelvelabs",
"model_version": "v1:0"
},
"vertex_ai/claude-3-5-haiku": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic"
},
"vertex_ai/claude-3-5-haiku@20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"vertex_ai/claude-3-5-sonnet": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic"

View File

@@ -301,6 +301,12 @@ class LitellmLLM(LLM):
)
is_ollama = self._model_provider == LlmProviderNames.OLLAMA_CHAT
is_mistral = self._model_provider == LlmProviderNames.MISTRAL
is_vertex_ai = self._model_provider == LlmProviderNames.VERTEX_AI
# Vertex Anthropic Opus 4.5 rejects output_config (LiteLLM maps reasoning_effort).
# Keep this guard until LiteLLM/Vertex accept the field for this model.
is_vertex_opus_4_5 = (
is_vertex_ai and "claude-opus-4-5" in self.config.model_name.lower()
)
#########################
# Build arguments
@@ -331,12 +337,16 @@ class LitellmLLM(LLM):
# Temperature
temperature = 1 if is_reasoning else self._temperature
if stream:
if stream and not is_vertex_opus_4_5:
optional_kwargs["stream_options"] = {"include_usage": True}
# Use configured default if not provided (if not set in env, low)
reasoning_effort = reasoning_effort or ReasoningEffort(DEFAULT_REASONING_EFFORT)
if is_reasoning and reasoning_effort != ReasoningEffort.OFF:
if (
is_reasoning
and reasoning_effort != ReasoningEffort.OFF
and not is_vertex_opus_4_5
):
if is_openai_model:
# OpenAI API does not accept reasoning params for GPT 5 chat models
# (neither reasoning nor reasoning_effort are accepted)

View File

@@ -0,0 +1,225 @@
import re
# Standard English stopword inventory (NLTK-style list, including common
# contractions and their bare stems like "aren"/"aren't").
ENGLISH_STOPWORDS: list[str] = (
    """
    a about above after again against ain all am an and any are aren aren't
    as at be because been before being below between both but by can couldn
    couldn't d did didn didn't do does doesn doesn't doing don don't down
    during each few for from further had hadn hadn't has hasn hasn't have
    haven haven't having he he'd he'll he's her here hers herself him himself
    his how i i'd i'll i'm i've if in into is isn isn't it it'd it'll it's
    its itself just ll m ma me mightn mightn't more most mustn mustn't my
    myself needn needn't no nor not now o of off on once only or other our
    ours ourselves out over own re s same shan shan't she she'd she'll she's
    should should've shouldn shouldn't so some such t than that that'll the
    their theirs them themselves then there these they they'd they'll they're
    they've this those through to too under until up ve very was wasn wasn't
    we we'd we'll we're we've were weren weren't what when where which while
    who whom why will with won won't wouldn wouldn't y you you'd you'll
    you're you've your yours yourself yourselves
    """
).split()

# Frozen set for O(1) membership checks in strip_stopwords.
ENGLISH_STOPWORDS_SET = frozenset(ENGLISH_STOPWORDS)

# Trims punctuation from either end of a token while leaving word characters
# and apostrophes (contractions) untouched.
_EDGE_PUNCTUATION = re.compile(r"^[^\w']+|[^\w']+$")


def strip_stopwords(text: str) -> list[str]:
    """Drop English stopwords from *text*, returning the surviving words.

    Comparison is case-insensitive and ignores punctuation hugging either
    end of a word, while punctuation inside a word (e.g. the apostrophe in
    a contraction) still participates: "you're" matches the stopword
    "you're", whereas "youre" would not.
    """
    kept: list[str] = []
    for token in text.split():
        # Compare against the stopword set using the punctuation-trimmed,
        # lowercased core, but keep the original token in the output.
        bare = _EDGE_PUNCTUATION.sub("", token)
        if bare.lower() not in ENGLISH_STOPWORDS_SET:
            kept.append(token)
    return kept

View File

@@ -32,9 +32,6 @@ from onyx.configs.constants import MessageType
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.onyxbot_configs import NOTIFY_SLACKBOT_NO_ANSWER
from onyx.connectors.slack.utils import expert_info_from_slack_id
from onyx.context.search.retrieval.search_runner import (
download_nltk_data,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
@@ -1129,9 +1126,6 @@ if __name__ == "__main__":
set_is_ee_based_on_env_variable()
logger.info("Verifying query preprocessing (NLTK) data is downloaded")
download_nltk_data()
try:
# Keep the main thread alive
while tenant_handler.running:

View File

@@ -96,7 +96,7 @@ ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."
CHAT_NAMING_SYSTEM_PROMPT = """
Given the conversation history, provide a SHORT name for the conversation. Focus the name on the important keywords to convey the topic of the conversation. \
Make sure the name is in the same language as the user's language.
Make sure the name is in the same language as the user's first message.
IMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. NEVER USE MORE THAN 5 WORDS, LESS IS FINE.
""".strip()

View File

@@ -19,7 +19,7 @@ If you need to ask questions, follow these guidelines:
- Be concise and do not ask more than 5 questions.
- If there are ambiguous terms or questions, ask the user to clarify.
- Your questions should be a numbered list for clarity.
- Respond in the user's language.
- Respond in the same language as the user's query.
- Make sure to gather all the information needed to carry out the research task in a concise, well-structured manner.{{internal_search_clarification_guidance}}
- Wrap up with a quick sentence on what the clarification will help with, it's ok to reference the user query closely here.
""".strip()
@@ -44,9 +44,9 @@ For context, the date is {current_datetime}.
The research plan should be formatted as a numbered list of steps and have 6 or less individual steps.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps. The plan should be in the same language as the user's query.
Output only the numbered list of steps with no additional prefix or suffix. Respond in the user's language.
Output only the numbered list of steps with no additional prefix or suffix.
""".strip()
@@ -76,10 +76,11 @@ You have currently used {{current_cycle_count}} of {{max_cycles}} max research c
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation.
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, other research agents, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}. The research task should be in the user's language.{{internal_search_research_task_guidance}}
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.
@@ -129,7 +130,7 @@ For context, the date is {current_datetime}.
Users have explicitly selected the deep research mode and will expect a long and detailed answer. It is ok and encouraged that your response is several pages long.
You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible. Respond in the user's language.
You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible.
Not every fact retrieved will be relevant to the user's query.
@@ -165,10 +166,11 @@ You have currently used {{current_cycle_count}} of {{max_cycles}} max research c
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation.
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}. The research task should be in the user's language.{{internal_search_research_task_guidance}}
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.

View File

@@ -1,30 +1,39 @@
from onyx.configs.app_configs import MAX_SLACK_QUERY_EXPANSIONS
SLACK_QUERY_EXPANSION_PROMPT = f"""
Rewrite the user's query and, if helpful, split it into at most {MAX_SLACK_QUERY_EXPANSIONS} \
keyword-only queries, so that Slack's keyword search yields the best matches.
Rewrite the user's query into at most {MAX_SLACK_QUERY_EXPANSIONS} keyword-only queries for Slack's keyword search.
Keep in mind the Slack's search behavior:
- Pure keyword AND search (no semantics).
- Word order matters.
- More words = fewer matches, so keep each query concise.
- IMPORTANT: Prefer simple 1-2 word queries over longer multi-word queries.
Slack search behavior:
- Pure keyword AND search (no semantics)
- More words = fewer matches, so keep queries concise (1-3 words)
Critical: Extract ONLY keywords that would actually appear in Slack message content.
ALWAYS include:
- Person names (e.g., "Sarah Chen", "Mike Johnson") - people search for messages from/about specific people
- Project/product names, technical terms, proper nouns
- Actual content words: "performance", "bug", "deployment", "API", "error"
DO NOT include:
- Meta-words: "topics", "conversations", "discussed", "summary", "messages", "big", "main", "talking"
- Temporal: "today", "yesterday", "week", "month", "recent", "past", "last"
- Channels/Users: "general", "eng-general", "engineering", "@username"
DO include:
- Actual content: "performance", "bug", "deployment", "API", "database", "error", "feature"
- Meta-words: "topics", "conversations", "discussed", "summary", "messages"
- Temporal: "today", "yesterday", "week", "month", "recent", "last"
- Channel names: "general", "eng-general", "random"
Examples:
Query: "what are the big topics in eng-general this week?"
Output:
Query: "messages with Sarah about the deployment"
Output:
Sarah deployment
Sarah
deployment
Query: "what did Mike say about the budget?"
Output:
Mike budget
Mike
budget
Query: "performance issues in eng-general"
Output:
performance issues
@@ -41,7 +50,7 @@ Now process this query:
{{query}}
Output:
Output (keywords only, one per line, NO explanations or commentary):
"""
SLACK_DATE_EXTRACTION_PROMPT = """

View File

@@ -48,7 +48,7 @@ Do not use the "site:" operator in your web search queries.
OPEN_URLS_GUIDANCE = """
## open_url
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches.
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your web searches or user specified URLs.
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources.
You should almost always use open_url after a web_search call. Use this tool when a user asks about a specific provided URL.
"""

View File

@@ -1,24 +0,0 @@
input_prompts:
- id: -5
prompt: "Elaborate"
content: "Elaborate on the above, give me a more in depth explanation."
active: true
is_public: true
- id: -4
prompt: "Reword"
content: "Help me rewrite the following politely and concisely for professional communication:\n"
active: true
is_public: true
- id: -3
prompt: "Email"
content: "Write a professional email for me including a subject line, signature, etc. Template the parts that need editing with [ ]. The email should cover the following points:\n"
active: true
is_public: true
- id: -2
prompt: "Debug"
content: "Provide step-by-step troubleshooting instructions for the following issue:\n"
active: true
is_public: true

View File

@@ -1,40 +0,0 @@
import yaml
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.db.input_prompt import insert_input_prompt_if_not_exists
from onyx.utils.logger import setup_logger
logger = setup_logger()
def load_input_prompts_from_yaml(
    db_session: Session, input_prompts_yaml: str = INPUT_PROMPT_YAML
) -> None:
    """Seed the DB with the default input prompts declared in the YAML file.

    Existing prompts (matched by id) are left untouched; only missing ones
    are inserted.
    """
    with open(input_prompts_yaml, "r") as yaml_file:
        parsed = yaml.safe_load(yaml_file)

    for prompt_entry in parsed.get("input_prompts", []):
        # Deleting one of these prompts is a hard delete in the DB, so it is
        # recreated on server startup; users can simply deactivate it instead,
        # which is only a light inconvenience.
        insert_input_prompt_if_not_exists(
            user=None,
            input_prompt_id=prompt_entry.get("id"),
            prompt=prompt_entry["prompt"],
            content=prompt_entry["content"],
            is_public=prompt_entry["is_public"],
            active=prompt_entry.get("active", True),
            db_session=db_session,
            commit=True,
        )
def load_chat_yamls(
    db_session: Session,
    input_prompts_yaml: str = INPUT_PROMPT_YAML,
) -> None:
    """Load every chat-related YAML configuration.

    Currently this is just the input prompts (called "prompt shortcuts" on
    the frontend).
    """
    load_input_prompts_from_yaml(
        db_session=db_session, input_prompts_yaml=input_prompts_yaml
    )

View File

@@ -32,6 +32,7 @@ def get_document_info(
db_session: Session = Depends(get_session),
) -> DocumentInfo:
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
user_acl_filters = build_access_filters_for_user(user, db_session)
@@ -76,6 +77,7 @@ def get_chunk_info(
db_session: Session = Depends(get_session),
) -> ChunkInfo:
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
user_acl_filters = build_access_filters_for_user(user, db_session)

View File

@@ -821,20 +821,36 @@ def _ensure_mcp_server_owner_or_admin(server: DbMCPServer, user: User | None) ->
def _db_mcp_server_to_api_mcp_server(
db_server: DbMCPServer, email: str, db: Session, include_auth_config: bool = False
db_server: DbMCPServer,
db: Session,
request_user: User | None,
include_auth_config: bool = False,
) -> MCPServer:
"""Convert database MCP server to API model"""
email = request_user.email if request_user else ""
# Check if user has authentication configured and extract credentials
auth_performer = db_server.auth_performer
user_authenticated: bool | None = None
user_credentials = None
admin_credentials = None
can_view_admin_credentials = bool(include_auth_config) and (
request_user is not None
and (
request_user.role == UserRole.ADMIN
or (request_user.email and request_user.email == db_server.owner)
)
)
if db_server.auth_type == MCPAuthenticationType.NONE:
user_authenticated = True # No auth required
elif auth_performer == MCPAuthenticationPerformer.ADMIN:
user_authenticated = db_server.admin_connection_config is not None
if include_auth_config and db_server.admin_connection_config is not None:
if (
can_view_admin_credentials
and db_server.admin_connection_config is not None
and include_auth_config
):
if db_server.auth_type == MCPAuthenticationType.API_TOKEN:
admin_credentials = {
"api_key": db_server.admin_connection_config.config["headers"][
@@ -890,11 +906,12 @@ def _db_mcp_server_to_api_mcp_server(
if client_info:
if not client_info.client_id or not client_info.client_secret:
raise ValueError("Stored client info had empty client ID or secret")
admin_credentials = {
"client_id": client_info.client_id,
"client_secret": client_info.client_secret,
}
else:
if can_view_admin_credentials:
admin_credentials = {
"client_id": client_info.client_id,
"client_secret": client_info.client_secret,
}
elif can_view_admin_credentials:
admin_credentials = {}
logger.warning(f"No client info found for server {db_server.name}")
@@ -961,14 +978,13 @@ def get_mcp_servers_for_assistant(
logger.info(f"Fetching MCP servers for assistant: {assistant_id}")
email = user.email if user else ""
try:
persona_id = int(assistant_id)
db_mcp_servers = get_mcp_servers_for_persona(persona_id, db, user)
# Convert to API model format with opportunistic token refresh for OAuth
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, email, db)
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
for db_server in db_mcp_servers
]
@@ -981,6 +997,25 @@ def get_mcp_servers_for_assistant(
raise HTTPException(status_code=500, detail="Failed to fetch MCP servers")
@router.get("/servers", response_model=MCPServersResponse)
def get_mcp_servers_for_user(
db: Session = Depends(get_session),
user: User | None = Depends(current_user),
) -> MCPServersResponse:
"""List all MCP servers for use in agent configuration and chat UI.
This endpoint is intentionally available to all authenticated users so they
can attach MCP actions to assistants. Sensitive admin credentials are never
returned.
"""
db_mcp_servers = get_all_mcp_servers(db)
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
for db_server in db_mcp_servers
]
return MCPServersResponse(mcp_servers=mcp_servers)
def _get_connection_config(
mcp_server: DbMCPServer, is_admin: bool, user: User | None, db_session: Session
) -> MCPConnectionConfig | None:
@@ -1528,8 +1563,6 @@ def get_mcp_server_detail(
_ensure_mcp_server_owner_or_admin(server, user)
email = user.email if user else ""
# TODO: user permissions per mcp server not yet implemented, for now
# permissions are based on access to assistants
# # Quick permission check admin or user has access
@@ -1537,7 +1570,10 @@ def get_mcp_server_detail(
# raise HTTPException(status_code=403, detail="Forbidden")
return _db_mcp_server_to_api_mcp_server(
server, email, db_session, include_auth_config=True
server,
db_session,
include_auth_config=True,
request_user=user,
)
@@ -1596,13 +1632,12 @@ def get_mcp_servers_for_admin(
logger.info("Fetching all MCP servers for admin display")
email = user.email if user else ""
try:
db_mcp_servers = get_all_mcp_servers(db)
# Convert to API model format
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, email, db)
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
for db_server in db_mcp_servers
]
@@ -1845,7 +1880,9 @@ def update_mcp_server_simple(
db_session.commit()
# Return the updated server in API format
return _db_mcp_server_to_api_mcp_server(updated_server, user.email, db_session)
return _db_mcp_server_to_api_mcp_server(
updated_server, db_session, request_user=user
)
@admin_router.delete("/server/{server_id}")

View File

@@ -13,6 +13,7 @@ from onyx.configs.app_configs import PASSWORD_MIN_LENGTH
from onyx.configs.constants import DEV_VERSION_PATTERN
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import STABLE_VERSION_PATTERN
from onyx.db.auth import get_user_count
from onyx.server.manage.models import AllVersions
from onyx.server.manage.models import AuthTypeResponse
from onyx.server.manage.models import ContainerVersions
@@ -28,12 +29,14 @@ def healthcheck() -> StatusResponse:
@router.get("/auth/type", tags=PUBLIC_API_TAGS)
def get_auth_type() -> AuthTypeResponse:
async def get_auth_type() -> AuthTypeResponse:
user_count = await get_user_count()
return AuthTypeResponse(
auth_type=AUTH_TYPE,
requires_verification=user_needs_to_be_verified(),
anonymous_user_enabled=anonymous_user_enabled(),
password_min_length=PASSWORD_MIN_LENGTH,
has_users=user_count > 0,
)

View File

@@ -44,6 +44,8 @@ class AuthTypeResponse(BaseModel):
requires_verification: bool
anonymous_user_enabled: bool | None = None
password_min_length: int
# whether there are any users in the system
has_users: bool = True
class UserSpecificAssistantPreference(BaseModel):
@@ -65,6 +67,7 @@ class UserPreferences(BaseModel):
auto_scroll: bool | None = None
temperature_override_enabled: bool | None = None
theme_preference: ThemePreference | None = None
chat_background: str | None = None
# controls which tools are enabled for the user for a specific assistant
assistant_specific_configs: UserSpecificAssistantPreferences | None = None
@@ -136,6 +139,7 @@ class UserInfo(BaseModel):
auto_scroll=user.auto_scroll,
temperature_override_enabled=user.temperature_override_enabled,
theme_preference=user.theme_preference,
chat_background=user.chat_background,
assistant_specific_configs=assistant_specific_configs,
)
),
@@ -199,6 +203,10 @@ class ThemePreferenceRequest(BaseModel):
theme_preference: ThemePreference
class ChatBackgroundRequest(BaseModel):
chat_background: str | None
class PersonalizationUpdateRequest(BaseModel):
name: str | None = None
role: str | None = None

View File

@@ -6,33 +6,25 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.engine.sql_engine import get_session
from onyx.db.index_attempt import expire_index_attempts
from onyx.db.models import IndexModelStatus
from onyx.db.models import User
from onyx.db.search_settings import create_search_settings
from onyx.db.search_settings import delete_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_embedding_provider_from_provider_type
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.document_index_utils import get_multipass_config
from onyx.document_index.factory import get_default_document_index
from onyx.file_processing.unstructured import delete_unstructured_api_key
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import update_unstructured_api_key
from onyx.natural_language_processing.search_nlp_models import clean_model_name
from onyx.server.manage.embedding.models import SearchSettingsDeleteRequest
from onyx.server.manage.models import FullModelVersionResponse
from onyx.server.models import IdReturn
from onyx.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MULTI_TENANT
router = APIRouter(prefix="/search-settings")
@@ -48,91 +40,97 @@ def set_new_search_settings(
"""Creates a new EmbeddingModel row and cancels the previous secondary indexing if any
Gives an error if the same model name is used as the current or secondary index
"""
if search_settings_new.index_name:
logger.warning("Index name was specified by request, this is not suggested")
# Disallow contextual RAG for cloud deployments
if MULTI_TENANT and search_settings_new.enable_contextual_rag:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Contextual RAG disabled in Onyx Cloud",
)
# Validate cloud provider exists or create new LiteLLM provider
if search_settings_new.provider_type is not None:
cloud_provider = get_embedding_provider_from_provider_type(
db_session, provider_type=search_settings_new.provider_type
)
if cloud_provider is None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
)
search_settings = get_current_search_settings(db_session)
if search_settings_new.index_name is None:
# We define index name here
index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
if (
search_settings_new.model_name == search_settings.model_name
and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
):
index_name += ALT_INDEX_SUFFIX
search_values = search_settings_new.model_dump()
search_values["index_name"] = index_name
new_search_settings_request = SavedSearchSettings(**search_values)
else:
new_search_settings_request = SavedSearchSettings(
**search_settings_new.model_dump()
)
secondary_search_settings = get_secondary_search_settings(db_session)
if secondary_search_settings:
# Cancel any background indexing jobs
expire_index_attempts(
search_settings_id=secondary_search_settings.id, db_session=db_session
)
# Mark previous model as a past model directly
update_search_settings_status(
search_settings=secondary_search_settings,
new_status=IndexModelStatus.PAST,
db_session=db_session,
)
new_search_settings = create_search_settings(
search_settings=new_search_settings_request, db_session=db_session
# TODO(andrei): Re-enable.
logger.error("Setting new search settings is temporarily disabled.")
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="Setting new search settings is temporarily disabled.",
)
# if search_settings_new.index_name:
# logger.warning("Index name was specified by request, this is not suggested")
# Ensure Vespa has the new index immediately
get_multipass_config(search_settings)
get_multipass_config(new_search_settings)
document_index = get_default_document_index(search_settings, new_search_settings)
# # Disallow contextual RAG for cloud deployments
# if MULTI_TENANT and search_settings_new.enable_contextual_rag:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail="Contextual RAG disabled in Onyx Cloud",
# )
document_index.ensure_indices_exist(
primary_embedding_dim=search_settings.final_embedding_dim,
primary_embedding_precision=search_settings.embedding_precision,
secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
secondary_index_embedding_precision=new_search_settings.embedding_precision,
)
# # Validate cloud provider exists or create new LiteLLM provider
# if search_settings_new.provider_type is not None:
# cloud_provider = get_embedding_provider_from_provider_type(
# db_session, provider_type=search_settings_new.provider_type
# )
# Pause index attempts for the currently in use index to preserve resources
if DISABLE_INDEX_UPDATE_ON_SWAP:
expire_index_attempts(
search_settings_id=search_settings.id, db_session=db_session
)
for cc_pair in get_connector_credential_pairs(db_session):
resync_cc_pair(
cc_pair=cc_pair,
search_settings_id=new_search_settings.id,
db_session=db_session,
)
# if cloud_provider is None:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
# )
db_session.commit()
return IdReturn(id=new_search_settings.id)
# search_settings = get_current_search_settings(db_session)
# if search_settings_new.index_name is None:
# # We define index name here
# index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
# if (
# search_settings_new.model_name == search_settings.model_name
# and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
# ):
# index_name += ALT_INDEX_SUFFIX
# search_values = search_settings_new.model_dump()
# search_values["index_name"] = index_name
# new_search_settings_request = SavedSearchSettings(**search_values)
# else:
# new_search_settings_request = SavedSearchSettings(
# **search_settings_new.model_dump()
# )
# secondary_search_settings = get_secondary_search_settings(db_session)
# if secondary_search_settings:
# # Cancel any background indexing jobs
# expire_index_attempts(
# search_settings_id=secondary_search_settings.id, db_session=db_session
# )
# # Mark previous model as a past model directly
# update_search_settings_status(
# search_settings=secondary_search_settings,
# new_status=IndexModelStatus.PAST,
# db_session=db_session,
# )
# new_search_settings = create_search_settings(
# search_settings=new_search_settings_request, db_session=db_session
# )
# # Ensure Vespa has the new index immediately
# get_multipass_config(search_settings)
# get_multipass_config(new_search_settings)
# document_index = get_default_document_index(search_settings, new_search_settings)
# document_index.ensure_indices_exist(
# primary_embedding_dim=search_settings.final_embedding_dim,
# primary_embedding_precision=search_settings.embedding_precision,
# secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
# secondary_index_embedding_precision=new_search_settings.embedding_precision,
# )
# # Pause index attempts for the currently in use index to preserve resources
# if DISABLE_INDEX_UPDATE_ON_SWAP:
# expire_index_attempts(
# search_settings_id=search_settings.id, db_session=db_session
# )
# for cc_pair in get_connector_credential_pairs(db_session):
# resync_cc_pair(
# cc_pair=cc_pair,
# search_settings_id=new_search_settings.id,
# db_session=db_session,
# )
# db_session.commit()
# return IdReturn(id=new_search_settings.id)
@router.post("/cancel-new-embedding")

View File

@@ -56,6 +56,7 @@ from onyx.db.user_preferences import get_latest_access_token_for_user
from onyx.db.user_preferences import update_assistant_preferences
from onyx.db.user_preferences import update_user_assistant_visibility
from onyx.db.user_preferences import update_user_auto_scroll
from onyx.db.user_preferences import update_user_chat_background
from onyx.db.user_preferences import update_user_default_model
from onyx.db.user_preferences import update_user_personalization
from onyx.db.user_preferences import update_user_pinned_assistants
@@ -75,6 +76,7 @@ from onyx.server.documents.models import PaginatedReturn
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.manage.models import AllUsersResponse
from onyx.server.manage.models import AutoScrollRequest
from onyx.server.manage.models import ChatBackgroundRequest
from onyx.server.manage.models import PersonalizationUpdateRequest
from onyx.server.manage.models import TenantInfo
from onyx.server.manage.models import TenantSnapshot
@@ -784,6 +786,25 @@ def update_user_theme_preference_api(
update_user_theme_preference(user.id, request.theme_preference, db_session)
@router.patch("/user/chat-background")
def update_user_chat_background_api(
    request: ChatBackgroundRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Persist the requesting user's chat-background preference.

    When auth is disabled there is no ``User`` row, so the preference is
    written to the KV-store-backed "no auth user" preferences instead of the
    database.
    """
    if user is None:
        if AUTH_TYPE == AuthType.DISABLED:
            # No real user exists in this mode; store the preference on the
            # shared no-auth user object kept in the KV store.
            store = get_kv_store()
            no_auth_user = fetch_no_auth_user(store)
            no_auth_user.preferences.chat_background = request.chat_background
            set_no_auth_user_preferences(store, no_auth_user.preferences)
            return
        else:
            # current_user should only yield None when auth is disabled.
            raise RuntimeError("This should never happen")

    update_user_chat_background(user.id, request.chat_background, db_session)
@router.patch("/user/default-model")
def update_user_default_model_api(
request: ChosenDefaultModelRequest,

View File

@@ -22,7 +22,7 @@ from onyx.db.models import User
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.indexing.adapters.document_indexing_adapter import (
DocumentIndexingBatchAdapter,
)
@@ -103,9 +103,11 @@ def upsert_ingestion_doc(
# Need to index for both the primary and secondary index if possible
active_search_settings = get_active_search_settings(db_session)
curr_doc_index = get_default_document_index(
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
active_search_settings.primary,
None,
None,
)
search_settings = get_current_search_settings(db_session)
@@ -128,7 +130,7 @@ def upsert_ingestion_doc(
indexing_pipeline_result = run_indexing_pipeline(
embedder=index_embedding_model,
document_index=curr_doc_index,
document_indices=document_indices,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -151,13 +153,14 @@ def upsert_ingestion_doc(
search_settings=sec_search_settings
)
sec_doc_index = get_default_document_index(
active_search_settings.secondary, None
# This flow is for indexing so we get all indices.
sec_document_indices = get_all_document_indices(
active_search_settings.secondary, None, None
)
run_indexing_pipeline(
embedder=new_index_embedding_model,
document_index=sec_doc_index,
document_indices=sec_document_indices,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -192,15 +195,18 @@ def delete_ingestion_doc(
)
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
active_search_settings.primary,
active_search_settings.secondary,
None,
)
doc_index.delete_single(
doc_id=document_id,
tenant_id=tenant_id,
chunk_count=document.chunk_count,
)
for document_index in document_indices:
document_index.delete_single(
doc_id=document_id,
tenant_id=tenant_id,
chunk_count=document.chunk_count,
)
# Delete from database
delete_documents_complete__no_commit(db_session, [document_id])

View File

@@ -530,7 +530,30 @@ def handle_new_chat_message(
return StreamingResponse(stream_generator(), media_type="text/event-stream")
@router.post("/send-chat-message", response_model=None, tags=PUBLIC_API_TAGS)
@router.post(
"/send-chat-message",
response_model=ChatFullResponse,
tags=PUBLIC_API_TAGS,
responses={
200: {
"description": (
"If `stream=true`, returns `text/event-stream`.\n"
"If `stream=false`, returns `application/json` (ChatFullResponse)."
),
"content": {
"text/event-stream": {
"schema": {"type": "string"},
"examples": {
"stream": {
"summary": "Stream of NDJSON AnswerStreamPart's",
"value": "string",
}
},
},
},
}
},
)
def handle_send_chat_message(
chat_message_req: SendMessageRequest,
request: Request,

View File

@@ -51,6 +51,7 @@ def admin_search(
tenant_id=tenant_id,
)
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
if not isinstance(document_index, VespaIndex):

View File

@@ -4,6 +4,7 @@ from typing import cast
from sqlalchemy.orm import Session
from onyx.chat.citation_utils import extract_citation_order_from_text
from onyx.configs.constants import MessageType
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
@@ -521,6 +522,13 @@ def translate_assistant_message_to_packets(
)
)
# Sort citations by order of appearance in message text
citation_order = extract_citation_order_from_text(chat_message.message or "")
order_map = {num: idx for idx, num in enumerate(citation_order)}
citation_info_list.sort(
key=lambda c: order_map.get(c.citation_number, float("inf"))
)
# Message comes after tool calls, with optional reasoning step beforehand
message_turn_index = max_tool_turn + 1
if chat_message.reasoning_tokens:

View File

@@ -6,7 +6,6 @@ from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.constants import KV_SEARCH_SETTINGS
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
@@ -14,9 +13,6 @@ from onyx.configs.embedding_configs import SupportedEmbeddingModel
from onyx.configs.model_configs import GEN_AI_API_KEY
from onyx.configs.model_configs import GEN_AI_MODEL_VERSION
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.retrieval.search_runner import (
download_nltk_data,
)
from onyx.db.connector import check_connectors_exist
from onyx.db.connector import create_initial_default_connector
from onyx.db.connector_credential_pair import associate_default_cc_pair
@@ -36,7 +32,7 @@ from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
@@ -46,7 +42,6 @@ from onyx.llm.constants import LlmProviderNames
from onyx.llm.well_known_providers.llm_provider_options import get_openai_model_names
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.seeding.load_yamls import load_input_prompts_from_yaml
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.settings.store import load_settings
@@ -116,9 +111,6 @@ def setup_onyx(
f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}."
)
logger.notice("Verifying query preprocessing (NLTK) data is downloaded")
download_nltk_data()
# setup Postgres with default credential, llm providers, etc.
setup_postgres(db_session)
@@ -132,13 +124,15 @@ def setup_onyx(
# Ensure Vespa is setup correctly, this step is relatively near the end because Vespa
# takes a bit of time to start up
logger.notice("Verifying Document Index(s) is/are available.")
document_index = get_default_document_index(
# This flow is for setting up the document index so we get all indices here.
document_indices = get_all_document_indices(
search_settings,
secondary_search_settings,
None,
)
success = setup_vespa(
document_index,
success = setup_document_indices(
document_indices,
IndexingSetting.from_db_model(search_settings),
(
IndexingSetting.from_db_model(secondary_search_settings)
@@ -147,7 +141,9 @@ def setup_onyx(
),
)
if not success:
raise RuntimeError("Could not connect to Vespa within the specified timeout.")
raise RuntimeError(
"Could not connect to a document index within the specified timeout."
)
logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}")
if search_settings.provider_type is None:
@@ -229,44 +225,62 @@ def mark_reindex_flag(db_session: Session) -> None:
kv_store.store(KV_REINDEX_KEY, False)
def setup_vespa(
document_index: DocumentIndex,
def setup_document_indices(
document_indices: list[DocumentIndex],
index_setting: IndexingSetting,
secondary_index_setting: IndexingSetting | None,
num_attempts: int = VESPA_NUM_ATTEMPTS_ON_STARTUP,
) -> bool:
# Vespa startup is a bit slow, so give it a few seconds
WAIT_SECONDS = 5
for x in range(num_attempts):
try:
logger.notice(f"Setting up Vespa (attempt {x+1}/{num_attempts})...")
document_index.ensure_indices_exist(
primary_embedding_dim=index_setting.final_embedding_dim,
primary_embedding_precision=index_setting.embedding_precision,
secondary_index_embedding_dim=(
secondary_index_setting.final_embedding_dim
if secondary_index_setting
else None
),
secondary_index_embedding_precision=(
secondary_index_setting.embedding_precision
if secondary_index_setting
else None
),
)
"""Sets up all input document indices.
logger.notice("Vespa setup complete.")
return True
except Exception:
logger.exception(
f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
If any document index setup fails, the function will return False. Otherwise
returns True.
"""
for document_index in document_indices:
# Document index startup is a bit slow, so give it a few seconds.
WAIT_SECONDS = 5
document_index_setup_success = False
for x in range(num_attempts):
try:
logger.notice(
f"Setting up document index {document_index.__class__.__name__} (attempt {x+1}/{num_attempts})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=index_setting.final_embedding_dim,
primary_embedding_precision=index_setting.embedding_precision,
secondary_index_embedding_dim=(
secondary_index_setting.final_embedding_dim
if secondary_index_setting
else None
),
secondary_index_embedding_precision=(
secondary_index_setting.embedding_precision
if secondary_index_setting
else None
),
)
logger.error(
f"Vespa setup did not succeed. Attempt limit reached. ({num_attempts})"
)
return False
logger.notice(
f"Document index {document_index.__class__.__name__} setup complete."
)
document_index_setup_success = True
break
except Exception:
logger.exception(
f"Document index {document_index.__class__.__name__} setup did not succeed. "
"The relevant service may not be ready yet. "
f"Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not document_index_setup_success:
logger.error(
f"Document index {document_index.__class__.__name__} setup did not succeed. "
f"Attempt limit reached. ({num_attempts})"
)
return False
return True
def setup_postgres(db_session: Session) -> None:
@@ -275,10 +289,6 @@ def setup_postgres(db_session: Session) -> None:
create_initial_default_connector(db_session)
associate_default_cc_pair(db_session)
# Load input prompts and user folders from YAML
logger.notice("Loading input prompts and user folders")
load_input_prompts_from_yaml(db_session, INPUT_PROMPT_YAML)
if GEN_AI_API_KEY and fetch_default_provider(db_session) is None:
# Only for dev flows
logger.notice("Setting up default OpenAI LLM for dev.")
@@ -347,6 +357,8 @@ def setup_multitenant_onyx() -> None:
def setup_vespa_multitenant(supported_indices: list[SupportedEmbeddingModel]) -> bool:
# TODO(andrei): We don't yet support OpenSearch for multi-tenant instances
# so this function remains unchanged.
# This is for local testing
WAIT_SECONDS = 5
VESPA_ATTEMPTS = 5

View File

@@ -60,6 +60,7 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tools.utils import generate_tools_description
@@ -431,6 +432,14 @@ def run_research_agent_call(
max_concurrent_tools=1,
# May be better to not do this step, hard to say, needs to be tested
skip_search_query_expansion=False,
url_snippet_map=extract_url_snippet_map(
[
search_doc
for tool_call in state_container.get_tool_calls()
if tool_call.search_docs
for search_doc in tool_call.search_docs
]
),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = (
@@ -465,8 +474,14 @@ def run_research_agent_call(
)
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
# This is used for the Open URL reminder in the next cycle
# only do this if the web search tool yielded results
@@ -499,7 +514,7 @@ def run_research_agent_call(
or most_recent_reasoning,
tool_call_arguments=tool_call.tool_args,
tool_call_response=tool_response.llm_facing_response,
search_docs=search_docs,
search_docs=displayed_docs or search_docs,
generated_images=None,
)
state_container.add_tool_call(tool_call_info)

View File

@@ -36,6 +36,15 @@ class ToolCallException(Exception):
self.llm_facing_message = llm_facing_message
class ToolExecutionException(Exception):
    """Exception raised for errors during tool execution."""

    def __init__(self, message: str, emit_error_packet: bool = False):
        super().__init__(message)
        # When True, the caller presumably surfaces this failure to the client
        # as an explicit error packet rather than only logging it — TODO
        # confirm against the tool-runner call sites.
        self.emit_error_packet = emit_error_packet
class SearchToolUsage(str, Enum):
DISABLED = "disabled"
ENABLED = "enabled"
@@ -142,6 +151,7 @@ class OpenURLToolOverrideKwargs(BaseModel):
# To know what citation number to start at for constructing the string to the LLM
starting_citation_num: int
citation_mapping: dict[str, int]
url_snippet_map: dict[str, str]
# None indicates that the default value should be used

View File

@@ -19,7 +19,6 @@ from onyx.db.oauth_config import get_oauth_config
from onyx.db.search_settings import get_current_search_settings
from onyx.db.tools import get_builtin_tool
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
@@ -120,18 +119,9 @@ def construct_tools(
if user and user.oauth_accounts:
user_oauth_token = user.oauth_accounts[0].access_token
document_index_cache: DocumentIndex | None = None
search_settings_cache = None
def _get_document_index() -> DocumentIndex:
nonlocal document_index_cache, search_settings_cache
if document_index_cache is None:
if search_settings_cache is None:
search_settings_cache = get_current_search_settings(db_session)
document_index_cache = get_default_document_index(
search_settings_cache, None
)
return document_index_cache
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
added_search_tool = False
for db_tool_model in persona.tools:
@@ -174,7 +164,7 @@ def construct_tools(
user=user,
persona=persona,
llm=llm,
document_index=_get_document_index(),
document_index=document_index,
user_selected_filters=search_tool_config.user_selected_filters,
project_id=search_tool_config.project_id,
bypass_acl=search_tool_config.bypass_acl,
@@ -228,7 +218,7 @@ def construct_tools(
OpenURLTool(
tool_id=db_tool_model.id,
emitter=emitter,
document_index=_get_document_index(),
document_index=document_index,
user=user,
)
]
@@ -387,9 +377,6 @@ def construct_tools(
if not search_tool_config:
search_tool_config = SearchToolConfig()
search_settings = get_current_search_settings(db_session)
document_index = get_default_document_index(search_settings, None)
search_tool = SearchTool(
tool_id=search_tool_db_model.id,
db_session=db_session,

View File

@@ -23,6 +23,7 @@ from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeart
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
@@ -188,7 +189,9 @@ class ImageGenerationTool(Tool[None]):
except requests.RequestException as e:
logger.error(f"Error fetching or converting image: {e}")
raise ValueError("Failed to fetch or convert the generated image")
raise ToolExecutionException(
"Failed to fetch or convert the generated image", emit_error_packet=True
)
except Exception as e:
logger.debug(f"Error occurred during image generation: {e}")
@@ -198,18 +201,27 @@ class ImageGenerationTool(Tool[None]):
"Your request was rejected as a result of our safety system"
in error_message
):
raise ValueError(
"The image generation request was rejected due to OpenAI's content policy. Please try a different prompt."
raise ToolExecutionException(
(
"The image generation request was rejected due to OpenAI's content policy. "
"Please try a different prompt."
),
emit_error_packet=True,
)
elif "Invalid image URL" in error_message:
raise ValueError("Invalid image URL provided for image generation.")
raise ToolExecutionException(
"Invalid image URL provided for image generation.",
emit_error_packet=True,
)
elif "invalid_request_error" in error_message:
raise ValueError(
"Invalid request for image generation. Please check your input."
raise ToolExecutionException(
"Invalid request for image generation. Please check your input.",
emit_error_packet=True,
)
raise ValueError(
"An error occurred during image generation. Please try again later."
raise ToolExecutionException(
f"An error occurred during image generation. error={error_message}",
emit_error_packet=True,
)
def run(

View File

@@ -492,7 +492,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
indexed_result, crawled_result = run_functions_tuples_in_parallel(
[
(_retrieve_indexed_with_filters, (all_requests,)),
(self._fetch_web_content, (urls,)),
(self._fetch_web_content, (urls, override_kwargs.url_snippet_map)),
],
allow_failures=True,
timeout=OPEN_URL_TIMEOUT_SECONDS,
@@ -800,7 +800,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
return merged_sections
def _fetch_web_content(
self, urls: list[str]
self, urls: list[str], url_snippet_map: dict[str, str]
) -> tuple[list[InferenceSection], list[str]]:
if not urls:
return [], []
@@ -831,7 +831,11 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
and content.full_content
and not is_insufficient
):
sections.append(inference_section_from_internet_page_scrape(content))
sections.append(
inference_section_from_internet_page_scrape(
content, url_snippet_map.get(content.link, "")
)
)
else:
# TODO: Slight improvement - if failed URL reasons are passed back to the LLM
# for example, if it tries to crawl Reddit and fails, it should know (probably) that this error would

View File

@@ -0,0 +1,239 @@
import unicodedata
from pydantic import BaseModel
from rapidfuzz import fuzz
from rapidfuzz import utils
from onyx.utils.text_processing import is_zero_width_char
from onyx.utils.text_processing import normalize_char
class SnippetMatchResult(BaseModel):
    """Result of locating a snippet inside a larger content string."""

    # Whether the snippet was found in the content.
    snippet_located: bool
    # Character indices of the match within the ORIGINAL content string;
    # -1 when no match was found.
    # NOTE(review): _normalize_and_match produces an inclusive end_idx while
    # _token_based_match forwards rapidfuzz's src_end — confirm the two
    # matchers agree on the end-index convention.
    start_idx: int = -1
    end_idx: int = -1


# Shared sentinel returned by all matchers when the snippet is not found.
NegativeSnippetMatchResult = SnippetMatchResult(snippet_located=False)
def find_snippet_in_content(content: str, snippet: str) -> SnippetMatchResult:
    """Locate *snippet* within *content*.

    Strategy (tried in order, first hit wins):
      1. Normalize both strings and look for an exact substring match.
      2. Fall back to a token-based fuzzy search.

    Notes:
      - If the snippet occurs multiple times, the first normalized occurrence
        is returned.
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult

    for matcher in (_normalize_and_match, _token_based_match):
        match = matcher(content, snippet)
        if match.snippet_located:
            return match

    return NegativeSnippetMatchResult
def _normalize_and_match(content: str, snippet: str) -> SnippetMatchResult:
    """Normalize both strings, then perform a direct substring match.

    The position maps produced by ``_normalize_text_with_mapping`` are used to
    translate the match in normalized space back into indices of the ORIGINAL
    content, re-attaching any leading/trailing snippet characters that
    normalization stripped (e.g. punctuation).

    Returns an inclusive ``[start_idx, end_idx]`` range into ``content``, or
    the negative sentinel when no match is found.
    """
    normalized_content, content_map = _normalize_text_with_mapping(content)
    # Renamed from the misleading `url_snippet_map`: this maps positions in
    # the normalized snippet back to positions in the original snippet.
    normalized_snippet, snippet_map = _normalize_text_with_mapping(snippet)

    if not normalized_content or not normalized_snippet:
        return NegativeSnippetMatchResult

    pos = normalized_content.find(normalized_snippet)
    if pos == -1:
        return NegativeSnippetMatchResult

    original_start = content_map[pos]
    # Account for leading characters stripped from the snippet during
    # normalization (e.g. leading punctuation like "[![]![]]").
    if snippet_map:
        first_snippet_orig_pos = snippet_map[0]
        if first_snippet_orig_pos > 0:
            # Extend the start backwards to cover the stripped prefix.
            original_start = max(original_start - first_snippet_orig_pos, 0)

    # Determine the end position, including any trailing characters of the
    # content that were normalized away (e.g. punctuation).
    match_end_norm = pos + len(normalized_snippet)
    if match_end_norm >= len(content_map):
        # Match extends to the end of the normalized content — include all
        # trailing original characters.
        original_end = len(content) - 1
    else:
        # Match is in the middle — end just before the original character
        # corresponding to the next normalized position.
        original_end = content_map[match_end_norm] - 1

    # Account for trailing characters stripped from the snippet during
    # normalization (e.g. trailing punctuation like "\n[").
    if snippet_map:
        last_snippet_orig_pos = snippet_map[-1]
        trailing_stripped = len(snippet) - last_snippet_orig_pos - 1
        if trailing_stripped > 0:
            # Extend the end to cover the stripped suffix, clamped to content.
            original_end = min(original_end + trailing_stripped, len(content) - 1)

    return SnippetMatchResult(
        snippet_located=True,
        start_idx=original_start,
        end_idx=original_end,
    )
def _normalize_text_with_mapping(text: str) -> tuple[str, list[int]]:
    """
    Text normalization that maintains position mapping.

    Normalization consists of: NFC unicode normalization, HTML-entity
    replacement, zero-width character removal, per-character normalization via
    ``normalize_char``, and whitespace collapsing (no leading/trailing space).

    Returns:
        tuple: (normalized_text, position_map)
        - position_map[i] gives the original position for normalized position i
    """
    if not text:
        return "", []

    original_text = text

    # Step 1: NFC normalization with position mapping
    nfc_text = unicodedata.normalize("NFC", text)

    # Build mapping from NFC positions to original start positions
    nfc_to_orig: list[int] = []
    orig_idx = 0
    for nfc_char in nfc_text:
        nfc_to_orig.append(orig_idx)
        # Find how many original chars contributed to this NFC char by
        # growing a window until its NFC form equals the single NFC char.
        for length in range(1, len(original_text) - orig_idx + 1):
            substr = original_text[orig_idx : orig_idx + length]
            if unicodedata.normalize("NFC", substr) == nfc_char:
                orig_idx += length
                break
        else:
            orig_idx += 1  # Fallback: advance one char if no window matched

    # Work with NFC text from here
    text = nfc_text

    # HTML entities mapped to their plain-text equivalents.
    html_entities = {
        "&nbsp;": " ",
        "&#160;": " ",
        "&amp;": "&",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&apos;": "'",
        "&#39;": "'",
        "&#x27;": "'",
        "&ndash;": "-",
        "&mdash;": "-",
        "&hellip;": "...",
        "&#xB0;": "°",
        "&#xBA;": "°",
        "&zwj;": "",
    }

    # Sort entities by length (longest first) for greedy matching
    sorted_entities = sorted(html_entities.keys(), key=len, reverse=True)

    result_chars = []
    result_map = []
    i = 0
    last_was_space = True  # Track to avoid leading spaces

    while i < len(text):
        # Convert NFC position to original position
        orig_pos = nfc_to_orig[i] if i < len(nfc_to_orig) else len(original_text) - 1
        char = text[i]
        output = None
        step = 1

        # Check for HTML entities first (greedy match)
        for entity in sorted_entities:
            if text[i : i + len(entity)] == entity:
                output = html_entities[entity]
                step = len(entity)
                break

        # If no entity matched, process single character
        if output is None:
            # Skip zero-width characters
            if is_zero_width_char(char):
                i += 1
                continue
            output = normalize_char(char)

        # Add output to result, normalizing each character from entity output
        if output:
            for out_char in output:
                # Normalize entity output the same way as regular chars
                normalized = normalize_char(out_char)
                # Handle whitespace collapsing: consecutive spaces map to one,
                # attributed to the first contributing original position.
                if normalized == " ":
                    if not last_was_space:
                        result_chars.append(" ")
                        result_map.append(orig_pos)
                        last_was_space = True
                else:
                    result_chars.append(normalized)
                    result_map.append(orig_pos)
                    last_was_space = False

        i += step

    # Remove trailing space if present
    if result_chars and result_chars[-1] == " ":
        result_chars.pop()
        result_map.pop()

    return "".join(result_chars), result_map
def _token_based_match(
    content: str,
    snippet: str,
    min_threshold: float = 0.8,
) -> SnippetMatchResult:
    """
    Fuzzy-locate `snippet` inside `content` using token-based partial matching.

    min_threshold is a fraction in [0, 1]; the rapidfuzz score (0-100) must
    reach min_threshold * 100 for the match to count.
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult
    alignment = fuzz.partial_ratio_alignment(
        content, snippet, processor=utils.default_process
    )
    if not alignment:
        return NegativeSnippetMatchResult
    if alignment.score < min_threshold * 100:
        return NegativeSnippetMatchResult
    return SnippetMatchResult(
        snippet_located=True,
        start_idx=alignment.src_start,
        end_idx=alignment.src_end,
    )

View File

@@ -832,7 +832,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
top_sections=merged_sections,
citation_start=override_kwargs.starting_citation_num,
limit=override_kwargs.max_llm_chunks,
include_document_id=True,
include_document_id=False,
)
# End overall timing
@@ -844,12 +844,12 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
f"document expansion: {document_expansion_elapsed:.3f}s)"
)
# TODO: extension - this can include the smaller set of approved docs to be saved/displayed in the UI
# for replaying. Currently the full set is returned and saved.
return ToolResponse(
# Typically the rich response will give more docs in case it needs to be displayed in the UI
rich_response=SearchDocsResponse(
search_docs=search_docs, citation_mapping=citation_mapping
search_docs=search_docs,
citation_mapping=citation_mapping,
displayed_docs=final_ui_docs or None,
),
# The LLM facing response typically includes less docs to cut down on noise and token usage
llm_facing_response=docs_str,

View File

@@ -73,7 +73,7 @@ def convert_inference_sections_to_llm_string(
link = next(iter(chunk.source_links.values()), None)
if link:
result["url"] = link
if include_document_id and "url" not in result:
if include_document_id:
result["document_identifier"] = chunk.document_id
if chunk.metadata:
result["metadata"] = json.dumps(chunk.metadata)

View File

@@ -1,11 +1,19 @@
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDoc
from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.snippet_matcher import (
find_snippet_in_content,
)
from onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
TRUNCATED_CONTENT_SUFFIX = " [...truncated]"
TRUNCATED_CONTENT_PREFIX = "[...truncated] "
def filter_web_search_results_with_no_title_or_snippet(
results: list[WebSearchResult],
) -> list[WebSearchResult]:
@@ -26,14 +34,99 @@ def truncate_search_result_content(content: str, max_chars: int = 15000) -> str:
"""Truncate search result content to a maximum number of characters"""
if len(content) <= max_chars:
return content
return content[:max_chars] + " [...truncated]"
return content[:max_chars] + TRUNCATED_CONTENT_SUFFIX
def _truncate_content_around_snippet(
    content: str, snippet: str, max_chars: int = 15000
) -> str:
    """
    Return a window of `content`, at most ~max_chars wide, centered on the
    located snippet. Returns "" when the snippet cannot be found.

    Truncation markers are prepended/appended for whichever sides were cut.
    """
    match = find_snippet_in_content(content, snippet)
    if not match.snippet_located:
        return ""
    window_start, window_end = _expand_range_centered(
        match.start_idx, match.end_idx + 1, len(content), max_chars
    )
    window = content[window_start:window_end]
    # Mark truncation on each side that dropped content
    if window_start > 0:
        window = TRUNCATED_CONTENT_PREFIX + window
    if window_end < len(content):
        window = window + TRUNCATED_CONTENT_SUFFIX
    return window
def _expand_range_centered(
start_idx: int, end_idx: int, N: int, target_size: int
) -> tuple[int, int]:
"""
Expands a range [start_idx, end_idx) to be centered within a list of size N
Args:
start_idx: Starting index (inclusive)
end_idx: Ending index (exclusive)
N: Size of the list
target_size: Target size of the range
Returns:
Tuple of (new start index, new end index)
"""
current_size = end_idx - start_idx
if current_size >= target_size:
return start_idx, end_idx
padding_needed = target_size - current_size
padding_top = padding_needed // 2
padding_bottom = padding_needed - padding_top
# Try expand symmetrically
new_start = start_idx - padding_top
new_end = end_idx + padding_bottom
# Handle overflow
if new_start < 0:
overflow = -new_start
new_start = 0
new_end = min(N, new_end + overflow)
if new_end > N:
overflow = new_end - N
new_end = N
new_start = max(0, new_start - overflow)
return new_start, new_end
def inference_section_from_internet_page_scrape(
result: WebContent,
snippet: str,
rank: int = 0,
) -> InferenceSection:
truncated_content = truncate_search_result_content(result.full_content)
# truncate the content around snippet if snippet exists
truncated_content = ""
if snippet:
truncated_content = _truncate_content_around_snippet(
result.full_content, snippet
)
# Fallback if no snippet exists or we failed to find it
if not truncated_content:
truncated_content = truncate_search_result_content(result.full_content)
# Calculate score using reciprocal rank to preserve ordering
score = 1.0 / (rank + 1)
@@ -97,3 +190,14 @@ def inference_section_from_internet_search_result(
chunks=[chunk],
combined_content=result.snippet,
)
def extract_url_snippet_map(documents: list[SearchDoc]) -> dict[str, str]:
    """
    Given a list of SearchDocs, extract a url -> blurb (summary) map for web
    documents that carry a link. Later duplicates of a url overwrite earlier
    ones, matching plain dict assignment.
    """
    return {
        document.link: document.blurb
        for document in documents
        if document.source_type == DocumentSource.WEB and document.link
    }

View File

@@ -7,6 +7,7 @@ from onyx.chat.models import ChatMessageSimple
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDocsResponse
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PacketException
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.interface import Tool
from onyx.tools.models import ChatMinimalTextMessage
@@ -15,6 +16,7 @@ from onyx.tools.models import ParallelToolCallResponse
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
@@ -152,6 +154,33 @@ def _safe_run_single_tool(
},
)
)
except ToolExecutionException as e:
# Unexpected error during tool execution
logger.error(f"Unexpected error running tool {tool.name}: {e}")
tool_response = ToolResponse(
rich_response=None,
llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),
)
_error_tracing.attach_error_to_current_span(
SpanError(
message="Tool execution error (unexpected)",
data={
"tool_name": tool.name,
"tool_call_id": tool_call.tool_call_id,
"tool_args": tool_call.tool_args,
"error": str(e),
"stack_trace": traceback.format_exc(),
"error_type": type(e).__name__,
},
)
)
if e.emit_error_packet:
tool.emitter.emit(
Packet(
placement=tool_call.placement,
obj=PacketException(exception=e),
)
)
except Exception as e:
# Unexpected error during tool execution
logger.error(f"Unexpected error running tool {tool.name}: {e}")
@@ -200,6 +229,8 @@ def run_tool_calls(
max_concurrent_tools: int | None = None,
# Skip query expansion for repeat search tool calls
skip_search_query_expansion: bool = False,
# A map of url -> summary for passing web results to open url tool
url_snippet_map: dict[str, str] = {},
) -> ParallelToolCallResponse:
"""Run (optionally merged) tool calls in parallel and update citation mappings.
@@ -330,6 +361,7 @@ def run_tool_calls(
override_kwargs = OpenURLToolOverrideKwargs(
starting_citation_num=starting_citation_num,
citation_mapping=url_to_citation,
url_snippet_map=url_snippet_map,
)
starting_citation_num += 100

View File

@@ -9,6 +9,36 @@ from onyx.utils.logger import setup_logger
logger = setup_logger(__name__)
# Mapping of curly/smart quotes to straight quotes
CURLY_TO_STRAIGHT_QUOTES: dict[str, str] = {
    "\u2019": "'",  # Right single quotation mark
    "\u2018": "'",  # Left single quotation mark
    "\u201c": '"',  # Left double quotation mark
    "\u201d": '"',  # Right double quotation mark
}

# Zero-width characters that should typically be removed during text normalization
ZERO_WIDTH_CHARS: set[str] = {
    "\u200b",  # Zero-width space
    "\u200c",  # Zero-width non-joiner
    "\u200d",  # Zero-width joiner
    "\ufeff",  # Byte order mark / zero-width no-break space
    "\u2060",  # Word joiner
}


def normalize_curly_quotes(text: str) -> str:
    """Convert curly/smart quotes to their straight ASCII equivalents."""
    for smart, plain in CURLY_TO_STRAIGHT_QUOTES.items():
        text = text.replace(smart, plain)
    return text


def is_zero_width_char(c: str) -> bool:
    """Return True when `c` is one of the known zero-width characters."""
    return c in ZERO_WIDTH_CHARS
ESCAPE_SEQUENCE_RE = re.compile(
r"""
( \\U........ # 8-digit hex escapes
@@ -257,3 +287,15 @@ def remove_invalid_unicode_chars(text: str) -> str:
- Unicode non-characters
"""
return _INVALID_UNICODE_CHARS_RE.sub("", text)
def normalize_char(c: str) -> str:
    """Normalize a single character (curly quotes, whitespace, punctuation).

    Curly quotes become straight quotes, any whitespace becomes a single
    space, punctuation (anything outside word chars / whitespace / ')
    becomes a space, and everything else is lowercased.
    """
    c = CURLY_TO_STRAIGHT_QUOTES.get(c, c)
    if c.isspace():
        return " "
    if re.match(r"[^\w\s\']", c):
        return " "
    return c.lower()

View File

@@ -255,11 +255,11 @@ fastapi==0.116.1
# onyx
fastapi-limiter==0.1.6
# via onyx
fastapi-users==14.0.1
fastapi-users==15.0.2
# via
# fastapi-users-db-sqlalchemy
# onyx
fastapi-users-db-sqlalchemy==5.0.0
fastapi-users-db-sqlalchemy==7.0.0
# via onyx
fastavro==1.12.1
# via cohere
@@ -608,9 +608,7 @@ mypy-extensions==1.0.0
nest-asyncio==1.6.0
# via onyx
nltk==3.9.1
# via
# onyx
# unstructured
# via unstructured
numpy==2.4.1
# via
# magika
@@ -784,7 +782,7 @@ psycopg2-binary==2.9.9
# via onyx
puremagic==1.28
# via onyx
pwdlib==0.2.1
pwdlib==0.3.0
# via fastapi-users
py==1.11.0
# via retry
@@ -904,7 +902,7 @@ python-json-logger==4.0.0
# via pydocket
python-magic==0.4.27
# via unstructured
python-multipart==0.0.20
python-multipart==0.0.21
# via
# fastapi-users
# mcp

View File

@@ -298,7 +298,7 @@ numpy==2.4.1
# pandas-stubs
# shapely
# voyageai
onyx-devtools==0.3.2
onyx-devtools==0.4.0
# via onyx
openai==2.14.0
# via

View File

@@ -45,7 +45,9 @@ from onyx.db.connector_credential_pair import (
get_connector_credential_pair,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import (
get_all_document_indices,
)
from onyx.file_store.file_store import get_default_file_store
# pylint: enable=E402
@@ -59,7 +61,7 @@ _DELETION_BATCH_SIZE = 1000
def _unsafe_deletion(
db_session: Session,
document_index: DocumentIndex,
document_indices: list[DocumentIndex],
cc_pair: ConnectorCredentialPair,
pair_id: int,
) -> int:
@@ -80,11 +82,12 @@ def _unsafe_deletion(
break
for document in documents:
document_index.delete_single(
doc_id=document.id,
tenant_id=POSTGRES_DEFAULT_SCHEMA,
chunk_count=document.chunk_count,
)
for document_index in document_indices:
document_index.delete_single(
doc_id=document.id,
tenant_id=POSTGRES_DEFAULT_SCHEMA,
chunk_count=document.chunk_count,
)
delete_documents_complete__no_commit(
db_session=db_session,
@@ -211,14 +214,16 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
try:
logger.notice("Deleting information from Vespa and Postgres")
active_search_settings = get_active_search_settings(db_session)
document_index = get_default_document_index(
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
active_search_settings.primary,
active_search_settings.secondary,
None,
)
files_deleted_count = _unsafe_deletion(
db_session=db_session,
document_index=document_index,
document_indices=document_indices,
cc_pair=cc_pair,
pair_id=cc_pair_id,
)

View File

@@ -3,28 +3,8 @@
# We get OPENSEARCH_ADMIN_PASSWORD from the repo .env file.
source "$(dirname "$0")/../../.vscode/.env"
OPENSEARCH_CONTAINER_NAME="onyx-opensearch"
OPENSEARCH_IMAGE="opensearchproject/opensearch:3.4.0"
# First check the env for OPENSEARCH_REST_API_PORT, else hardcode to 9200.
OPENSEARCH_REST_API_PORT=${OPENSEARCH_REST_API_PORT:-9200}
OPENSEARCH_PERFORMANCE_ANALYZER_PORT=9600
cd "$(dirname "$0")/../../deployment/docker_compose"
function stop_and_remove_opensearch_container() {
echo "Stopping and removing the existing OpenSearch container..."
docker stop "$OPENSEARCH_CONTAINER_NAME" 2>/dev/null || true
docker rm "$OPENSEARCH_CONTAINER_NAME" 2>/dev/null || true
}
# Set OPENSEARCH_ADMIN_PASSWORD=<some password> in your .env file.
if [ -z "$OPENSEARCH_ADMIN_PASSWORD" ]; then
echo "Error: OPENSEARCH_ADMIN_PASSWORD environment variable is not set." >&2
echo "Please set OPENSEARCH_ADMIN_PASSWORD=<some password> in your .env file." >&2
exit 1
fi
# Stop and remove the existing container.
stop_and_remove_opensearch_container
# Start the OpenSearch container.
echo "Starting OpenSearch container..."
docker run --detach --name "$OPENSEARCH_CONTAINER_NAME" --publish "$OPENSEARCH_REST_API_PORT:9200" --publish "$OPENSEARCH_PERFORMANCE_ANALYZER_PORT:9600" -e "discovery.type=single-node" -e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_ADMIN_PASSWORD" "$OPENSEARCH_IMAGE"
# Start OpenSearch.
echo "Forcefully starting fresh OpenSearch container..."
docker compose -f docker-compose.opensearch.yml up --force-recreate -d opensearch

View File

@@ -13,8 +13,6 @@ import sys
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.factory import get_default_document_index
def get_tenant_index_name(tenant_id: str) -> dict[str, str]:
@@ -26,14 +24,7 @@ def get_tenant_index_name(tenant_id: str) -> dict[str, str]:
try:
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
search_settings = get_current_search_settings(db_session)
secondary_search_settings = get_secondary_search_settings(db_session)
document_index = get_default_document_index(
search_settings=search_settings,
secondary_search_settings=secondary_search_settings,
)
index_name = document_index.index_name
index_name = search_settings.index_name
print(f"Found index name: {index_name}", file=sys.stderr)
return {"status": "success", "index_name": index_name}

View File

@@ -14,6 +14,10 @@ from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
# Counter for generating unique file IDs in mock file store
_mock_file_id_counter = 0
def ensure_default_llm_provider(db_session: Session) -> None:
"""Ensure a default LLM provider exists for tests that exercise chat flows."""
@@ -80,11 +84,34 @@ def mock_vespa_query() -> Iterator[None]:
yield
@pytest.fixture
def mock_file_store() -> Iterator[None]:
    """Mock the file store to avoid S3/storage dependencies in tests."""
    global _mock_file_id_counter

    def _mock_save_file(*args: Any, **kwargs: Any) -> str:
        global _mock_file_id_counter
        _mock_file_id_counter += 1
        # Return a predictable file ID for tests
        # NOTE(review): the counter is incremented but the returned ID is the
        # fixed string "123" — confirm whether saves should yield unique IDs.
        return "123"

    mock_store = MagicMock()
    mock_store.save_file.side_effect = _mock_save_file
    mock_store.initialize.return_value = None
    # Patch the accessor used by file_store.utils so saves go to the mock store
    with patch(
        "onyx.file_store.utils.get_default_file_store",
        return_value=mock_store,
    ):
        yield
@pytest.fixture
def mock_external_deps(
    mock_nlp_embeddings_post: None,
    mock_gpu_status: None,
    mock_vespa_query: None,
    mock_file_store: None,
) -> Iterator[None]:
    """Convenience fixture to enable all common external dependency mocks.

    Requesting this fixture activates the embedding, GPU-status, Vespa-query,
    and file-store mocks together for the duration of the test.
    """
    yield

View File

@@ -0,0 +1,156 @@
from __future__ import annotations
from typing import cast
from onyx.chat.models import AnswerStreamPart
from onyx.chat.models import CreateChatSessionID
from onyx.chat.models import MessageResponseIDInfo
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
def assert_answer_stream_part_correct(
    received: AnswerStreamPart, expected: AnswerStreamPart
) -> None:
    """Assert that a received stream part matches the expected one.

    Packets are compared by placement plus a type-specific payload comparison;
    message-ID and session-ID parts are accepted without field checks since
    their concrete values are assigned by the backend.
    """
    assert isinstance(received, type(expected))
    if isinstance(received, MessageResponseIDInfo):
        # We're not going to make assumptions about what the user id /
        # assistant id should be, so accept any values.
        return
    if isinstance(received, CreateChatSessionID):
        # Don't worry about matching session ids
        return
    if not isinstance(received, Packet):
        raise NotImplementedError("Not implemented")
    assert received.placement == expected.placement
    # Payload types with dedicated comparators (checked in order)
    comparators = (
        (SearchToolDocumentsDelta, is_search_tool_document_delta_equal),
        (OpenUrlDocuments, is_open_url_documents_equal),
        (AgentResponseStart, is_agent_response_start_equal),
        (ImageGenerationFinal, is_image_generation_final_equal),
    )
    for payload_type, payloads_equal in comparators:
        if isinstance(received.obj, payload_type):
            assert isinstance(expected.obj, payload_type)
            assert payloads_equal(received.obj, expected.obj)
            return
    # Everything else must compare equal directly
    assert received.obj == expected.obj
def _are_search_docs_equal(
received: list[SearchDoc],
expected: list[SearchDoc],
) -> bool:
"""
What we care about:
- All documents are present (order does not)
- Expected document_id, link, blurb, source_type and hidden
"""
if len(received) != len(expected):
return False
received.sort(key=lambda x: x.document_id)
expected.sort(key=lambda x: x.document_id)
for received_document, expected_document in zip(received, expected):
if received_document.document_id != expected_document.document_id:
return False
if received_document.link != expected_document.link:
return False
if received_document.blurb != expected_document.blurb:
return False
if received_document.source_type != expected_document.source_type:
return False
if received_document.hidden != expected_document.hidden:
return False
return True
def is_search_tool_document_delta_equal(
    received: SearchToolDocumentsDelta,
    expected: SearchToolDocumentsDelta,
) -> bool:
    """
    Compare two SearchToolDocumentsDelta packets by their documents only.

    Delegates to _are_search_docs_equal: all documents must be present
    (order-insensitive) with matching document_id, link, blurb, source_type
    and hidden.
    """
    return _are_search_docs_equal(received.documents, expected.documents)
def is_open_url_documents_equal(
    received: OpenUrlDocuments,
    expected: OpenUrlDocuments,
) -> bool:
    """
    Compare two OpenUrlDocuments packets by their documents only.

    Delegates to _are_search_docs_equal: all documents must be present
    (order-insensitive) with matching document_id, link, blurb, source_type
    and hidden.
    """
    return _are_search_docs_equal(received.documents, expected.documents)
def is_agent_response_start_equal(
    received: AgentResponseStart,
    expected: AgentResponseStart,
) -> bool:
    """
    Compare AgentResponseStart packets by their final_documents.

    Both sides None counts as equal. If either side is None or an empty list
    while we got this far, the packets are considered unequal (truthiness
    check, matching the original behavior). Otherwise documents are compared
    order-insensitively via _are_search_docs_equal.
    """
    got = received.final_documents
    want = expected.final_documents
    if got is None and want is None:
        return True
    if not got or not want:
        return False
    return _are_search_docs_equal(got, want)
def is_image_generation_final_equal(
    received: ImageGenerationFinal,
    expected: ImageGenerationFinal,
) -> bool:
    """
    Compare two ImageGenerationFinal packets.

    What we care about:
    - Number of images is the same
    - On each received image, url and file_id align as url=/api/chat/file/{file_id}
    - revised_prompt and shape match the expected image
    """
    if len(received.images) != len(expected.images):
        return False
    return all(
        got.url == f"/api/chat/file/{got.file_id}"
        and got.revised_prompt == want.revised_prompt
        and got.shape == want.shape
        for got, want in zip(received.images, expected.images)
    )

View File

@@ -0,0 +1,139 @@
from __future__ import annotations
from collections.abc import Iterator
from onyx.chat.models import AnswerStreamPart
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from tests.external_dependency_unit.answer.stream_test_assertions import (
assert_answer_stream_part_correct,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
create_packet_with_agent_response_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
create_packet_with_reasoning_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import create_placement
from tests.external_dependency_unit.mock_llm import LLMResponse
from tests.external_dependency_unit.mock_llm import MockLLMController
class StreamTestBuilder:
    """Fluent builder that scripts mock-LLM output and validates the packets
    a chat stream emits in response.

    Usage: queue responses with add_response / add_responses_together, queue
    expectations with the expect_* methods, then call run_and_validate.
    """

    def __init__(self, llm_controller: MockLLMController) -> None:
        self._llm_controller = llm_controller
        # List of (expected_packet, forward_count) tuples, consumed in order
        self._expected_packets_queue: list[tuple[Packet, int]] = []

    @staticmethod
    def _to_forward_count(forward: int | bool) -> int:
        """Translate forward=True/False/int into a token count (1/0/int)."""
        if forward is True:
            return 1
        if forward is False:
            return 0
        return forward

    def add_response(self, response: LLMResponse) -> StreamTestBuilder:
        """Queue a single mock LLM response."""
        self._llm_controller.add_response(response)
        return self

    def add_responses_together(self, *responses: LLMResponse) -> StreamTestBuilder:
        """Add multiple responses that should be emitted together in the same tick."""
        self._llm_controller.add_responses_together(*responses)
        return self

    def expect(
        self, expected_pkt: Packet, forward: int | bool = True
    ) -> StreamTestBuilder:
        """
        Add an expected packet to the queue.

        Args:
            expected_pkt: The packet to expect
            forward: Number of tokens to forward before expecting this packet.
                True = 1 token, False = 0 tokens, int = that many tokens.
        """
        self._expected_packets_queue.append(
            (expected_pkt, self._to_forward_count(forward))
        )
        return self

    def expect_packets(
        self, packets: list[Packet], forward: int | bool = True
    ) -> StreamTestBuilder:
        """
        Add multiple expected packets to the queue.

        Args:
            packets: List of packets to expect
            forward: Number of tokens to forward before expecting EACH packet.
                True = 1 token per packet, False = 0 tokens, int = that many
                tokens per packet.
        """
        per_packet = self._to_forward_count(forward)
        for pkt in packets:
            self._expected_packets_queue.append((pkt, per_packet))
        return self

    def expect_reasoning(
        self,
        reasoning_tokens: list[str],
        turn_index: int,
    ) -> StreamTestBuilder:
        """Expect ReasoningStart, one delta per token, then ReasoningDone."""
        self.expect(
            Packet(placement=create_placement(turn_index), obj=ReasoningStart())
        )
        self.expect_packets(
            [
                create_packet_with_reasoning_delta(token, turn_index)
                for token in reasoning_tokens
            ]
        )
        self.expect(
            Packet(placement=create_placement(turn_index), obj=ReasoningDone())
        )
        return self

    def expect_agent_response(
        self,
        answer_tokens: list[str],
        turn_index: int,
        final_documents: list[SearchDoc] | None = None,
    ) -> StreamTestBuilder:
        """Expect AgentResponseStart, one delta per token, then OverallStop."""
        self.expect(
            Packet(
                placement=create_placement(turn_index),
                obj=AgentResponseStart(
                    final_documents=final_documents,
                ),
            )
        )
        self.expect_packets(
            [
                create_packet_with_agent_response_delta(token, turn_index)
                for token in answer_tokens
            ]
        )
        self.expect(
            Packet(placement=create_placement(turn_index), obj=OverallStop())
        )
        return self

    def run_and_validate(self, stream: Iterator[AnswerStreamPart]) -> None:
        """Drain the expectation queue against the live stream, forwarding the
        mock LLM the requested number of tokens before each expected packet."""
        while self._expected_packets_queue:
            expected_pkt, forward_count = self._expected_packets_queue.pop(0)
            if forward_count > 0:
                self._llm_controller.forward(forward_count)
            assert_answer_stream_part_correct(next(stream), expected_pkt)

View File

@@ -0,0 +1,121 @@
from __future__ import annotations
from collections.abc import Iterator
from uuid import UUID
from sqlalchemy.orm import Session
from onyx.chat.chat_utils import create_chat_session_from_request
from onyx.chat.models import AnswerStreamPart
from onyx.chat.process_message import handle_stream_message_objects
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import SearchDoc
from onyx.db.models import ChatSession
from onyx.db.models import User
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDelta
from tests.external_dependency_unit.mock_content_provider import MockWebContent
from tests.external_dependency_unit.mock_search_provider import MockWebSearchResult
def create_placement(
    turn_index: int,
    tab_index: int = 0,
    sub_turn_index: int | None = None,
) -> Placement:
    """Build a Placement for a stream packet at the given turn/tab/sub-turn."""
    return Placement(
        turn_index=turn_index,
        tab_index=tab_index,
        sub_turn_index=sub_turn_index,
    )
def submit_query(
    query: str, chat_session_id: UUID | None, db_session: Session, user: User
) -> Iterator[AnswerStreamPart]:
    """Send `query` through the streaming message handler and return the stream.

    When no chat_session_id is given, a session-creation request is attached so
    the backend creates a new session for the message.
    """
    session_info = ChatSessionCreationRequest() if chat_session_id is None else None
    request = SendMessageRequest(
        message=query,
        chat_session_id=chat_session_id,
        stream=True,
        chat_session_info=session_info,
    )
    return handle_stream_message_objects(
        new_msg_req=request,
        user=user,
        db_session=db_session,
    )
def create_chat_session(
    db_session: Session,
    user: User,
) -> ChatSession:
    """Create a fresh chat session for `user` with default creation options."""
    return create_chat_session_from_request(
        chat_session_request=ChatSessionCreationRequest(),
        user_id=user.id,
        db_session=db_session,
    )
def create_packet_with_agent_response_delta(token: str, turn_index: int) -> Packet:
    """Build the Packet emitted for a single agent-response token."""
    return Packet(
        placement=create_placement(turn_index),
        obj=AgentResponseDelta(
            content=token,
        ),
    )
def create_packet_with_reasoning_delta(token: str, turn_index: int) -> Packet:
    """Build the Packet emitted for a single reasoning token."""
    return Packet(
        placement=create_placement(turn_index),
        obj=ReasoningDelta(
            reasoning=token,
        ),
    )
def create_web_search_doc(
    semantic_identifier: str,
    link: str,
    blurb: str,
) -> SearchDoc:
    """Build a minimal WEB-source SearchDoc for stream-packet expectations."""
    return SearchDoc(
        # document_id scheme mirrors how web-search results are identified
        document_id=f"WEB_SEARCH_DOC_{link}",
        chunk_ind=0,
        semantic_identifier=semantic_identifier,
        link=link,
        blurb=blurb,
        source_type=DocumentSource.WEB,
        boost=1,
        hidden=False,
        metadata={},
        match_highlights=[],
    )
def mock_web_search_result_to_search_doc(result: MockWebSearchResult) -> SearchDoc:
    """Map a mock web-search result onto the SearchDoc the stream should emit."""
    return create_web_search_doc(result.title, result.link, result.snippet)
def mock_web_content_to_search_doc(content: MockWebContent) -> SearchDoc:
    """Map mock fetched web content onto the SearchDoc the stream should emit.

    Note: for opened-url docs the blurb is the page title, not a snippet.
    """
    return create_web_search_doc(content.title, content.url, content.title)
def tokenise(text: str) -> list[str]:
    """Split on single spaces and re-append a trailing space to each token,
    mimicking how the mock LLM streams tokens."""
    return [f"{token} " for token in text.split(" ")]

View File

@@ -0,0 +1,982 @@
from __future__ import annotations
import json
from uuid import UUID
import pytest
from sqlalchemy.orm import Session
from onyx.chat.models import CreateChatSessionID
from onyx.chat.models import MessageResponseIDInfo
from onyx.configs.constants import DocumentSource
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeartbeat
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
from onyx.server.query_and_chat.streaming_models import OpenUrlStart
from onyx.server.query_and_chat.streaming_models import OpenUrlUrls
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ReasoningDone
from onyx.server.query_and_chat.streaming_models import ReasoningStart
from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
from onyx.server.query_and_chat.streaming_models import SearchToolStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.server.query_and_chat.streaming_models import TopLevelBranching
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
from tests.external_dependency_unit.answer.stream_test_assertions import (
assert_answer_stream_part_correct,
)
from tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder
from tests.external_dependency_unit.answer.stream_test_utils import create_chat_session
from tests.external_dependency_unit.answer.stream_test_utils import (
create_packet_with_agent_response_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
create_packet_with_reasoning_delta,
)
from tests.external_dependency_unit.answer.stream_test_utils import create_placement
from tests.external_dependency_unit.answer.stream_test_utils import (
mock_web_content_to_search_doc,
)
from tests.external_dependency_unit.answer.stream_test_utils import (
mock_web_search_result_to_search_doc,
)
from tests.external_dependency_unit.answer.stream_test_utils import submit_query
from tests.external_dependency_unit.answer.stream_test_utils import tokenise
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.mock_content_provider import MockWebContent
from tests.external_dependency_unit.mock_content_provider import (
use_mock_content_provider,
)
from tests.external_dependency_unit.mock_image_provider import (
use_mock_image_generation_provider,
)
from tests.external_dependency_unit.mock_llm import LLMAnswerResponse
from tests.external_dependency_unit.mock_llm import LLMReasoningResponse
from tests.external_dependency_unit.mock_llm import LLMToolCallResponse
from tests.external_dependency_unit.mock_llm import use_mock_llm
from tests.external_dependency_unit.mock_search_pipeline import MockInternalSearchResult
from tests.external_dependency_unit.mock_search_pipeline import use_mock_search_pipeline
from tests.external_dependency_unit.mock_search_provider import MockWebSearchResult
from tests.external_dependency_unit.mock_search_provider import use_mock_web_provider
def test_stream_chat_with_answer(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """Happy path: a plain question streams back a single agent-answer turn."""
    ensure_default_llm_provider(db_session)
    user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_answer"
    )
    question = "What is the capital of France?"
    expected_tokens = tokenise("The capital of France is Paris.")

    with use_mock_llm() as llm_controller:
        builder = StreamTestBuilder(llm_controller=llm_controller)
        builder.add_response(LLMAnswerResponse(answer_tokens=expected_tokens))

        session = create_chat_session(db_session=db_session, user=user)
        stream = submit_query(
            query=question,
            chat_session_id=session.id,
            db_session=db_session,
            user=user,
        )

        # First stream part reserves the user/assistant message IDs.
        assert_answer_stream_part_correct(
            received=next(stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )

        # The rest of the stream is exactly one agent-response turn.
        builder.expect_agent_response(
            answer_tokens=expected_tokens,
            turn_index=0,
        ).run_and_validate(stream=stream)

        # Stream must be fully exhausted afterwards.
        with pytest.raises(StopIteration):
            next(stream)
def test_stream_chat_with_answer_create_chat(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """When no chat_session_id is supplied, the stream first announces the
    newly created session, then the message IDs, then the answer turn."""
    ensure_default_llm_provider(db_session)
    user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_answer_create_chat"
    )
    reply_tokens = ["Hello friend"]

    with use_mock_llm() as llm_controller:
        builder = StreamTestBuilder(llm_controller=llm_controller)
        builder.add_response(LLMAnswerResponse(answer_tokens=reply_tokens))

        # No pre-existing session: the backend must create one itself.
        stream = submit_query(
            query="Hi there friends",
            chat_session_id=None,
            db_session=db_session,
            user=user,
        )

        # The created session ID is emitted before anything else.
        assert_answer_stream_part_correct(
            received=next(stream),
            expected=CreateChatSessionID(
                chat_session_id=UUID("123e4567-e89b-12d3-a456-426614174000")
            ),
        )
        assert_answer_stream_part_correct(
            received=next(stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=2,
            ),
        )

        builder.expect_agent_response(
            answer_tokens=reply_tokens,
            turn_index=0,
        ).run_and_validate(stream=stream)

        with pytest.raises(StopIteration):
            next(stream)
def test_stream_chat_with_search_and_openurl_tools(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """Multi-turn stream: reason + web_search, reason + open_url, reason + answer.

    Validates the exact packet sequence per turn (placements 0-5) and that the
    final agent response carries every searched and opened document.
    """
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(
        db_session, email_prefix="test_stream_chat_with_search_tool"
    )
    QUERY = "What is the weather in Sydney?"
    REASONING_RESPONSE_1 = (
        "I need to perform a web search to get current weather details. "
        "I can use the search tool to do this."
    )
    WEB_QUERY_1 = "weather in sydney"
    WEB_QUERY_2 = "current weather in sydney"
    # Canned search hits registered for WEB_QUERY_1 / WEB_QUERY_2 respectively.
    RESULTS1 = [
        MockWebSearchResult(
            title="Official Weather",
            link="www.weather.com.au",
            snippet="The current weather in Sydney is 20 degrees Celsius.",
        ),
        MockWebSearchResult(
            title="Weather CHannel",
            link="www.wc.com.au",
            snippet="Morning is 10 degree Celsius, afternoon is 25 degrees Celsius.",
        ),
    ]
    RESULTS2 = [
        MockWebSearchResult(
            title="Weather Now!",
            link="www.weathernow.com.au",
            snippet="The weather right now is sunny with a temperature of 22 degrees Celsius.",
        )
    ]
    REASONING_RESPONSE_2 = "I like weathernow and the official weather site"
    QUERY_URLS_1 = ["www.weathernow.com.au", "www.weather.com.au"]
    # Page contents served by the mock content provider when open_url fetches
    # the URLs in QUERY_URLS_1.
    CONTENT1 = [
        MockWebContent(
            title="Weather Now!",
            url="www.weathernow.com.au",
            content="The weather right now is sunny with a temperature of 22 degrees Celsius.",
        ),
        MockWebContent(
            title="Weather Official",
            url="www.weather.com.au",
            content="The current weather in Sydney is 20 degrees Celsius.",
        ),
    ]
    REASONING_RESPONSE_3 = (
        "I now know everything that I need to know. " "I can now answer the question."
    )
    ANSWER_RESPONSE_1 = (
        "The weather in Sydney is sunny with a temperature of 22 degrees celsius."
    )
    with (
        use_mock_llm() as mock_llm,
        use_mock_web_provider(db_session) as mock_web,
        use_mock_content_provider() as mock_content,
    ):
        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )
        chat_session = create_chat_session(db_session=db_session, user=test_user)
        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )
        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )
        # LLM Stream Response 1
        # Turn 0 is reasoning; turn 1 is a single web_search tool call that
        # batches both queries and streams their combined document list.
        mock_web.add_results(WEB_QUERY_1, RESULTS1)
        mock_web.add_results(WEB_QUERY_2, RESULTS2)
        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_1))
        ).add_response(
            LLMToolCallResponse(
                tool_name="web_search",
                tool_call_id="123",
                tool_call_argument_tokens=[
                    json.dumps({"queries": [WEB_QUERY_1, WEB_QUERY_2]})
                ],
            )
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 0)
                for token in tokenise(REASONING_RESPONSE_1)
            ]
        ).expect(
            Packet(placement=create_placement(0), obj=ReasoningDone())
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolStart(
                    is_internet_search=True,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolQueriesDelta(
                    queries=[WEB_QUERY_1, WEB_QUERY_2],
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SearchToolDocumentsDelta(
                    documents=[
                        mock_web_search_result_to_search_doc(result)
                        for result in RESULTS1
                    ]
                    + [
                        mock_web_search_result_to_search_doc(result)
                        for result in RESULTS2
                    ]
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 2
        # Turn 2 is reasoning; turn 3 opens both URLs and streams their docs.
        for content in CONTENT1:
            mock_content.add_content(content)
        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_2))
        ).add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"urls": QUERY_URLS_1})],
            )
        ).expect(
            Packet(
                placement=create_placement(2),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 2)
                for token in tokenise(REASONING_RESPONSE_2)
            ]
        ).expect(
            Packet(
                placement=create_placement(2),
                obj=ReasoningDone(),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlStart(),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlUrls(urls=[content.url for content in CONTENT1]),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content) for content in CONTENT1
                    ]
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(3),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 3
        # Turn 4 is final reasoning; turn 5 is the answer, which must cite all
        # documents gathered so far (search results + opened pages).
        handler.add_response(
            LLMReasoningResponse(reasoning_tokens=tokenise(REASONING_RESPONSE_3))
        ).add_response(
            LLMAnswerResponse(answer_tokens=tokenise(ANSWER_RESPONSE_1))
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=ReasoningStart(),
            )
        ).expect_packets(
            [
                create_packet_with_reasoning_delta(token, 4)
                for token in tokenise(REASONING_RESPONSE_3)
            ]
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=ReasoningDone(),
            )
        ).expect_agent_response(
            answer_tokens=tokenise(ANSWER_RESPONSE_1),
            turn_index=5,
            final_documents=[
                mock_web_search_result_to_search_doc(result) for result in RESULTS1
            ]
            + [mock_web_search_result_to_search_doc(result) for result in RESULTS2]
            + [mock_web_content_to_search_doc(content) for content in CONTENT1],
        ).run_and_validate(
            stream=answer_stream
        )
        with pytest.raises(StopIteration):
            next(answer_stream)
def test_image_generation_tool_no_reasoning(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """Image-generation tool call with no reasoning turn.

    The mock image provider is given an artificial delay so the tool emits
    heartbeat packets while "generating"; the test pins the exact count.
    """
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(db_session, email_prefix="test_image_generation_tool")
    QUERY = "Create me an image of a dog on a rocketship"
    # 1x1 PNG, base64-encoded — smallest valid payload for the mock provider.
    IMAGE_DATA = (
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfF"
        "cSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    )
    # Heartbeat interval is 5 seconds. A delay of 8 seconds ensures exactly 2 heartbeats:
    IMAGE_DELAY = 8.0
    ANSWER_RESPONSE = "Here is a dog on a rocketship"
    with (
        use_mock_llm() as mock_llm,
        use_mock_image_generation_provider() as mock_image_gen,
    ):
        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )
        chat_session = create_chat_session(db_session=db_session, user=test_user)
        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )
        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )
        # LLM Stream Response 1
        mock_image_gen.add_image(IMAGE_DATA, IMAGE_DELAY)
        mock_llm.set_max_timeout(
            IMAGE_DELAY + 5.0
        )  # Give enough buffer for image generation
        # The LLMToolCallResponse has 2 tokens (1 for tool name/id + 1 for arguments).
        # We need to forward all 2 tokens before the tool starts executing and emitting packets.
        # The tool then emits: start, heartbeats (during image generation), final, and section end.
        handler.add_response(
            LLMToolCallResponse(
                tool_name="generate_image",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"prompt": QUERY})],
            )
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ImageGenerationToolStart(),
            ),
            forward=2,  # Forward both tool call tokens before expecting first packet
        ).expect_packets(
            [
                Packet(
                    placement=create_placement(0),
                    obj=ImageGenerationToolHeartbeat(),
                )
            ]
            * 2,
            # forward=False: these packets come from the executing tool, not
            # from forwarding further LLM tokens.
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=ImageGenerationFinal(
                    images=[
                        GeneratedImage(
                            file_id="123",
                            url="/api/chat/file/123",
                            revised_prompt=QUERY,
                            shape="square",
                        )
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(0),
                obj=SectionEnd(),
            ),
            forward=False,
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 2 - the answer comes after the tool call, so turn_index=1
        handler.add_response(
            LLMAnswerResponse(
                answer_tokens=tokenise(ANSWER_RESPONSE),
            )
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=AgentResponseStart(final_documents=None),
            )
        ).expect_packets(
            [
                create_packet_with_agent_response_delta(token, 1)
                for token in tokenise(ANSWER_RESPONSE)
            ]
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=OverallStop(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        with pytest.raises(StopIteration):
            next(answer_stream)
def test_parallel_internal_and_web_search_tool_calls(
    db_session: Session,
    full_deployment_setup: None,
    mock_external_deps: None,
) -> None:
    """
    User asks a question
    LLM does some thinking
    LLM runs parallel tool calls for internal & web search
    -> Internal Search Branch performs search + read ~10 documents
    -> Web Search: Searches the web for information
    LLM reads web documents
    LLM does thinking across all results
    LLM reads one more website
    LLM does more thinking
    LLM generates answer
    """
    ensure_default_llm_provider(db_session)
    test_user = create_test_user(
        db_session, email_prefix="test_parallel_internal_and_web_search_tool_calls"
    )
    # NOTE(review): "AVALIABLE" is a typo for "AVAILABLE" — renaming is a
    # separate cleanup since the name is used below.
    AVALIABLE_CONNECTORS = [
        DocumentSource.GOOGLE_DRIVE,
        DocumentSource.CONFLUENCE,
        DocumentSource.LINEAR,
        DocumentSource.FIREFLIES,
    ]
    QUERY = "How will forecasts against 2026 global GDP growth affect our Q2 strategy?"
    THINKING_RESPONSE_1 = (
        "I need to build more context around the user's query to answer it. "
        "I should look at GDP growth projections for 2026. "
        "I should also look at what the Q2 strategy is and what projects are included. "
        "I should perform both web and internal searches in parallel to get information efficiently."
    )
    WEB_QUERIES_1 = [
        "2026 global GDP growth projections",
        "GDP growth 2026",
        "GDP forecast 2026",
    ]
    # Canned web results keyed by query; note some hits repeat across queries
    # (deliberate, to exercise de-duplication below).
    WEB_RESULTS_1 = {
        WEB_QUERIES_1[0]: [
            MockWebSearchResult(
                title="World Economic Outlook Update, January 2026",
                link="https://www.imf.org/weo/issues/2026/01/19/world-economic-outlook-update-january-2026",
                snippet="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
            ),
            MockWebSearchResult(
                title="IMF sees steady global growth in 2026 as AI boom offsets ...",
                link="https://www.reuters.com/article/us-world-economy-imf-idUSKBN2JU23E",
                snippet="IMF forecasts 2026 global GDP growth at 3.3% even with stronger 2025 performance",
            ),
            MockWebSearchResult(
                title="The Global Economy Is Forecast to Post...",
                link="https://www.goldmansachs.com/insights/articles/123",
                snippet="Global GDP is projected by Goldman Sachs Research to increase 2.8% in 2026",
            ),
        ],
        WEB_QUERIES_1[1]: [
            MockWebSearchResult(
                title="US third-quarter economic growth revised slightly higher",
                link="https://www.reuters.com/word/us-third-quarter-eco",
                snippet="Gross domestic product increased at an upwardly revised 4.4% annualized rate, the ...",
            ),
            MockWebSearchResult(
                title="US GDP Growth Is Projected to Outperform Economist ...",
                link="https://www.goldmansachs.com/insights/articles/321",
                snippet="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
            ),
            MockWebSearchResult(
                title="Gross Domestic Product",
                link="https://www.bea.gov/data/gdp/gross-domestic-product",
                snippet="Real gross domestic product (GDP) increased at an annual rate of 4.4 percent in the third quarter",
            ),
        ],
        WEB_QUERIES_1[2]: [
            MockWebSearchResult(
                title="World Economic Outlook Update, January 2026",
                link="https://www.imf.org/web/issues/2026/01/19/world-economic-outlook-update-january-2026",
                snippet="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
            ),
            MockWebSearchResult(
                title="US GDP Growth Is Projected to Outperform Economist ...",
                link="https://www.goldmansachs.com/insights/articles/321",
                snippet="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
            ),
            MockWebSearchResult(
                title="Our economic outlook for the United States - Vanguard",
                link="https://corporate.vanguard.com/content/corp/vemo",
                snippet="We expect strong capital investment to remain a principal strength in the year ahead",
            ),
        ],
    }
    INTERNAL_QUERIES_1 = ["Q2 strategy 2026", "GDP growth 2026 projects", "Q2 projects"]
    INTERNAL_RESULTS_1 = {
        INTERNAL_QUERIES_1[0]: [
            MockInternalSearchResult(
                document_id="123456789",
                source_type=DocumentSource.GOOGLE_DRIVE,
                semantic_identifier="Q2 strategy 2026",
                chunk_ind=11,
            ),
            MockInternalSearchResult(
                document_id="732190732173",
                source_type=DocumentSource.FIREFLIES,
                semantic_identifier="What we think is going to happen in Q2",
                chunk_ind=5,
            ),
            MockInternalSearchResult(
                document_id="12389123219",
                source_type=DocumentSource.CONFLUENCE,
                semantic_identifier="Strategy roadmap for Q2 2026",
                chunk_ind=7,
            ),
        ],
        INTERNAL_QUERIES_1[1]: [
            MockInternalSearchResult(
                document_id="123123",
                source_type=DocumentSource.LINEAR,
                semantic_identifier="GDP growth 2026 projects",
                chunk_ind=13,
            )
        ],
        INTERNAL_QUERIES_1[2]: [
            MockInternalSearchResult(
                document_id="98823643243",
                source_type=DocumentSource.GOOGLE_DRIVE,
                semantic_identifier="Full list of Q2 projects",
                chunk_ind=1,
            )
        ],
    }
    OPEN_URL_URLS_1 = [
        WEB_RESULTS_1[WEB_QUERIES_1[0]][0].link,
        WEB_RESULTS_1[WEB_QUERIES_1[0]][2].link,
        WEB_RESULTS_1[WEB_QUERIES_1[2]][0].link,
    ]
    OPEN_URL_DOCUMENTS_1 = [
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[0]][0].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[0]][0].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[0]][2].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[0]][2].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[2]][0].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[2]][0].link,
            content="Global growth is projected at 3.3 percent for 2026 and 3.2 percent for 2027...",
        ),
    ]
    THINKING_RESPONSE_2 = (
        "I now have a clear picture of the 2026 global GDP projections and the Q2 strategy. "
        "I would like to now about the outperform expections though..."
    )
    OPEN_URL_URLS_2 = [WEB_RESULTS_1[WEB_QUERIES_1[1]][1].link]
    OPEN_URL_DOCUMENTS_2 = [
        MockWebContent(
            title=WEB_RESULTS_1[WEB_QUERIES_1[1]][1].title,
            url=WEB_RESULTS_1[WEB_QUERIES_1[1]][1].link,
            content="US GDP is forecast to expand 2.5% in 2026 (fourth quarter, yoy), versus",
        )
    ]
    REASONING_RESPONSE_3 = (
        "I now have all the information I need to answer the user's question."
    )
    ANSWER_RESPONSE = (
        "We will have to change around some of our projects to accomodate the outperform expections. "
        "We should focus on aggresive expansion projects and prioritize them over cost-cutting initiatives."
    )
    # De-duplicate the expected web docs across queries (the same hit can
    # appear for multiple queries); first-appearance order is preserved.
    expected_web_docs = []
    seen_web_results = set()
    for web_results in WEB_RESULTS_1.values():
        for web_result in web_results:
            key = (web_result.title, web_result.link)
            if key in seen_web_results:
                continue
            seen_web_results.add(key)
            expected_web_docs.append(mock_web_search_result_to_search_doc(web_result))
    # Same de-duplication for internal search results.
    expected_internal_docs = []
    seen_internal_results = set()
    for internal_results in INTERNAL_RESULTS_1.values():
        for internal_result in internal_results:
            key = (internal_result.semantic_identifier, internal_result.document_id)
            if key in seen_internal_results:
                continue
            seen_internal_results.add(key)
            expected_internal_docs.append(internal_result.to_search_doc())
    with (
        use_mock_llm() as mock_llm,
        use_mock_search_pipeline(
            connectors=AVALIABLE_CONNECTORS
        ) as mock_search_pipeline,
        use_mock_web_provider(db_session) as mock_web,
        use_mock_content_provider() as mock_content,
    ):
        for query, web_results in WEB_RESULTS_1.items():
            mock_web.add_results(query, web_results)
        for query, internal_results in INTERNAL_RESULTS_1.items():
            mock_search_pipeline.add_search_results(query, internal_results)
        handler = StreamTestBuilder(
            llm_controller=mock_llm,
        )
        chat_session = create_chat_session(db_session=db_session, user=test_user)
        answer_stream = submit_query(
            query=QUERY,
            chat_session_id=chat_session.id,
            db_session=db_session,
            user=test_user,
        )
        assert_answer_stream_part_correct(
            received=next(answer_stream),
            expected=MessageResponseIDInfo(
                user_message_id=1,
                reserved_assistant_message_id=1,
            ),
        )
        # LLM Stream Response 1
        # Turn 0: reasoning. Turn 1: two parallel tool branches (branch 0 is
        # internal_search, branch 1 is web_search), announced by TopLevelBranching.
        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(THINKING_RESPONSE_1),
            )
        ).add_responses_together(
            LLMToolCallResponse(
                tool_name="internal_search",
                tool_call_id="123",
                tool_call_argument_tokens=[json.dumps({"queries": INTERNAL_QUERIES_1})],
            ),
            LLMToolCallResponse(
                tool_name="web_search",
                tool_call_id="321",
                tool_call_argument_tokens=[json.dumps({"queries": WEB_QUERIES_1})],
            ),
        ).expect_reasoning(
            reasoning_tokens=tokenise(THINKING_RESPONSE_1),
            turn_index=0,
        ).expect(
            Packet(
                placement=create_placement(1),
                obj=TopLevelBranching(
                    num_parallel_branches=2,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolStart(
                    is_internet_search=False,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolStart(
                    is_internet_search=True,
                ),
            )
        ).expect(
            # Internal search also runs the original user QUERY alongside the
            # LLM-provided queries.
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolQueriesDelta(
                    queries=INTERNAL_QUERIES_1 + [QUERY],
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SearchToolDocumentsDelta(
                    documents=expected_internal_docs,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 0),
                obj=SectionEnd(),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolQueriesDelta(
                    queries=WEB_QUERIES_1,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SearchToolDocumentsDelta(
                    documents=expected_web_docs,
                ),
            )
        ).expect(
            Packet(
                placement=create_placement(1, 1),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 2
        # Turn 2: open_url over three of the web hits (no reasoning this turn).
        for content in OPEN_URL_DOCUMENTS_1:
            mock_content.add_content(content)
        handler.add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="456",
                tool_call_argument_tokens=[json.dumps({"urls": OPEN_URL_URLS_1})],
            )
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlStart(),
            ),
            forward=2,  # Need both header + argument tokens for the tool call
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlUrls(urls=OPEN_URL_URLS_1),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content)
                        for content in OPEN_URL_DOCUMENTS_1
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(2, 0),
                obj=SectionEnd(),
            ),
            forward=False,
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 3
        # Turn 3: reasoning. Turn 4: one more open_url for the remaining page.
        for content in OPEN_URL_DOCUMENTS_2:
            mock_content.add_content(content)
        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(THINKING_RESPONSE_2),
            )
        ).add_response(
            LLMToolCallResponse(
                tool_name="open_url",
                tool_call_id="789",
                tool_call_argument_tokens=[json.dumps({"urls": OPEN_URL_URLS_2})],
            )
        ).expect_reasoning(
            reasoning_tokens=tokenise(THINKING_RESPONSE_2),
            turn_index=3,
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=OpenUrlStart(),
            )
        ).expect(
            Packet(placement=create_placement(4), obj=OpenUrlUrls(urls=OPEN_URL_URLS_2))
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=OpenUrlDocuments(
                    documents=[
                        mock_web_content_to_search_doc(content)
                        for content in OPEN_URL_DOCUMENTS_2
                    ]
                ),
            ),
            forward=False,
        ).expect(
            Packet(
                placement=create_placement(4),
                obj=SectionEnd(),
            )
        ).run_and_validate(
            stream=answer_stream
        )
        # LLM Stream Response 4
        # Turn 5: final reasoning. Turn 6: answer citing every gathered doc
        # (internal + web search results + both open_url fetches).
        handler.add_response(
            LLMReasoningResponse(
                reasoning_tokens=tokenise(REASONING_RESPONSE_3),
            )
        ).add_response(
            LLMAnswerResponse(
                answer_tokens=tokenise(ANSWER_RESPONSE),
            )
        ).expect_reasoning(
            reasoning_tokens=tokenise(REASONING_RESPONSE_3),
            turn_index=5,
        ).expect_agent_response(
            answer_tokens=tokenise(ANSWER_RESPONSE),
            turn_index=6,
            final_documents=expected_internal_docs
            + expected_web_docs
            + [
                mock_web_content_to_search_doc(content)
                for content in OPEN_URL_DOCUMENTS_1
            ]
            + [
                mock_web_content_to_search_doc(content)
                for content in OPEN_URL_DOCUMENTS_2
            ],
        ).run_and_validate(
            stream=answer_stream
        )
        # End stream
        with pytest.raises(StopIteration):
            next(answer_stream)

View File

@@ -4,16 +4,14 @@ import os
from pathlib import Path
from typing import Optional
import nltk # type: ignore
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import SqlEngine
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.file_store import get_default_file_store
from onyx.indexing.models import IndexingSetting
from onyx.setup import setup_document_indices
from onyx.setup import setup_postgres
from onyx.setup import setup_vespa
from shared_configs import configs as shared_configs_module
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from tests.external_dependency_unit.constants import TEST_TENANT_ID
@@ -32,7 +30,6 @@ def ensure_full_deployment_setup(
- Runs setup_onyx (Postgres defaults, Vespa indices)
- Initializes file store (best-effort)
- Ensures Vespa indices exist
- Installs NLTK stopwords and punkt_tab
"""
global _SETUP_COMPLETE
if _SETUP_COMPLETE:
@@ -49,9 +46,6 @@ def ensure_full_deployment_setup(
# Avoid warm-up network calls during setup
shared_configs_module.SKIP_WARM_UP = True
nltk.download("stopwords", quiet=True)
nltk.download("punkt_tab", quiet=True)
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant)
original_cwd = os.getcwd()
backend_dir = Path(__file__).resolve().parents[2] # points to 'backend'
@@ -73,8 +67,8 @@ def ensure_full_deployment_setup(
document_index = get_default_document_index(
active.primary, active.secondary
)
ok = setup_vespa(
document_index=document_index,
ok = setup_document_indices(
document_indices=[document_index],
index_setting=IndexingSetting.from_db_model(active.primary),
secondary_index_setting=(
IndexingSetting.from_db_model(active.secondary)

View File

@@ -282,12 +282,12 @@ def test_anthropic_prompt_caching_reduces_costs(
Anthropic requires explicit cache_control parameters.
"""
# Create Anthropic LLM
# NOTE: prompt caching support is model-specific; `claude-3-5-haiku-20241022` is known
# NOTE: prompt caching support is model-specific; `claude-3-haiku-20240307` is known
# to return cache_creation/cache_read usage metrics, while some newer aliases may not.
llm = LitellmLLM(
api_key=os.environ["ANTHROPIC_API_KEY"],
model_provider="anthropic",
model_name="claude-3-5-haiku-20241022",
model_name="claude-3-haiku-20240307",
max_input_tokens=200000,
)

Some files were not shown because too many files have changed in this diff Show More