Compare commits

..

41 Commits

Author SHA1 Message Date
Bo-Onyx
2a7f6e4ffc fix(api memory): replace glibc with jemalloc for memory allocating (#9196) 2026-03-25 14:42:24 -07:00
Nikolas Garza
b6b14ffaf5 feat(slack): convert markdown tables to Slack-friendly format (#8999) 2026-03-04 11:54:38 -08:00
Justin Tahara
9fb76042a2 fix(celery): Guardrail for User File Processing (#8633) 2026-03-01 10:30:03 -08:00
Nikolas Garza
caad67a34a fix(slack): sanitize HTML tags and broken citation links in bot responses (#8767) 2026-02-26 17:27:24 -08:00
dependabot[bot]
c33437488f chore(deps): Bump mistune from 0.8.4 to 3.1.4 in /backend (#6407)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-02-26 17:27:24 -08:00
Jamison Lahman
9f66ee7240 chore(devtools): upgrade ods: v0.6.1->v0.6.2 (#8773) 2026-02-26 16:26:37 -08:00
justin-tahara
e6ef2b5074 Fixing mypy 2026-02-09 15:47:10 -08:00
justin-tahara
74132175a8 Fixing mypy 2026-02-09 15:47:10 -08:00
Justin Tahara
29f707ee2d fix(posthog): Chat metrics for Cloud (#8278) 2026-02-09 15:47:10 -08:00
Justin Tahara
f0eb86fb9f fix(ui): Updating Dropdown Modal component (#8033) 2026-02-06 11:59:09 -08:00
Justin Tahara
b422496a4c fix(agents): Removing Label Dependency (#8189) 2026-02-06 11:39:09 -08:00
Justin Tahara
31d6a45b23 chore(chat): Cleaning Error Codes + Tests (#8186) 2026-02-06 11:02:41 -08:00
Justin Tahara
36f3ac1ec5 feat: onyx discord bot - supervisord and kube deployment (#7706) 2026-02-02 15:05:21 -08:00
Wenxi Onyx
74f5b3025a fix: discord svg (can't cherry-pick) 2026-02-02 10:03:39 -08:00
Justin Tahara
c18545d74c feat(desktop): Ensure that UI reflects Light/Dark Toggle (#7684) 2026-02-02 10:03:39 -08:00
Justin Tahara
48171e3700 fix(ui): Agent Saving with other people files (#8095) 2026-02-02 10:03:39 -08:00
Wenxi
f5a5709876 feat: onyx discord bot - frontend (#7497) 2026-02-02 10:03:39 -08:00
Justin Tahara
85868b1b83 fix(desktop): Remove Global Shortcuts (#7914) 2026-01-30 13:46:20 -08:00
Justin Tahara
8dc14c23e6 fix(asana): Workspace Team ID mismatch (#7674) 2026-01-30 13:19:02 -08:00
Jamison Lahman
23821cc0e8 chore(mypy): fix mypy cache issues switching between HEAD and release (#7732) 2026-01-27 15:52:57 -08:00
Jamison Lahman
b359e13281 fix(citations): enable citation sidebar w/ web_search-only assistants (#7888) 2026-01-27 13:26:29 -08:00
Justin Tahara
717f410a4a fix(llm): Hide private models from Agent Creation (#7873) 2026-01-27 12:21:06 -08:00
SubashMohan
ada0946a62 fix(layout): adjust footer margin and prevent page refresh on chatsession drop (#7759) 2026-01-27 11:57:18 -08:00
Jamison Lahman
eb2ac8f5a3 fix(fe): inline code text wraps (#7574) 2026-01-27 11:33:03 -08:00
Nikolas Garza
fbeb57c592 fix(slack): Extract person names and filter garbage in query expansion (#7632) 2026-01-27 11:26:52 -08:00
Nikolas Garza
d6da9c9b85 fix: scroll to bottom when loading existing conversations (#7614) 2026-01-27 11:26:52 -08:00
Nikolas Garza
5aea2e223e fix(billing): remove grandfathered pricing option when subscription lapses (#7583) 2026-01-27 11:26:52 -08:00
Nikolas Garza
1ff91de07e fix: deflake chat user journey test (#7646) 2026-01-27 11:18:27 -08:00
Nikolas Garza
b3dbc69faf fix(tests): use crawler-friendly search query in Exa integration test (#7746) 2026-01-27 11:13:01 -08:00
Yuhong Sun
431597b0f9 fix: LiteLLM Azure models don't stream (#7761) 2026-01-27 10:49:17 -08:00
Yuhong Sun
51b4e5f2fb fix: Azure OpenAI Tool Calls (#7727) 2026-01-27 10:49:17 -08:00
Justin Tahara
9afa04a26b fix(ui): Coda Logo (#7656) 2026-01-26 17:43:54 -08:00
Justin Tahara
70a3a9c0cd fix(ui): User Groups Connectors Fix (#7658) 2026-01-26 17:43:45 -08:00
Justin Tahara
080165356c fix(ui): First Connector Result (#7657) 2026-01-26 17:43:35 -08:00
Justin Tahara
3ae974bdf6 fix(ui): Fix Token Rate Limits Page (#7659) 2026-01-26 17:42:57 -08:00
Justin Tahara
1471658151 fix(vertex ai): Extra Args for Opus 4.5 (#7586) 2026-01-26 17:42:43 -08:00
Justin Tahara
3e85e9c1a3 feat(desktop): Domain Configuration (#7655) 2026-01-26 17:12:33 -08:00
Justin Tahara
851033be5f feat(desktop): Properly Sign Mac App (#7608) 2026-01-26 17:12:24 -08:00
Jamison Lahman
91e974a6cc chore(desktop): make artifact filename version-agnostic (#7679) 2026-01-26 16:20:39 -08:00
Jamison Lahman
38ba4f8a1c chore(deployments): fix region (#7640) 2026-01-26 16:20:39 -08:00
Jamison Lahman
6f02473064 chore(deployments): fetch secrets from AWS (#7584) 2026-01-26 16:20:39 -08:00
300 changed files with 7035 additions and 16595 deletions

View File

@@ -404,7 +404,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -477,7 +477,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -537,7 +537,7 @@ jobs:
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -615,7 +615,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -696,7 +696,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -764,7 +764,7 @@ jobs:
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -839,7 +839,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -911,7 +911,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -970,7 +970,7 @@ jobs:
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
@@ -1049,7 +1049,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
@@ -1128,7 +1128,7 @@ jobs:
latest=false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
with:
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
@@ -1193,7 +1193,7 @@ jobs:
parse-json-secrets: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -29,7 +29,6 @@ jobs:
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/

View File

@@ -94,7 +94,7 @@ jobs:
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3

View File

@@ -45,9 +45,6 @@ env:
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
# OpenSearch
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
@@ -128,13 +125,11 @@ jobs:
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker-compose.opensearch.yml \
up -d \
minio \
relational_db \
cache \
index \
opensearch \
code-interpreter
- name: Run migrations
@@ -163,7 +158,7 @@ jobs:
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
# Collect logs from each container
for container in $containers; do

View File

@@ -88,7 +88,6 @@ jobs:
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
@@ -181,11 +180,6 @@ jobs:
trap cleanup EXIT
# Run the actual installation with detailed logging
# Note that opensearch.enabled is true whereas others in this install
# are false. There is some work that needs to be done to get this
# entire step working in CI, enabling opensearch here is a small step
# in that direction. If this is causing issues, disabling it in this
# step should be ok in the short term.
echo "=== Starting ct install ==="
set +e
ct install --all \
@@ -193,8 +187,6 @@ jobs:
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=opensearch.enabled=true \
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \

View File

@@ -103,7 +103,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -163,7 +163,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -208,7 +208,7 @@ jobs:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit

View File

@@ -95,7 +95,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -155,7 +155,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -214,7 +214,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit

View File

@@ -85,7 +85,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -146,7 +146,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -207,7 +207,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/

View File

@@ -50,8 +50,9 @@ jobs:
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: backend/.mypy_cache
key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
restore-keys: |
mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-
mypy-${{ runner.os }}-
- name: Run MyPy

View File

@@ -70,7 +70,7 @@ jobs:
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
- name: Build and load
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6

3
.gitignore vendored
View File

@@ -1,8 +1,5 @@
# editors
.vscode
!/.vscode/env_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.cursor

View File

@@ -74,13 +74,6 @@ repos:
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-added-large-files
name: Check for added large files
args: ["--maxkb=1500"]
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:

View File

@@ -1,3 +1,5 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
@@ -22,7 +24,7 @@
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery background",
"Celery docfetching",
"Celery docprocessing",
"Celery beat"
@@ -149,6 +151,24 @@
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Discord Bot",
"consoleName": "Discord Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/discord/client.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Discord Bot Console"
},
{
"name": "MCP Server",
"consoleName": "MCP Server",
@@ -577,99 +597,6 @@
"group": "3"
}
},
{
// Dummy entry used to label the group
"name": "--- Database ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "4",
"order": 0
}
},
{
"name": "Clean restore seeded database dump (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--clean",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Create database snapshot",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"dump",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore database snapshot (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--clean",
"--yes",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Upgrade database to head revision",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"upgrade"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",

View File

@@ -37,6 +37,10 @@ CVE-2023-50868
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193

View File

@@ -42,7 +42,9 @@ RUN apt-get update && \
pkg-config \
gcc \
nano \
vim && \
vim \
libjemalloc2 \
&& \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -89,6 +91,12 @@ RUN uv pip install --system --no-cache-dir --upgrade \
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"
@@ -124,6 +132,13 @@ ENV PYTHONPATH=/app
ARG ONYX_VERSION=0.0.0-dev
ENV ONYX_VERSION=${ONYX_VERSION}
# Use jemalloc instead of glibc malloc to reduce memory fragmentation
# in long-running Python processes (API server, Celery workers).
# The soname is architecture-independent; the dynamic linker resolves
# the correct path from standard library directories.
# Placed after all RUN steps so build-time processes are unaffected.
ENV LD_PRELOAD=libjemalloc.so.2
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@@ -1,42 +0,0 @@
"""add_unique_constraint_to_inputprompt_prompt_user_id
Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique constraint on (prompt, user_id) for user-owned prompts
# This ensures each user can only have one shortcut with a given name
op.create_unique_constraint(
"uq_inputprompt_prompt_user_id",
"inputprompt",
["prompt", "user_id"],
)
# Create partial unique index for public prompts (where user_id IS NULL)
# PostgreSQL unique constraints don't enforce uniqueness for NULL values,
# so we need a partial index to ensure public prompt names are also unique
op.execute(
"""
CREATE UNIQUE INDEX uq_inputprompt_prompt_public
ON inputprompt (prompt)
WHERE user_id IS NULL
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
op.drop_constraint("uq_inputprompt_prompt_user_id", "inputprompt", type_="unique")

View File

@@ -1,29 +0,0 @@
"""remove default prompt shortcuts
Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Delete any user associations for the default prompts first (foreign key constraint)
op.execute(
"DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)"
)
# Delete the pre-seeded default prompt shortcuts (they have negative IDs)
op.execute("DELETE FROM inputprompt WHERE id < 0")
def downgrade() -> None:
# We don't restore the default prompts on downgrade
pass

View File

@@ -1,31 +0,0 @@
"""add chat_background to user
Revision ID: fb80bdd256de
Revises: 8b5ce697290e
Create Date: 2026-01-16 16:15:59.222617
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fb80bdd256de"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column(
"chat_background",
sa.String(),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("user", "chat_background")

View File

@@ -17,8 +17,7 @@ from onyx.context.search.models import InferenceChunk
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.factory import get_current_primary_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.factory import get_default_llm
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
@@ -43,13 +42,11 @@ def _run_single_search(
document_index: DocumentIndex,
user: User | None,
db_session: Session,
num_hits: int | None = None,
) -> list[InferenceChunk]:
"""Execute a single search query and return chunks."""
chunk_search_request = ChunkSearchRequest(
query=query,
user_selected_filters=filters,
limit=num_hits,
)
return search_pipeline(
@@ -75,9 +72,7 @@ def stream_search_query(
Used by both streaming and non-streaming endpoints.
"""
# Get document index
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
document_index = get_current_primary_default_document_index(db_session)
# Determine queries to execute
original_query = request.search_query
@@ -119,7 +114,6 @@ def stream_search_query(
document_index=document_index,
user=user,
db_session=db_session,
num_hits=request.num_hits,
)
else:
# Multiple queries - run in parallel and merge with RRF
@@ -127,14 +121,7 @@ def stream_search_query(
search_functions = [
(
_run_single_search,
(
query,
request.filters,
document_index,
user,
db_session,
request.num_hits,
),
(query, request.filters, document_index, user, db_session),
)
for query in all_executed_queries
]
@@ -181,9 +168,6 @@ def stream_search_query(
# Merge chunks into sections
sections = merge_individual_chunks(chunks)
# Truncate to the requested number of hits
sections = sections[: request.num_hits]
# Apply LLM document selection if requested
# num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
# The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it

View File

@@ -10,8 +10,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
("/enterprise-settings/logo", {"GET"}),
("/enterprise-settings/logotype", {"GET"}),
("/enterprise-settings/custom-analytics-script", {"GET"}),
# Stripe publishable key is safe to expose publicly
("/tenants/stripe-publishable-key", {"GET"}),
]

View File

@@ -32,7 +32,6 @@ class SendSearchQueryRequest(BaseModel):
filters: BaseFilters | None = None
num_docs_fed_to_llm_selection: int | None = None
run_query_expansion: bool = False
num_hits: int = 50
include_content: bool = False
stream: bool = False

View File

@@ -1,6 +1,3 @@
import asyncio
import httpx
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
@@ -15,14 +12,11 @@ from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import StripePublishableKeyResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import overwrite_full_gated_set
from ee.onyx.server.tenants.product_gating import store_product_gating
from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
@@ -32,10 +26,6 @@ logger = setup_logger()
router = APIRouter(prefix="/tenants")
# Cache for Stripe publishable key to avoid hitting S3 on every request
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()
@router.post("/product-gating")
def gate_product(
@@ -123,67 +113,3 @@ async def create_subscription_session(
except Exception as e:
logger.exception("Failed to create subscription session")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/stripe-publishable-key")
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
"""
Fetch the Stripe publishable key.
Priority: env var override (for testing) > S3 bucket (production).
This endpoint is public (no auth required) since publishable keys are safe to expose.
The key is cached in memory to avoid hitting S3 on every request.
"""
global _stripe_publishable_key_cache
# Fast path: return cached value without lock
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Use lock to prevent concurrent S3 requests
async with _stripe_key_lock:
# Double-check after acquiring lock (another request may have populated cache)
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Check for env var override first (for local testing with pk_test_* keys)
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
)
try:
async with httpx.AsyncClient() as client:
response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
response.raise_for_status()
key = response.text.strip()
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
)

View File

@@ -105,7 +105,3 @@ class PendingUserSnapshot(BaseModel):
class ApproveUserRequest(BaseModel):
email: str
class StripePublishableKeyResponse(BaseModel):
publishable_key: str

View File

@@ -11,7 +11,6 @@ from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Protocol
from typing import Tuple
@@ -1457,9 +1456,6 @@ def get_default_admin_user_emails_() -> list[str]:
STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
STATE_TOKEN_LIFETIME_SECONDS = 3600
CSRF_TOKEN_KEY = "csrftoken"
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
class OAuth2AuthorizeResponse(BaseModel):
@@ -1467,19 +1463,13 @@ class OAuth2AuthorizeResponse(BaseModel):
def generate_state_token(
data: Dict[str, str],
secret: SecretType,
lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,
data: Dict[str, str], secret: SecretType, lifetime_seconds: int = 3600
) -> str:
data["aud"] = STATE_TOKEN_AUDIENCE
return generate_jwt(data, secret, lifetime_seconds)
def generate_csrf_token() -> str:
return secrets.token_urlsafe(32)
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
def create_onyx_oauth_router(
oauth_client: BaseOAuth2,
@@ -1508,13 +1498,6 @@ def get_oauth_router(
redirect_url: Optional[str] = None,
associate_by_email: bool = False,
is_verified_by_default: bool = False,
*,
csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,
csrf_token_cookie_path: str = "/",
csrf_token_cookie_domain: Optional[str] = None,
csrf_token_cookie_secure: Optional[bool] = None,
csrf_token_cookie_httponly: bool = True,
csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
) -> APIRouter:
"""Generate a router with the OAuth routes."""
router = APIRouter()
@@ -1531,9 +1514,6 @@ def get_oauth_router(
route_name=callback_route_name,
)
if csrf_token_cookie_secure is None:
csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")
@router.get(
"/authorize",
name=f"oauth:{oauth_client.name}.{backend.name}.authorize",
@@ -1541,10 +1521,8 @@ def get_oauth_router(
)
async def authorize(
request: Request,
response: Response,
redirect: bool = Query(False),
scopes: List[str] = Query(None),
) -> Response | OAuth2AuthorizeResponse:
) -> OAuth2AuthorizeResponse:
referral_source = request.cookies.get("referral_source", None)
if redirect_url is not None:
@@ -1554,11 +1532,9 @@ def get_oauth_router(
next_url = request.query_params.get("next", "/")
csrf_token = generate_csrf_token()
state_data: Dict[str, str] = {
"next_url": next_url,
"referral_source": referral_source or "default_referral",
CSRF_TOKEN_KEY: csrf_token,
}
state = generate_state_token(state_data, state_secret)
@@ -1575,31 +1551,6 @@ def get_oauth_router(
authorization_url, {"access_type": "offline", "prompt": "consent"}
)
if redirect:
redirect_response = RedirectResponse(authorization_url, status_code=302)
redirect_response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return redirect_response
response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
@log_function_time(print_only=True)
@@ -1649,33 +1600,7 @@ def get_oauth_router(
try:
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
except jwt.DecodeError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
),
)
except jwt.ExpiredSignatureError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode,
"ACCESS_TOKEN_ALREADY_EXPIRED",
"ACCESS_TOKEN_ALREADY_EXPIRED",
),
)
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
if (
not cookie_csrf_token
or not state_csrf_token
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST)
next_url = state_data.get("next_url", "/")
referral_source = state_data.get("referral_source", None)

View File

@@ -26,13 +26,10 @@ from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.background.celery.celery_utils import make_probe_path
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.document_index.opensearch.client import (
wait_for_opensearch_with_timeout,
)
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_connector import RedisConnector
@@ -519,17 +516,15 @@ def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
"""Waits for Vespa to become ready subject to a timeout.
Raises WorkerShutdown if the timeout is reached."""
if ENABLE_OPENSEARCH_FOR_ONYX:
# TODO(andrei): Do some similar liveness checking for OpenSearch.
return
if not wait_for_vespa_with_timeout():
msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
msg = "Vespa: Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
if not wait_for_opensearch_with_timeout():
msg = "[OpenSearch] Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
# File for validating worker liveness
class LivenessProbe(bootsteps.StartStopStep):

View File

@@ -87,7 +87,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.httpx.httpx_pool import HttpxPool
@@ -1436,7 +1436,7 @@ def _docprocessing_task(
callback=callback,
)
document_indices = get_all_document_indices(
document_index = get_default_document_index(
index_attempt.search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -1473,7 +1473,7 @@ def _docprocessing_task(
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_indices=document_indices,
document_index=document_index,
ignore_time_skip=True, # Documents are already filtered during extraction
db_session=db_session,
tenant_id=tenant_id,

View File

@@ -25,7 +25,7 @@ from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.relationships import delete_document_references_from_kg
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_pool import get_redis_client
@@ -97,17 +97,13 @@ def document_by_cc_pair_cleanup_task(
action = "skip"
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates and deletion so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
active_search_settings.primary,
active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
count = get_document_connector_count(db_session, document_id)
if count == 1:
@@ -117,12 +113,11 @@ def document_by_cc_pair_cleanup_task(
chunk_count = fetch_chunk_count_for_document(document_id, db_session)
for retry_document_index in retry_document_indices:
_ = retry_document_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
_ = retry_index.delete_single(
document_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
delete_document_references_from_kg(
db_session=db_session,
@@ -160,18 +155,14 @@ def document_by_cc_pair_cleanup_task(
hidden=doc.hidden,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# there are still other cc_pair references to the doc, so just resync to Vespa
delete_document_by_connector_credential_pair__no_commit(

View File

@@ -12,6 +12,7 @@ from retry import retry
from sqlalchemy import select
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.configs.app_configs import MANAGED_VESPA
@@ -19,12 +20,14 @@ from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
from onyx.connectors.file.connector import LocalFileConnector
from onyx.connectors.models import Document
from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -32,7 +35,7 @@ from onyx.db.enums import UserFileStatus
from onyx.db.models import UserFile
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_active_search_settings_list
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.file_store.file_store import get_default_file_store
@@ -53,6 +56,17 @@ def _user_file_lock_key(user_file_id: str | UUID) -> str:
return f"{OnyxRedisLocks.USER_FILE_PROCESSING_LOCK_PREFIX}:{user_file_id}"
def _user_file_queued_key(user_file_id: str | UUID) -> str:
"""Key that exists while a process_single_user_file task is sitting in the queue.
The beat generator sets this with a TTL equal to CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
before enqueuing and the worker deletes it as its first action. This prevents
the beat from adding duplicate tasks for files that already have a live task
in flight.
"""
return f"{OnyxRedisLocks.USER_FILE_QUEUED_PREFIX}:{user_file_id}"
def _user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}"
@@ -116,7 +130,24 @@ def _get_document_chunk_count(
def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
"""Scan for user files with PROCESSING status and enqueue per-file tasks.
Uses direct Redis locks to avoid overlapping runs.
Three mechanisms prevent queue runaway:
1. **Queue depth backpressure** if the broker queue already has more than
USER_FILE_PROCESSING_MAX_QUEUE_DEPTH items we skip this beat cycle
entirely. Workers are clearly behind; adding more tasks would only make
the backlog worse.
2. **Per-file queued guard** before enqueuing a task we set a short-lived
Redis key (TTL = CELERY_USER_FILE_PROCESSING_TASK_EXPIRES). If that key
already exists the file already has a live task in the queue, so we skip
it. The worker deletes the key the moment it picks up the task so the
next beat cycle can re-enqueue if the file is still PROCESSING.
3. **Task expiry** every enqueued task carries an `expires` value equal to
CELERY_USER_FILE_PROCESSING_TASK_EXPIRES. If a task is still sitting in
the queue after that deadline, Celery discards it without touching the DB.
This is a belt-and-suspenders defence: even if the guard key is lost (e.g.
Redis restart), stale tasks evict themselves rather than piling up forever.
"""
task_logger.info("check_user_file_processing - Starting")
@@ -131,7 +162,21 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
return None
enqueued = 0
skipped_guard = 0
try:
# --- Protection 1: queue depth backpressure ---
r_celery = self.app.broker_connection().channel().client # type: ignore
queue_len = celery_get_queue_length(
OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
)
if queue_len > USER_FILE_PROCESSING_MAX_QUEUE_DEPTH:
task_logger.warning(
f"check_user_file_processing - Queue depth {queue_len} exceeds "
f"{USER_FILE_PROCESSING_MAX_QUEUE_DEPTH}, skipping enqueue for "
f"tenant={tenant_id}"
)
return None
with get_session_with_current_tenant() as db_session:
user_file_ids = (
db_session.execute(
@@ -144,12 +189,35 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
)
for user_file_id in user_file_ids:
self.app.send_task(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
priority=OnyxCeleryPriority.HIGH,
# --- Protection 2: per-file queued guard ---
queued_key = _user_file_queued_key(user_file_id)
guard_set = redis_client.set(
queued_key,
1,
ex=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
nx=True,
)
if not guard_set:
skipped_guard += 1
continue
# --- Protection 3: task expiry ---
# If task submission fails, clear the guard immediately so the
# next beat cycle can retry enqueuing this file.
try:
self.app.send_task(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
kwargs={
"user_file_id": str(user_file_id),
"tenant_id": tenant_id,
},
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
priority=OnyxCeleryPriority.HIGH,
expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
)
except Exception:
redis_client.delete(queued_key)
raise
enqueued += 1
finally:
@@ -157,7 +225,8 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
lock.release()
task_logger.info(
f"check_user_file_processing - Enqueued {enqueued} tasks for tenant={tenant_id}"
f"check_user_file_processing - Enqueued {enqueued} skipped_guard={skipped_guard} "
f"tasks for tenant={tenant_id}"
)
return None
@@ -172,6 +241,12 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
start = time.monotonic()
redis_client = get_redis_client(tenant_id=tenant_id)
# Clear the "queued" guard set by the beat generator so that the next beat
# cycle can re-enqueue this file if it is still in PROCESSING state after
# this task completes or fails.
redis_client.delete(_user_file_queued_key(user_file_id))
file_lock: RedisLock = redis_client.lock(
_user_file_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,
@@ -244,8 +319,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
search_settings=current_search_settings,
)
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
current_search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -259,7 +333,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_indices=document_indices,
document_index=document_index,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -413,16 +487,12 @@ def process_single_user_file_delete(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(document_index)
index_name = active_search_settings.primary.index_name
selection = f"{index_name}.document_id=='{user_file_id}'"
@@ -443,12 +513,11 @@ def process_single_user_file_delete(
else:
chunk_count = user_file.chunk_count
for retry_document_index in retry_document_indices:
retry_document_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
retry_index.delete_single(
doc_id=user_file_id,
tenant_id=tenant_id,
chunk_count=chunk_count,
)
# 2) Delete the user-uploaded file content from filestore (blob + metadata)
file_store = get_default_file_store()
@@ -570,16 +639,12 @@ def process_single_user_file_project_sync(
httpx_init_vespa_pool(20)
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
if not user_file:
@@ -589,14 +654,13 @@ def process_single_user_file_project_sync(
return None
project_ids = [project.id for project in user_file.projects]
for retry_document_index in retry_document_indices:
retry_document_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
retry_index.update_single(
doc_id=str(user_file.id),
tenant_id=tenant_id,
chunk_count=user_file.chunk_count,
fields=None,
user_fields=VespaDocumentUserFields(user_projects=project_ids),
)
task_logger.info(
f"process_single_user_file_project_sync - User file id={user_file_id}"

View File

@@ -49,7 +49,7 @@ from onyx.db.search_settings import get_active_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_document_set import RedisDocumentSet
@@ -70,8 +70,6 @@ logger = setup_logger()
# celery auto associates tasks created inside another task,
# which bloats the result metadata considerably. trail=False prevents this.
# TODO(andrei): Rename all these kinds of functions from *vespa* to a more
# generic *document_index*.
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
ignore_result=True,
@@ -467,17 +465,13 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
try:
with get_session_with_current_tenant() as db_session:
active_search_settings = get_active_search_settings(db_session)
# This flow is for updates so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
search_settings=active_search_settings.primary,
secondary_search_settings=active_search_settings.secondary,
httpx_client=HttpxPool.get("vespa"),
)
retry_document_indices: list[RetryDocumentIndex] = [
RetryDocumentIndex(document_index)
for document_index in document_indices
]
retry_index = RetryDocumentIndex(doc_index)
doc = get_document(document_id, db_session)
if not doc:
@@ -506,18 +500,14 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
# aggregated_boost_factor=doc.aggregated_boost_factor,
)
for retry_document_index in retry_document_indices:
# TODO(andrei): Previously there was a comment here saying
# it was ok if a doc did not exist in the document index. I
# don't agree with that claim, so keep an eye on this task
# to see if this raises.
retry_document_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
user_fields=None,
)
# update db last. Worst case = we crash right before this and
# the sync might repeat again later

View File

@@ -7,7 +7,6 @@ from typing import Any
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.emitter import Emitter
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import OverallStop
from onyx.server.query_and_chat.streaming_models import Packet
@@ -16,11 +15,6 @@ from onyx.tools.models import ToolCallInfo
from onyx.utils.threadpool_concurrency import run_in_background
from onyx.utils.threadpool_concurrency import wait_on_background
# Type alias for search doc deduplication key
# Simple key: just document_id (str)
# Full key: (document_id, chunk_ind, match_highlights)
SearchDocKey = str | tuple[str, int, tuple[str, ...]]
class ChatStateContainer:
"""Container for accumulating state during LLM loop execution.
@@ -46,10 +40,6 @@ class ChatStateContainer:
# True if this turn is a clarification question (deep research flow)
self.is_clarification: bool = False
# Note: LLM cost tracking is now handled in multi_llm.py
# Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
# Track which citation numbers were actually emitted during streaming
self._emitted_citations: set[int] = set()
def add_tool_call(self, tool_call: ToolCallInfo) -> None:
"""Add a tool call to the accumulated state."""
@@ -101,54 +91,6 @@ class ChatStateContainer:
with self._lock:
return self.is_clarification
@staticmethod
def create_search_doc_key(
search_doc: SearchDoc, use_simple_key: bool = True
) -> SearchDocKey:
"""Create a unique key for a SearchDoc for deduplication.
Args:
search_doc: The SearchDoc to create a key for
use_simple_key: If True (default), use only document_id for deduplication.
If False, include chunk_ind and match_highlights so that the same
document/chunk with different highlights are stored separately.
"""
if use_simple_key:
return search_doc.document_id
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
def add_search_docs(
self, search_docs: list[SearchDoc], use_simple_key: bool = True
) -> None:
"""Add search docs to the accumulated collection with deduplication.
Args:
search_docs: List of SearchDoc objects to add
use_simple_key: If True (default), deduplicate by document_id only.
If False, deduplicate by document_id + chunk_ind + match_highlights.
"""
with self._lock:
for doc in search_docs:
key = self.create_search_doc_key(doc, use_simple_key)
if key not in self._all_search_docs:
self._all_search_docs[key] = doc
def get_all_search_docs(self) -> dict[SearchDocKey, SearchDoc]:
"""Thread-safe getter for all accumulated search docs (returns a copy)."""
with self._lock:
return self._all_search_docs.copy()
def add_emitted_citation(self, citation_num: int) -> None:
"""Add a citation number that was actually emitted during streaming."""
with self._lock:
self._emitted_citations.add(citation_num)
def get_emitted_citations(self) -> set[int]:
"""Thread-safe getter for emitted citations (returns a copy)."""
with self._lock:
return self._emitted_citations.copy()
def run_chat_loop_with_state_containers(
func: Callable[..., None],

View File

@@ -53,50 +53,6 @@ def update_citation_processor_from_tool_response(
citation_processor.update_citation_mapping(citation_to_doc)
def extract_citation_order_from_text(text: str) -> list[int]:
"""Extract citation numbers from text in order of first appearance.
Parses citation patterns like [1], [1, 2], [[1]], 【1】 etc. and returns
the citation numbers in the order they first appear in the text.
Args:
text: The text containing citations
Returns:
List of citation numbers in order of first appearance (no duplicates)
"""
# Same pattern used in collapse_citations and DynamicCitationProcessor
# Group 2 captures the number in double bracket format: [[1]], 【【1】】
# Group 4 captures the numbers in single bracket format: [1], [1, 2]
citation_pattern = re.compile(
r"([\[【[]{2}(\d+)[\]】]]{2})|([\[【[]([\d]+(?: *, *\d+)*)[\]】]])"
)
seen: set[int] = set()
order: list[int] = []
for match in citation_pattern.finditer(text):
# Group 2 is for double bracket single number, group 4 is for single bracket
if match.group(2):
nums_str = match.group(2)
elif match.group(4):
nums_str = match.group(4)
else:
continue
for num_str in nums_str.split(","):
num_str = num_str.strip()
if num_str:
try:
num = int(num_str)
if num not in seen:
seen.add(num)
order.append(num)
except ValueError:
continue
return order
def collapse_citations(
answer_text: str,
existing_citation_mapping: CitationMapping,

View File

@@ -45,7 +45,6 @@ from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
)
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tracing.framework.create import trace
@@ -454,16 +453,12 @@ def run_llm_loop(
# The section below calculates the available tokens for history a bit more accurately
# now that project files are loaded in.
if persona and persona.replace_base_system_prompt:
if persona and persona.replace_base_system_prompt and persona.system_prompt:
# Handles the case where user has checked off the "Replace base system prompt" checkbox
system_prompt = (
ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
)
if persona.system_prompt
else None
system_prompt = ChatMessageSimple(
message=persona.system_prompt,
token_count=token_counter(persona.system_prompt),
message_type=MessageType.SYSTEM,
)
custom_agent_prompt_msg = None
else:
@@ -617,7 +612,6 @@ def run_llm_loop(
next_citation_num=citation_processor.get_next_citation_number(),
max_concurrent_tools=None,
skip_search_query_expansion=has_called_search_tool,
url_snippet_map=extract_url_snippet_map(gathered_documents or []),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = parallel_tool_call_results.updated_citation_mapping
@@ -656,15 +650,8 @@ def run_llm_loop(
# Extract search_docs if this is a search tool response
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
if gathered_documents:
gathered_documents.extend(search_docs)
else:
@@ -698,7 +685,7 @@ def run_llm_loop(
reasoning_tokens=llm_step_result.reasoning, # All tool calls from this loop share the same reasoning
tool_call_arguments=tool_call.tool_args,
tool_call_response=saved_response,
search_docs=displayed_docs or search_docs,
search_docs=search_docs,
generated_images=generated_images,
)
# Add to state container for partial save support

View File

@@ -14,7 +14,6 @@ from onyx.chat.emitter import Emitter
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import ChatFileType
@@ -433,7 +432,7 @@ def translate_history_to_llm_format(
for idx, msg in enumerate(history):
# if the message is being added to the history
if PROMPT_CACHE_CHAT_HISTORY and msg.message_type in [
if msg.message_type in [
MessageType.SYSTEM,
MessageType.USER,
MessageType.ASSISTANT,
@@ -860,11 +859,6 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(
result.citation_number
)
else:
# When citation_processor is None, use delta.content directly without modification
accumulated_answer += delta.content
@@ -991,9 +985,6 @@ def run_llm_step_pkt_generator(
),
obj=result,
)
# Track emitted citation for saving
if state_container:
state_container.add_emitted_citation(result.citation_number)
# Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
# Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)

View File

@@ -42,6 +42,7 @@ from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import MilestoneRecordType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_session_by_id
@@ -85,10 +86,6 @@ from onyx.utils.logger import setup_logger
from onyx.utils.long_term_log import LongTermLogger
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.timing import log_function_time
from onyx.utils.variable_functionality import (
fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import noop_fallback
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
@@ -361,21 +358,20 @@ def handle_stream_message_objects(
event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
)
# Track user message in PostHog for analytics
fetch_versioned_implementation_with_fallback(
module="onyx.utils.telemetry",
attribute="event_telemetry",
fallback=noop_fallback,
)(
distinct_id=user.email if user else tenant_id,
event="user_message_sent",
mt_cloud_telemetry(
tenant_id=tenant_id,
distinct_id=(
user.email
if user and not getattr(user, "is_anonymous", False)
else tenant_id
),
event=MilestoneRecordType.USER_MESSAGE_SENT,
properties={
"origin": new_msg_req.origin.value,
"has_files": len(new_msg_req.file_descriptors) > 0,
"has_project": chat_session.project_id is not None,
"has_persona": persona is not None and persona.id != DEFAULT_PERSONA_ID,
"deep_research": new_msg_req.deep_research,
"tenant_id": tenant_id,
},
)
@@ -743,16 +739,27 @@ def llm_loop_completion_handle(
else:
final_answer = "The generation was stopped by the user."
# Build citation_docs_info from accumulated citations in state container
citation_docs_info: list[CitationDocInfo] = []
seen_citation_nums: set[int] = set()
for citation_num, search_doc in state_container.citation_to_doc.items():
if citation_num not in seen_citation_nums:
seen_citation_nums.add(citation_num)
citation_docs_info.append(
CitationDocInfo(
search_doc=search_doc,
citation_number=citation_num,
)
)
save_chat_turn(
message_text=final_answer,
reasoning_tokens=state_container.reasoning_tokens,
citation_to_doc=state_container.citation_to_doc,
citation_docs_info=citation_docs_info,
tool_calls=state_container.tool_calls,
all_search_docs=state_container.get_all_search_docs(),
db_session=db_session,
assistant_message=assistant_message,
is_clarification=state_container.is_clarification,
emitted_citations=state_container.get_emitted_citations(),
)

View File

@@ -2,9 +2,8 @@ import json
from sqlalchemy.orm import Session
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.chat_state import SearchDocKey
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import CitationDocInfo
from onyx.context.search.models import SearchDoc
from onyx.db.chat import add_search_docs_to_chat_message
from onyx.db.chat import add_search_docs_to_tool_call
@@ -20,6 +19,22 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def _create_search_doc_key(search_doc: SearchDoc) -> tuple[str, int, tuple[str, ...]]:
    """
    Build a hashable identity key for a specific version of a SearchDoc.

    The same document/chunk can be retrieved multiple times with different
    match_highlights (one set per originating query), so the highlights are
    folded into the key to keep those versions distinct from each other.

    Args:
        search_doc: The SearchDoc pydantic model to derive the key from

    Returns:
        A (document_id, chunk_ind, sorted match_highlights) tuple that uniquely
        identifies this specific version of the document
    """
    highlights = search_doc.match_highlights or []
    return (
        search_doc.document_id,
        search_doc.chunk_ind,
        tuple(sorted(highlights)),
    )
def _create_and_link_tool_calls(
tool_calls: list[ToolCallInfo],
assistant_message: ChatMessage,
@@ -139,36 +154,38 @@ def save_chat_turn(
message_text: str,
reasoning_tokens: str | None,
tool_calls: list[ToolCallInfo],
citation_to_doc: dict[int, SearchDoc],
all_search_docs: dict[SearchDocKey, SearchDoc],
citation_docs_info: list[CitationDocInfo],
db_session: Session,
assistant_message: ChatMessage,
is_clarification: bool = False,
emitted_citations: set[int] | None = None,
) -> None:
"""
Save a chat turn by populating the assistant_message and creating related entities.
This function:
1. Updates the ChatMessage with text, reasoning tokens, and token count
2. Creates DB SearchDoc entries from pre-deduplicated all_search_docs
3. Builds tool_call -> search_doc mapping for displayed docs
4. Builds citation mapping from citation_to_doc
5. Links all unique SearchDocs to the ChatMessage
2. Creates SearchDoc entries from ToolCall search_docs (for tool calls that returned documents)
3. Collects all unique SearchDocs from all tool calls and links them to ChatMessage
4. Builds citation mapping from citation_docs_info
5. Links all unique SearchDocs from tool calls to the ChatMessage
6. Creates ToolCall entries and links SearchDocs to them
7. Builds the citations mapping for the ChatMessage
Deduplication Logic:
- SearchDocs are deduplicated using (document_id, chunk_ind, match_highlights) as the key
- This ensures that the same document/chunk with different match_highlights (from different
queries) are stored as separate SearchDoc entries
- Each ToolCall and ChatMessage will map to the correct version of the SearchDoc that
matches its specific query highlights
Args:
message_text: The message content to save
reasoning_tokens: Optional reasoning tokens for the message
tool_calls: List of tool call information to create ToolCall entries (may include search_docs)
citation_to_doc: Mapping from citation number to SearchDoc for building citations
all_search_docs: Pre-deduplicated search docs from ChatStateContainer
citation_docs_info: List of citation document information for building citations mapping
db_session: Database session for persistence
assistant_message: The ChatMessage object to populate (should already exist in DB)
is_clarification: Whether this assistant message is a clarification question (deep research flow)
emitted_citations: Set of citation numbers that were actually emitted during streaming.
If provided, only citations in this set will be saved; others are filtered out.
"""
# 1. Update ChatMessage with message content, reasoning tokens, and token count
assistant_message.message = message_text
@@ -183,53 +200,53 @@ def save_chat_turn(
else:
assistant_message.token_count = 0
# 2. Create DB SearchDoc entries from pre-deduplicated all_search_docs
search_doc_key_to_id: dict[SearchDocKey, int] = {}
for key, search_doc_py in all_search_docs.items():
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[key] = db_search_doc.id
# 3. Build tool_call -> search_doc mapping (for displayed docs in each tool call)
# 2. Create SearchDoc entries from tool_calls
# Build mapping from SearchDoc to DB SearchDoc ID
# Use (document_id, chunk_ind, match_highlights) as key to avoid duplicates
# while ensuring different versions with different highlights are stored separately
search_doc_key_to_id: dict[tuple[str, int, tuple[str, ...]], int] = {}
tool_call_to_search_doc_ids: dict[str, list[int]] = {}
# Process tool calls and their search docs
for tool_call_info in tool_calls:
if tool_call_info.search_docs:
search_doc_ids_for_tool: list[int] = []
for search_doc_py in tool_call_info.search_docs:
key = ChatStateContainer.create_search_doc_key(search_doc_py)
if key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[key])
# Create a unique key for this SearchDoc version
search_doc_key = _create_search_doc_key(search_doc_py)
# Check if we've already created this exact SearchDoc version
if search_doc_key in search_doc_key_to_id:
search_doc_ids_for_tool.append(search_doc_key_to_id[search_doc_key])
else:
# Displayed doc not in all_search_docs - create it
# This can happen if displayed_docs contains docs not in search_docs
# Create new DB SearchDoc entry
db_search_doc = create_db_search_doc(
server_search_doc=search_doc_py,
db_session=db_session,
commit=False,
)
search_doc_key_to_id[key] = db_search_doc.id
search_doc_key_to_id[search_doc_key] = db_search_doc.id
search_doc_ids_for_tool.append(db_search_doc.id)
tool_call_to_search_doc_ids[tool_call_info.tool_call_id] = list(
set(search_doc_ids_for_tool)
)
# Collect all search doc IDs for ChatMessage linking
all_search_doc_ids_set: set[int] = set(search_doc_key_to_id.values())
# 3. Collect all unique SearchDoc IDs from all tool calls to link to ChatMessage
# Use a set to deduplicate by ID (since we've already deduplicated by key above)
all_search_doc_ids_set: set[int] = set()
for search_doc_ids in tool_call_to_search_doc_ids.values():
all_search_doc_ids_set.update(search_doc_ids)
# 4. Build a citation mapping from the citation number to the saved DB SearchDoc ID
# Only include citations that were actually emitted during streaming
# 4. Build citation mapping from citation_docs_info
citation_number_to_search_doc_id: dict[int, int] = {}
for citation_num, search_doc_py in citation_to_doc.items():
# Skip citations that weren't actually emitted (if emitted_citations is provided)
if emitted_citations is not None and citation_num not in emitted_citations:
continue
for citation_doc_info in citation_docs_info:
# Extract SearchDoc pydantic model
search_doc_py = citation_doc_info.search_doc
# Create the unique key for this SearchDoc version
search_doc_key = ChatStateContainer.create_search_doc_key(search_doc_py)
search_doc_key = _create_search_doc_key(search_doc_py)
# Get the search doc ID (should already exist from processing tool_calls)
if search_doc_key in search_doc_key_to_id:
@@ -266,7 +283,10 @@ def save_chat_turn(
all_search_doc_ids_set.add(db_search_doc_id)
# Build mapping from citation number to search doc ID
citation_number_to_search_doc_id[citation_num] = db_search_doc_id
if citation_doc_info.citation_number is not None:
citation_number_to_search_doc_id[citation_doc_info.citation_number] = (
db_search_doc_id
)
# 5. Link all unique SearchDocs (from both tool calls and citations) to ChatMessage
final_search_doc_ids: list[int] = list(all_search_doc_ids_set)
@@ -286,10 +306,23 @@ def save_chat_turn(
tool_call_to_search_doc_ids=tool_call_to_search_doc_ids,
)
# 7. Build citations mapping - use the mapping we already built in step 4
assistant_message.citations = (
citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
)
# 7. Build citations mapping from citation_docs_info
# Any citation_doc_info with a citation_number appeared in the text and should be mapped
citations: dict[int, int] = {}
for citation_doc_info in citation_docs_info:
if citation_doc_info.citation_number is not None:
search_doc_id = citation_number_to_search_doc_id.get(
citation_doc_info.citation_number
)
if search_doc_id is not None:
citations[citation_doc_info.citation_number] = search_doc_id
else:
logger.warning(
f"Citation number {citation_doc_info.citation_number} found in citation_docs_info "
f"but no matching search doc ID in mapping"
)
assistant_message.citations = citations if citations else None
# Finally save the messages, tool calls, and docs
db_session.commit()

View File

@@ -208,19 +208,8 @@ OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 920
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
OPENSEARCH_ADMIN_PASSWORD = os.environ.get("OPENSEARCH_ADMIN_PASSWORD", "")
# This is the "base" config for now, the idea is that at least for our dev
# environments we always want to be dual indexing into both OpenSearch and Vespa
# to stress test the new codepaths. Only enable this if there is some instance
# of OpenSearch running for the relevant Onyx instance.
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "").lower() == "true"
)
# Given that the "base" config above is true, this enables whether we want to
# retrieve from OpenSearch or Vespa. We want to be able to quickly toggle this
# in the event we see issues with OpenSearch retrieval in our dev environments.
ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
ENABLE_OPENSEARCH_FOR_ONYX = (
os.environ.get("ENABLE_OPENSEARCH_FOR_ONYX", "").lower() == "true"
)
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
@@ -749,10 +738,6 @@ JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default
LOG_ONYX_MODEL_INTERACTIONS = (
os.environ.get("LOG_ONYX_MODEL_INTERACTIONS", "").lower() == "true"
)
PROMPT_CACHE_CHAT_HISTORY = (
os.environ.get("PROMPT_CACHE_CHAT_HISTORY", "").lower() == "true"
)
# If set to `true` will enable additional logs about Vespa query performance
# (time spent on finding the right docs + time spent fetching summaries from disk)
LOG_VESPA_TIMING_INFORMATION = (
@@ -1031,14 +1016,3 @@ INSTANCE_TYPE = (
## Discord Bot Configuration
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")
DISCORD_BOT_INVOKE_CHAR = os.environ.get("DISCORD_BOT_INVOKE_CHAR", "!")
## Stripe Configuration
# URL to fetch the Stripe publishable key from a public S3 bucket.
# Publishable keys are safe to expose publicly - they can only initialize
# Stripe.js and tokenize payment info, not make charges or access data.
STRIPE_PUBLISHABLE_KEY_URL = (
"https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt"
)
# Override for local testing with Stripe test keys (pk_test_*)
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")

View File

@@ -1,5 +1,6 @@
import os
INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml"
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
NUM_RETURNED_HITS = 50

View File

@@ -153,6 +153,17 @@ CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT = 300 # 5 min
CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT = 30 * 60 # 30 minutes (in seconds)
# How long a queued user-file task is valid before workers discard it.
# Should be longer than the beat interval (20 s) but short enough to prevent
# indefinite queue growth. Workers drop tasks older than this without touching
# the DB, so a shorter value = faster drain of stale duplicates.
CELERY_USER_FILE_PROCESSING_TASK_EXPIRES = 60 # 1 minute (in seconds)
# Maximum number of tasks allowed in the user-file-processing queue before the
# beat generator stops adding more. Prevents unbounded queue growth when workers
# fall behind.
USER_FILE_PROCESSING_MAX_QUEUE_DEPTH = 500
CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"
@@ -341,6 +352,7 @@ class MilestoneRecordType(str, Enum):
CREATED_CONNECTOR = "created_connector"
CONNECTOR_SUCCEEDED = "connector_succeeded"
RAN_QUERY = "ran_query"
USER_MESSAGE_SENT = "user_message_sent"
MULTIPLE_ASSISTANTS = "multiple_assistants"
CREATED_ASSISTANT = "created_assistant"
CREATED_ONYX_BOT = "created_onyx_bot"
@@ -423,6 +435,9 @@ class OnyxRedisLocks:
# User file processing
USER_FILE_PROCESSING_BEAT_LOCK = "da_lock:check_user_file_processing_beat"
USER_FILE_PROCESSING_LOCK_PREFIX = "da_lock:user_file_processing"
# Short-lived key set when a task is enqueued; cleared when the worker picks it up.
# Prevents the beat from re-enqueuing the same file while a task is already queued.
USER_FILE_QUEUED_PREFIX = "da_lock:user_file_queued"
USER_FILE_PROJECT_SYNC_BEAT_LOCK = "da_lock:check_user_file_project_sync_beat"
USER_FILE_PROJECT_SYNC_LOCK_PREFIX = "da_lock:user_file_project_sync"
USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"

View File

@@ -25,11 +25,17 @@ class AsanaConnector(LoadConnector, PollConnector):
batch_size: int = INDEX_BATCH_SIZE,
continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE,
) -> None:
self.workspace_id = asana_workspace_id
self.project_ids_to_index: list[str] | None = (
asana_project_ids.split(",") if asana_project_ids is not None else None
)
self.asana_team_id = asana_team_id
self.workspace_id = asana_workspace_id.strip()
if asana_project_ids:
project_ids = [
project_id.strip()
for project_id in asana_project_ids.split(",")
if project_id.strip()
]
self.project_ids_to_index = project_ids or None
else:
self.project_ids_to_index = None
self.asana_team_id = (asana_team_id.strip() or None) if asana_team_id else None
self.batch_size = batch_size
self.continue_on_failure = continue_on_failure
logger.info(

View File

@@ -15,7 +15,6 @@ from onyx.federated_connectors.slack.models import SlackEntities
from onyx.llm.interfaces import LLM
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.natural_language_processing.english_stopwords import ENGLISH_STOPWORDS_SET
from onyx.onyxbot.slack.models import ChannelType
from onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
@@ -114,7 +113,7 @@ def is_recency_query(query: str) -> bool:
if not has_recency_keyword:
return False
# Get combined stop words (English + Slack-specific)
# Get combined stop words (NLTK + Slack-specific)
all_stop_words = _get_combined_stop_words()
# Extract content words (excluding stop words)
@@ -489,7 +488,7 @@ def build_channel_override_query(channel_references: set[str], time_filter: str)
return f"__CHANNEL_OVERRIDE__ {channel_filter}{time_filter}"
# Slack-specific stop words (in addition to standard English stop words)
# Slack-specific stop words (in addition to standard NLTK stop words)
# These include Slack-specific terms and temporal/recency keywords
SLACK_SPECIFIC_STOP_WORDS = frozenset(
RECENCY_KEYWORDS
@@ -509,16 +508,27 @@ SLACK_SPECIFIC_STOP_WORDS = frozenset(
)
def _get_combined_stop_words() -> frozenset[str]:
"""Get combined English + Slack-specific stop words.
def _get_combined_stop_words() -> set[str]:
"""Get combined NLTK + Slack-specific stop words.
Returns a frozenset of stop words for filtering content words.
Returns a set of stop words for filtering content words.
Falls back to just Slack-specific stop words if NLTK is unavailable.
Note: Currently only supports English stop words. Non-English queries
may have suboptimal content word extraction. Future enhancement could
detect query language and load appropriate stop words.
"""
return ENGLISH_STOPWORDS_SET | SLACK_SPECIFIC_STOP_WORDS
try:
from nltk.corpus import stopwords # type: ignore
# TODO: Support multiple languages - currently hardcoded to English
# Could detect language or allow configuration
nltk_stop_words = set(stopwords.words("english"))
except Exception:
# Fallback if NLTK not available
nltk_stop_words = set()
return nltk_stop_words | SLACK_SPECIFIC_STOP_WORDS
def extract_content_words_from_recency_query(
@@ -526,7 +536,7 @@ def extract_content_words_from_recency_query(
) -> list[str]:
"""Extract meaningful content words from a recency query.
Filters out English stop words, Slack-specific terms, channel references, and proper nouns.
Filters out NLTK stop words, Slack-specific terms, channel references, and proper nouns.
Args:
query_text: The user's query text
@@ -535,7 +545,7 @@ def extract_content_words_from_recency_query(
Returns:
List of content words (up to MAX_CONTENT_WORDS)
"""
# Get combined stop words (English + Slack-specific)
# Get combined stop words (NLTK + Slack-specific)
all_stop_words = _get_combined_stop_words()
words = query_text.split()

View File

@@ -144,6 +144,10 @@ class BasicChunkRequest(BaseModel):
# In case some queries favor recency more than other queries.
recency_bias_multiplier: float = 1.0
# Sometimes we may want to extract specific keywords from a more semantic query for
# a better keyword search.
query_keywords: list[str] | None = None # Not used currently
limit: int | None = None
offset: int | None = None # This one is not set currently
@@ -162,8 +166,6 @@ class ChunkIndexRequest(BasicChunkRequest):
# Calculated final filters
filters: IndexFilters
query_keywords: list[str] | None = None
class ContextExpansionType(str, Enum):
NOT_RELEVANT = "not_relevant"
@@ -370,10 +372,6 @@ class SearchDocsResponse(BaseModel):
# document id is the most straightforward way.
citation_mapping: dict[int, str]
# For cases where the frontend only needs to display a subset of the search docs
# The whole list is typically still needed for later steps but this set should be saved separately
displayed_docs: list[SearchDoc] | None = None
class SavedSearchDoc(SearchDoc):
db_doc_id: int
@@ -432,6 +430,11 @@ class SavedSearchDoc(SearchDoc):
return self_score < other_score
class CitationDocInfo(BaseModel):
search_doc: SearchDoc
citation_number: int | None
class SavedSearchDocWithContent(SavedSearchDoc):
"""Used for endpoints that need to return the actual contents of the retrieved
section in addition to the match_highlights."""

View File

@@ -19,7 +19,6 @@ from onyx.db.models import Persona
from onyx.db.models import User
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.interfaces import LLM
from onyx.natural_language_processing.english_stopwords import strip_stopwords
from onyx.secondary_llm_flows.source_filter import extract_source_filter
from onyx.secondary_llm_flows.time_filter import extract_time_filter
from onyx.utils.logger import setup_logger
@@ -279,16 +278,12 @@ def search_pipeline(
bypass_acl=chunk_search_request.bypass_acl,
)
query_keywords = strip_stopwords(chunk_search_request.query)
query_request = ChunkIndexRequest(
query=chunk_search_request.query,
hybrid_alpha=chunk_search_request.hybrid_alpha,
recency_bias_multiplier=chunk_search_request.recency_bias_multiplier,
query_keywords=query_keywords,
query_keywords=chunk_search_request.query_keywords,
filters=filters,
limit=chunk_search_request.limit,
offset=chunk_search_request.offset,
)
retrieved_chunks = search_chunks(

View File

@@ -23,6 +23,45 @@ from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
def _dedupe_chunks(
    chunks: list[InferenceChunk],
) -> list[InferenceChunk]:
    """Collapse duplicate (document_id, chunk_id) chunks, keeping the best score.

    When the same chunk appears more than once (e.g. retrieved by several
    parallel queries), only the occurrence with the highest score is kept;
    a missing score is treated as 0 for the comparison. Insertion order of
    first occurrences is preserved in the returned list.
    """
    best_by_key: dict[tuple[str, int], InferenceChunk] = {}
    for candidate in chunks:
        key = (candidate.document_id, candidate.chunk_id)
        current = best_by_key.get(key)
        # Replace only on a strictly better score so ties keep the first seen.
        if current is None or (current.score or 0) < (candidate.score or 0):
            best_by_key[key] = candidate
    return list(best_by_key.values())
def download_nltk_data() -> None:
    """Ensure the NLTK resources used by this module are available locally.

    Each required resource is probed via nltk.data.find and downloaded only
    when missing. Download failures are logged but intentionally not raised,
    so callers can proceed without the optional resources.
    """
    import nltk  # type: ignore[import-untyped]

    required_resources = {
        "stopwords": "corpora/stopwords",
        # "wordnet": "corpora/wordnet",  # Not in use
        "punkt_tab": "tokenizers/punkt_tab",
    }

    for name, lookup_path in required_resources.items():
        try:
            nltk.data.find(lookup_path)
            logger.info(f"{name} is already downloaded.")
            continue
        except LookupError:
            pass

        try:
            logger.info(f"Downloading {name}...")
            nltk.download(name, quiet=True)
            logger.info(f"{name} downloaded successfully.")
        except Exception as e:
            logger.error(f"Failed to download {name}. Error: {e}")
def combine_retrieval_results(
chunk_sets: list[list[InferenceChunk]],
) -> list[InferenceChunk]:

View File

@@ -3,8 +3,6 @@ from uuid import UUID
from fastapi import HTTPException
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
@@ -20,6 +18,45 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
def insert_input_prompt_if_not_exists(
    user: User | None,
    input_prompt_id: int | None,
    prompt: str,
    content: str,
    active: bool,
    is_public: bool,
    db_session: Session,
    commit: bool = True,
) -> InputPrompt:
    """Return an existing InputPrompt or create one when no match is found.

    Lookup strategy: when input_prompt_id is given, match on the primary key;
    otherwise match on prompt text scoped to the given user (or to ownerless
    prompts when user is None). A newly created prompt is forced public when
    there is no owning user. The session is committed only when a new row is
    created and commit is True.
    """
    if input_prompt_id is not None:
        existing = (
            db_session.query(InputPrompt).filter_by(id=input_prompt_id).first()
        )
    else:
        lookup = db_session.query(InputPrompt).filter(InputPrompt.prompt == prompt)
        if user:
            lookup = lookup.filter(InputPrompt.user_id == user.id)
        else:
            lookup = lookup.filter(InputPrompt.user_id.is_(None))
        existing = lookup.first()

    if existing is not None:
        return existing

    new_prompt = InputPrompt(
        id=input_prompt_id,
        prompt=prompt,
        content=content,
        active=active,
        is_public=is_public or user is None,
        user_id=user.id if user else None,
    )
    db_session.add(new_prompt)
    if commit:
        db_session.commit()

    return new_prompt
def insert_input_prompt(
prompt: str,
content: str,
@@ -27,41 +64,16 @@ def insert_input_prompt(
user: User | None,
db_session: Session,
) -> InputPrompt:
user_id = user.id if user else None
# Use atomic INSERT ... ON CONFLICT DO NOTHING with RETURNING
# to avoid race conditions with the uniqueness check
stmt = pg_insert(InputPrompt).values(
input_prompt = InputPrompt(
prompt=prompt,
content=content,
active=True,
is_public=is_public,
user_id=user_id,
user_id=user.id if user is not None else None,
)
# Use the appropriate constraint based on whether this is a user-owned or public prompt
if user_id is not None:
stmt = stmt.on_conflict_do_nothing(constraint="uq_inputprompt_prompt_user_id")
else:
# Partial unique indexes cannot be targeted by constraint name;
# must use index_elements + index_where
stmt = stmt.on_conflict_do_nothing(
index_elements=[InputPrompt.prompt],
index_where=InputPrompt.user_id.is_(None),
)
stmt = stmt.returning(InputPrompt)
result = db_session.execute(stmt)
input_prompt = result.scalar_one_or_none()
if input_prompt is None:
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
db_session.add(input_prompt)
db_session.commit()
return input_prompt
@@ -86,40 +98,23 @@ def update_input_prompt(
input_prompt.content = content
input_prompt.active = active
try:
db_session.commit()
except IntegrityError:
db_session.rollback()
raise HTTPException(
status_code=409,
detail=f"A prompt shortcut with the name '{prompt}' already exists",
)
db_session.commit()
return input_prompt
def validate_user_prompt_authorization(
user: User | None, input_prompt: InputPrompt
) -> bool:
"""
Check if the user is authorized to modify the given input prompt.
Returns True only if the user owns the prompt.
Returns False for public prompts (only admins can modify those),
unless auth is disabled (then anyone can manage public prompts).
"""
prompt = InputPromptSnapshot.from_model(input_prompt=input_prompt)
# Public prompts cannot be modified via the user API (unless auth is disabled)
if prompt.is_public or prompt.user_id is None:
return AUTH_TYPE == AuthType.DISABLED
if prompt.user_id is not None:
if user is None:
return False
# User must be logged in
if user is None:
return False
# User must own the prompt
user_details = UserInfo.from_model(user)
return str(user_details.id) == str(prompt.user_id)
user_details = UserInfo.from_model(user)
if str(user_details.id) != str(prompt.user_id):
return False
return True
def remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> None:

View File

@@ -9,9 +9,6 @@ def get_memories(user: User | None, db_session: Session) -> list[str]:
if user is None:
return []
if not user.use_memories:
return []
user_info = [
f"User's name: {user.personal_name}" if user.personal_name else "",
f"User's role: {user.personal_role}" if user.personal_role else "",

View File

@@ -188,7 +188,6 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
nullable=True,
default=None,
)
chat_background: Mapped[str | None] = mapped_column(String, nullable=True)
# personalization fields are exposed via the chat user settings "Personalization" tab
personal_name: Mapped[str | None] = mapped_column(String, nullable=True)
personal_role: Mapped[str | None] = mapped_column(String, nullable=True)
@@ -2933,8 +2932,6 @@ class PersonaLabel(Base):
"Persona",
secondary=Persona__PersonaLabel.__table__,
back_populates="labels",
cascade="all, delete-orphan",
single_parent=True,
)
@@ -3627,18 +3624,6 @@ class InputPrompt(Base):
ForeignKey("user.id", ondelete="CASCADE"), nullable=True
)
__table_args__ = (
# Unique constraint on (prompt, user_id) for user-owned prompts
UniqueConstraint("prompt", "user_id", name="uq_inputprompt_prompt_user_id"),
# Partial unique index for public prompts (user_id IS NULL)
Index(
"uq_inputprompt_prompt_public",
"prompt",
unique=True,
postgresql_where=text("user_id IS NULL"),
),
)
class InputPrompt__User(Base):
__tablename__ = "inputprompt__user"
@@ -3647,7 +3632,7 @@ class InputPrompt__User(Base):
ForeignKey("inputprompt.id"), primary_key=True
)
user_id: Mapped[UUID | None] = mapped_column(
ForeignKey("user.id"), primary_key=True
ForeignKey("inputprompt.id"), primary_key=True
)
disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

View File

@@ -917,7 +917,9 @@ def upsert_persona(
existing_persona.icon_name = icon_name
existing_persona.is_visible = is_visible
existing_persona.search_start_date = search_start_date
existing_persona.labels = labels or []
if label_ids is not None:
existing_persona.labels.clear()
existing_persona.labels = labels or []
existing_persona.is_default_persona = (
is_default_persona
if is_default_persona is not None

View File

@@ -20,7 +20,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.key_value_store.factory import get_kv_store
from onyx.utils.logger import setup_logger
@@ -80,43 +80,39 @@ def _perform_index_swap(
db_session=db_session,
)
# This flow is for checking and possibly creating an index so we get all
# indices.
document_indices = get_all_document_indices(new_search_settings, None, None)
# remove the old index from the vector db
document_index = get_default_document_index(new_search_settings, None)
WAIT_SECONDS = 5
for document_index in document_indices:
success = False
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
try:
logger.notice(
f"Document index {document_index.__class__.__name__} swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
)
logger.notice("Document index swap complete.")
success = True
break
except Exception:
logger.exception(
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
f"The document index services may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not success:
logger.error(
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
f"Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
success = False
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
try:
logger.notice(
f"Vespa index swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
)
return None
document_index.ensure_indices_exist(
primary_embedding_dim=new_search_settings.final_embedding_dim,
primary_embedding_precision=new_search_settings.embedding_precision,
# just finished swap, no more secondary index
secondary_index_embedding_dim=None,
secondary_index_embedding_precision=None,
)
logger.notice("Vespa index swap complete.")
success = True
break
except Exception:
logger.exception(
f"Vespa index swap did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not success:
logger.error(
f"Vespa index swap did not succeed. Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
)
return None
return current_search_settings

View File

@@ -139,20 +139,6 @@ def update_user_theme_preference(
db_session.commit()
def update_user_chat_background(
user_id: UUID,
chat_background: str | None,
db_session: Session,
) -> None:
"""Update user's chat background setting."""
db_session.execute(
update(User)
.where(User.id == user_id) # type: ignore
.values(chat_background=chat_background)
)
db_session.commit()
def update_user_personalization(
user_id: UUID,
*,

View File

@@ -287,7 +287,6 @@ def run_deep_research_llm_loop(
token_count=100,
message_type=MessageType.USER,
)
truncated_message_history = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,

View File

@@ -2,18 +2,13 @@ from onyx.configs.app_configs import BLURB_SIZE
from onyx.configs.constants import RETURN_SEPARATOR
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.indexing.models import DocAwareChunk
from onyx.indexing.models import DocMetadataAwareIndexChunk
def generate_enriched_content_for_chunk_text(chunk: DocMetadataAwareIndexChunk) -> str:
def generate_enriched_content_for_chunk(chunk: DocMetadataAwareIndexChunk) -> str:
return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_keyword}"
def generate_enriched_content_for_chunk_embedding(chunk: DocAwareChunk) -> str:
return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
def cleanup_content_for_chunks(
chunks: list[InferenceChunkUncleaned],
) -> list[InferenceChunk]:

View File

@@ -1,8 +1,9 @@
import httpx
from sqlalchemy.orm import Session
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchOldDocumentIndex,
@@ -16,24 +17,17 @@ def get_default_document_index(
secondary_search_settings: SearchSettings | None,
httpx_client: httpx.Client | None = None,
) -> DocumentIndex:
"""Gets the default document index from env vars.
"""Primary index is the index that is used for querying/updating etc.
Secondary index is for when both the currently used index and the upcoming
index both need to be updated, updates are applied to both indices"""
To be used for retrieval only. Indexing should be done through both indices
until Vespa is deprecated.
Pre-existing docstring for this function, although secondary indices are not
currently supported:
Primary index is the index that is used for querying/updating etc. Secondary
index is for when both the currently used index and the upcoming index both
need to be updated, updates are applied to both indices.
"""
secondary_index_name: str | None = None
secondary_large_chunks_enabled: bool | None = None
if secondary_search_settings:
secondary_index_name = secondary_search_settings.index_name
secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled
if ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX:
if ENABLE_OPENSEARCH_FOR_ONYX:
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
secondary_index_name=secondary_index_name,
@@ -53,48 +47,12 @@ def get_default_document_index(
)
def get_all_document_indices(
search_settings: SearchSettings,
secondary_search_settings: SearchSettings | None,
httpx_client: httpx.Client | None = None,
) -> list[DocumentIndex]:
"""Gets all document indices.
NOTE: Will only return an OpenSearch index interface if
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX is True. This is so we don't break flows
where we know it won't be enabled.
Used for indexing only. Until Vespa is deprecated we will index into both
document indices. Retrieval is done through only one index however.
Large chunks and secondary indices are not currently supported so we
hardcode appropriate values.
def get_current_primary_default_document_index(db_session: Session) -> DocumentIndex:
"""
vespa_document_index = VespaIndex(
index_name=search_settings.index_name,
secondary_index_name=(
secondary_search_settings.index_name if secondary_search_settings else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=(
secondary_search_settings.large_chunks_enabled
if secondary_search_settings
else None
),
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
TODO: Use redis to cache this or something
"""
search_settings = get_current_search_settings(db_session)
return get_default_document_index(
search_settings,
None,
)
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)
result: list[DocumentIndex] = [vespa_document_index]
if opensearch_document_index:
result.append(opensearch_document_index)
return result

View File

@@ -1,5 +1,4 @@
import logging
import time
from typing import Any
from typing import Generic
from typing import TypeVar
@@ -570,9 +569,6 @@ class OpenSearchClient:
def close(self) -> None:
"""Closes the client.
TODO(andrei): Can we have some way to auto close when the client no
longer has any references?
Raises:
Exception: There was an error closing the client.
"""
@@ -600,55 +596,3 @@ class OpenSearchClient:
)
hits_second_layer: list[Any] = hits_first_layer.get("hits", [])
return hits_second_layer
def wait_for_opensearch_with_timeout(
wait_interval_s: int = 5,
wait_limit_s: int = 60,
client: OpenSearchClient | None = None,
) -> bool:
"""Waits for OpenSearch to become ready subject to a timeout.
Will create a new dummy client if no client is provided. Will close this
client at the end of the function. Will not close the client if it was
supplied.
Args:
wait_interval_s: The interval in seconds to wait between checks.
Defaults to 5.
wait_limit_s: The total timeout in seconds to wait for OpenSearch to
become ready. Defaults to 60.
client: The OpenSearch client to use for pinging. If None, a new dummy
client will be created. Defaults to None.
Returns:
True if OpenSearch is ready, False otherwise.
"""
made_client = False
try:
if client is None:
# NOTE: index_name does not matter because we are only using this object
# to ping.
# TODO(andrei): Make this better.
client = OpenSearchClient(index_name="")
made_client = True
time_start = time.monotonic()
while True:
if client.ping():
logger.info("[OpenSearch] Readiness probe succeeded. Continuing...")
return True
time_elapsed = time.monotonic() - time_start
if time_elapsed > wait_limit_s:
logger.info(
f"[OpenSearch] Readiness probe did not succeed within the timeout "
f"({wait_limit_s} seconds)."
)
return False
logger.info(
f"[OpenSearch] Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit_s:.1f}"
)
time.sleep(wait_interval_s)
finally:
if made_client:
assert client is not None
client.close()

View File

@@ -17,7 +17,7 @@ from onyx.db.enums import EmbeddingPrecision
from onyx.db.models import DocumentSource
from onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk_text,
generate_enriched_content_for_chunk,
)
from onyx.document_index.interfaces import DocumentIndex as OldDocumentIndex
from onyx.document_index.interfaces import (
@@ -140,12 +140,9 @@ def _convert_onyx_chunk_to_opensearch_document(
return DocumentChunk(
document_id=chunk.source_document.id,
chunk_index=chunk.chunk_id,
# Use get_title_for_document_index to match the logic used when creating
# the title_embedding in the embedder. This method falls back to
# semantic_identifier when title is None (but not empty string).
title=chunk.source_document.get_title_for_document_index(),
title=chunk.source_document.title,
title_vector=chunk.title_embedding,
content=generate_enriched_content_for_chunk_text(chunk),
content=generate_enriched_content_for_chunk(chunk),
content_vector=chunk.embeddings.full_embedding,
source_type=chunk.source_document.source.value,
metadata_list=chunk.source_document.get_metadata_str_attributes(),
@@ -424,24 +421,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
def verify_and_create_index_if_necessary(
self, embedding_dim: int, embedding_precision: EmbeddingPrecision
) -> None:
"""Verifies and creates the index if necessary.
Also puts the desired search pipeline state, creating the pipelines if
they do not exist and updating them otherwise.
Args:
embedding_dim: Vector dimensionality for the vector similarity part
of the search.
embedding_precision: Precision of the values of the vectors for the
similarity part of the search.
Raises:
RuntimeError: There was an error verifying or creating the index or
search pipelines.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if necessary."
)
expected_mappings = DocumentSchema.get_document_schema(
embedding_dim, self._tenant_state.multitenant
)
@@ -471,9 +450,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
chunks: list[DocMetadataAwareIndexChunk],
indexing_metadata: IndexingMetadata,
) -> list[DocumentInsertionRecord]:
logger.debug(
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks for index {self._index_name}."
)
# Set of doc IDs.
unique_docs_to_be_indexed: set[str] = set()
document_indexing_results: list[DocumentInsertionRecord] = []
@@ -518,8 +494,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
def delete(self, document_id: str, chunk_count: int | None = None) -> int:
"""Deletes all chunks for a given document.
Does nothing if the specified document ID does not exist.
TODO(andrei): Make this method require supplying source type.
TODO(andrei): Consider implementing this method to delete on document
chunk IDs vs querying for matching document chunks.
@@ -536,9 +510,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
Returns:
The number of chunks successfully deleted.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Deleting document {document_id} from index {self._index_name}."
)
query_body = DocumentQuery.delete_from_document_id_query(
document_id=document_id,
tenant_state=self._tenant_state,
@@ -552,7 +523,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
) -> None:
"""Updates some set of chunks.
NOTE: Will raise if the specified document chunks do not exist.
NOTE: Requires document chunk count be known; will raise if it is not.
NOTE: Each update request must have some field to update; if not it is
assumed there is a bug in the caller and this will raise.
@@ -569,9 +539,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
RuntimeError: Failed to update some or all of the chunks for the
specified documents.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index {self._index_name}."
)
for update_request in update_requests:
properties_to_update: dict[str, Any] = dict()
# TODO(andrei): Nit but consider if we can use DocumentChunk
@@ -637,9 +604,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
TODO(andrei): Consider implementing this method to retrieve on document
chunk IDs vs querying for matching document chunks.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index {self._index_name}."
)
results: list[InferenceChunk] = []
for chunk_request in chunk_requests:
search_hits: list[SearchHit[DocumentChunk]] = []
@@ -679,9 +643,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
num_to_retrieve: int,
offset: int = 0,
) -> list[InferenceChunk]:
logger.debug(
f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}."
)
query_body = DocumentQuery.get_hybrid_search_query(
query_text=query,
query_vector=query_embedding,

View File

@@ -17,7 +17,7 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
get_experts_stores_representations,
)
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk_text,
generate_enriched_content_for_chunk,
)
from onyx.document_index.document_index_utils import get_uuid_from_chunk
from onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old
@@ -186,7 +186,7 @@ def _index_vespa_chunk(
# For the BM25 index, the keyword suffix is used, the vector is already generated with the more
# natural language representation of the metadata section
CONTENT: remove_invalid_unicode_chars(
generate_enriched_content_for_chunk_text(chunk)
generate_enriched_content_for_chunk(chunk)
),
# This duplication of `content` is needed for keyword highlighting
# Note that it's not exactly the same as the actual content

View File

@@ -7,9 +7,6 @@ from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import DocumentFailure
from onyx.db.models import SearchSettings
from onyx.document_index.chunk_content_enrichment import (
generate_enriched_content_for_chunk_embedding,
)
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import DocAwareChunk
@@ -129,7 +126,7 @@ class DefaultIndexingEmbedder(IndexingEmbedder):
if chunk.large_chunk_reference_ids:
large_chunks_present = True
chunk_text = (
generate_enriched_content_for_chunk_embedding(chunk)
f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
) or chunk.source_document.get_title_for_document_index()
if not chunk_text:

View File

@@ -37,7 +37,6 @@ from onyx.document_index.document_index_utils import (
get_multipass_config,
)
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import DocumentMetadata
from onyx.document_index.interfaces import IndexBatchParams
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
@@ -164,7 +163,7 @@ def index_doc_batch_with_handler(
*,
chunker: Chunker,
embedder: IndexingEmbedder,
document_indices: list[DocumentIndex],
document_index: DocumentIndex,
document_batch: list[Document],
request_id: str | None,
tenant_id: str,
@@ -177,7 +176,7 @@ def index_doc_batch_with_handler(
index_pipeline_result = index_doc_batch(
chunker=chunker,
embedder=embedder,
document_indices=document_indices,
document_index=document_index,
document_batch=document_batch,
request_id=request_id,
tenant_id=tenant_id,
@@ -628,7 +627,7 @@ def index_doc_batch(
document_batch: list[Document],
chunker: Chunker,
embedder: IndexingEmbedder,
document_indices: list[DocumentIndex],
document_index: DocumentIndex,
request_id: str | None,
tenant_id: str,
adapter: IndexingBatchAdapter,
@@ -744,57 +743,47 @@ def index_doc_batch(
short_descriptor_log = str(short_descriptor_list)[:1024]
logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
for document_index in document_indices:
# A document will not be spread across different batches, so all the
# documents with chunks in this set, are fully represented by the chunks
# in this set
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
# A document will not be spread across different batches, so all the
# documents with chunks in this set, are fully represented by the chunks
# in this set
(
insertion_records,
vector_db_write_failures,
) = write_chunks_to_vector_db_with_backoff(
document_index=document_index,
chunks=result.chunks,
index_batch_params=IndexBatchParams(
doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
tenant_id=tenant_id,
large_chunks_enabled=chunker.enable_large_chunks,
),
)
all_returned_doc_ids: set[str] = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
all_returned_doc_ids = (
{record.document_id for record in insertion_records}
.union(
{
record.failed_document.document_id
for record in vector_db_write_failures
if record.failed_document
}
)
.union(
{
record.failed_document.document_id
for record in embedding_failures
if record.failed_document
}
)
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
)
if all_returned_doc_ids != set(updatable_ids):
raise RuntimeError(
f"Some documents were not successfully indexed. "
f"Updatable IDs: {updatable_ids}, "
f"Returned IDs: {all_returned_doc_ids}. "
"This should never happen."
f"This occured for document index {document_index.__class__.__name__}"
)
# We treat the first document index we got as the primary one used
# for reporting the state of indexing.
if primary_doc_idx_insertion_records is None:
primary_doc_idx_insertion_records = insertion_records
if primary_doc_idx_vector_db_write_failures is None:
primary_doc_idx_vector_db_write_failures = vector_db_write_failures
adapter.post_index(
context=context,
@@ -803,15 +792,11 @@ def index_doc_batch(
result=result,
)
assert primary_doc_idx_insertion_records is not None
assert primary_doc_idx_vector_db_write_failures is not None
return IndexingPipelineResult(
new_docs=len(
[r for r in primary_doc_idx_insertion_records if not r.already_existed]
),
new_docs=len([r for r in insertion_records if not r.already_existed]),
total_docs=len(filtered_documents),
total_chunks=len(chunks_with_embeddings),
failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
failures=vector_db_write_failures + embedding_failures,
)
@@ -820,7 +805,7 @@ def run_indexing_pipeline(
document_batch: list[Document],
request_id: str | None,
embedder: IndexingEmbedder,
document_indices: list[DocumentIndex],
document_index: DocumentIndex,
db_session: Session,
tenant_id: str,
adapter: IndexingBatchAdapter,
@@ -861,7 +846,7 @@ def run_indexing_pipeline(
return index_doc_batch_with_handler(
chunker=chunker,
embedder=embedder,
document_indices=document_indices,
document_index=document_index,
document_batch=document_batch,
request_id=request_id,
tenant_id=tenant_id,

View File

@@ -41,11 +41,6 @@ alphanum_regex = re.compile(r"[^a-z0-9]+")
rem_email_regex = re.compile(r"(?<=\S)@([a-z0-9-]+)\.([a-z]{2,6})$")
def _ngrams(sequence: str, n: int) -> list[tuple[str, ...]]:
"""Generate n-grams from a sequence."""
return [tuple(sequence[i : i + n]) for i in range(len(sequence) - n + 1)]
def _clean_name(entity_name: str) -> str:
"""
Clean an entity string by removing non-alphanumeric characters and email addresses.
@@ -63,6 +58,8 @@ def _normalize_one_entity(
attributes: dict[str, str],
allowed_docs_temp_view_name: str | None = None,
) -> str | None:
from nltk import ngrams # type: ignore
"""
Matches a single entity to the best matching entity of the same type.
"""
@@ -153,16 +150,16 @@ def _normalize_one_entity(
# step 2: do a weighted ngram analysis and damerau levenshtein distance to rerank
n1, n2, n3 = (
set(_ngrams(cleaned_entity, 1)),
set(_ngrams(cleaned_entity, 2)),
set(_ngrams(cleaned_entity, 3)),
set(ngrams(cleaned_entity, 1)),
set(ngrams(cleaned_entity, 2)),
set(ngrams(cleaned_entity, 3)),
)
for i, (candidate_id_name, candidate_name, _) in enumerate(candidates):
cleaned_candidate = _clean_name(candidate_name)
h_n1, h_n2, h_n3 = (
set(_ngrams(cleaned_candidate, 1)),
set(_ngrams(cleaned_candidate, 2)),
set(_ngrams(cleaned_candidate, 3)),
set(ngrams(cleaned_candidate, 1)),
set(ngrams(cleaned_candidate, 2)),
set(ngrams(cleaned_candidate, 3)),
)
# compute ngram overlap, renormalize scores if the names are too short for larger ngrams

View File

@@ -369,6 +369,8 @@ def _patch_openai_responses_chunk_parser() -> None:
# New output item added
output_item = parsed_chunk.get("item", {})
if output_item.get("type") == "function_call":
# Track that we've received tool calls via streaming
self._has_streamed_tool_calls = True
return GenericStreamingChunk(
text="",
tool_use=ChatCompletionToolCallChunk(
@@ -394,6 +396,8 @@ def _patch_openai_responses_chunk_parser() -> None:
elif event_type == "response.function_call_arguments.delta":
content_part: Optional[str] = parsed_chunk.get("delta", None)
if content_part:
# Track that we've received tool calls via streaming
self._has_streamed_tool_calls = True
return GenericStreamingChunk(
text="",
tool_use=ChatCompletionToolCallChunk(
@@ -491,22 +495,72 @@ def _patch_openai_responses_chunk_parser() -> None:
elif event_type == "response.completed":
# Final event signaling all output items (including parallel tool calls) are done
# Check if we already received tool calls via streaming events
# There is an issue where OpenAI (not via Azure) will give back the tool calls streamed out as tokens
# But on Azure, it's only given out all at once. OpenAI also happens to give back the tool calls in the
# response.completed event so we need to throw it out here or there are duplicate tool calls.
has_streamed_tool_calls = getattr(self, "_has_streamed_tool_calls", False)
response_data = parsed_chunk.get("response", {})
# Determine finish reason based on response content
finish_reason = "stop"
if response_data.get("output"):
for item in response_data["output"]:
if isinstance(item, dict) and item.get("type") == "function_call":
finish_reason = "tool_calls"
break
return GenericStreamingChunk(
text="",
tool_use=None,
is_finished=True,
finish_reason=finish_reason,
usage=None,
output_items = response_data.get("output", [])
# Check if there are function_call items in the output
has_function_calls = any(
isinstance(item, dict) and item.get("type") == "function_call"
for item in output_items
)
if has_function_calls and not has_streamed_tool_calls:
# Azure's Responses API returns all tool calls in response.completed
# without streaming them incrementally. Extract them here.
from litellm.types.utils import (
Delta,
ModelResponseStream,
StreamingChoices,
)
tool_calls = []
for idx, item in enumerate(output_items):
if isinstance(item, dict) and item.get("type") == "function_call":
tool_calls.append(
ChatCompletionToolCallChunk(
id=item.get("call_id"),
index=idx,
type="function",
function=ChatCompletionToolCallFunctionChunk(
name=item.get("name"),
arguments=item.get("arguments", ""),
),
)
)
return ModelResponseStream(
choices=[
StreamingChoices(
index=0,
delta=Delta(tool_calls=tool_calls),
finish_reason="tool_calls",
)
]
)
elif has_function_calls:
# Tool calls were already streamed, just signal completion
return GenericStreamingChunk(
text="",
tool_use=None,
is_finished=True,
finish_reason="tool_calls",
usage=None,
)
else:
return GenericStreamingChunk(
text="",
tool_use=None,
is_finished=True,
finish_reason="stop",
usage=None,
)
else:
pass
@@ -631,6 +685,40 @@ def _patch_openai_responses_transform_response() -> None:
LiteLLMResponsesTransformationHandler.transform_response = _patched_transform_response # type: ignore[method-assign]
def _patch_azure_responses_should_fake_stream() -> None:
"""
Patches AzureOpenAIResponsesAPIConfig.should_fake_stream to always return False.
By default, LiteLLM uses "fake streaming" (MockResponsesAPIStreamingIterator) for models
not in its database. This causes Azure custom model deployments to buffer the entire
response before yielding, resulting in poor time-to-first-token.
Azure's Responses API supports native streaming, so we override this to always use
real streaming (SyncResponsesAPIStreamingIterator).
"""
from litellm.llms.azure.responses.transformation import (
AzureOpenAIResponsesAPIConfig,
)
if (
getattr(AzureOpenAIResponsesAPIConfig.should_fake_stream, "__name__", "")
== "_patched_should_fake_stream"
):
return
def _patched_should_fake_stream(
self: Any,
model: Optional[str],
stream: Optional[bool],
custom_llm_provider: Optional[str] = None,
) -> bool:
# Azure Responses API supports native streaming - never fake it
return False
_patched_should_fake_stream.__name__ = "_patched_should_fake_stream"
AzureOpenAIResponsesAPIConfig.should_fake_stream = _patched_should_fake_stream # type: ignore[method-assign]
def apply_monkey_patches() -> None:
"""
Apply all necessary monkey patches to LiteLLM for compatibility.
@@ -640,12 +728,13 @@ def apply_monkey_patches() -> None:
- Patching OllamaChatCompletionResponseIterator.chunk_parser for streaming content
- Patching OpenAiResponsesToChatCompletionStreamIterator.chunk_parser for OpenAI Responses API
- Patching LiteLLMResponsesTransformationHandler.transform_response for non-streaming responses
- Patching LiteLLMResponsesTransformationHandler._convert_content_str_to_input_text for tool content types
- Patching AzureOpenAIResponsesAPIConfig.should_fake_stream to enable native streaming
"""
_patch_ollama_transform_request()
_patch_ollama_chunk_parser()
_patch_openai_responses_chunk_parser()
_patch_openai_responses_transform_response()
_patch_azure_responses_should_fake_stream()
def _extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -54,6 +54,11 @@
"model_vendor": "amazon",
"model_version": "v1:0"
},
"anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -1460,6 +1465,11 @@
"model_vendor": "mistral",
"model_version": "v0:1"
},
"bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"chat-bison": {
"display_name": "Chat Bison",
"model_vendor": "google",
@@ -1490,6 +1500,16 @@
"model_vendor": "openai",
"model_version": "latest"
},
"claude-3-5-haiku-20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"claude-3-5-haiku-latest": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "latest"
},
"claude-3-5-sonnet-20240620": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -1695,6 +1715,11 @@
"model_vendor": "amazon",
"model_version": "v1:0"
},
"eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022-v1:0"
},
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -3226,6 +3251,15 @@
"model_vendor": "anthropic",
"model_version": "latest"
},
"openrouter/anthropic/claude-3-5-haiku": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic"
},
"openrouter/anthropic/claude-3-5-haiku-20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"openrouter/anthropic/claude-3-haiku": {
"display_name": "Claude Haiku 3",
"model_vendor": "anthropic"
@@ -3740,6 +3774,11 @@
"model_vendor": "amazon",
"model_version": "1:0"
},
"us.anthropic.claude-3-5-haiku-20241022-v1:0": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic",
@@ -3860,6 +3899,15 @@
"model_vendor": "twelvelabs",
"model_version": "v1:0"
},
"vertex_ai/claude-3-5-haiku": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic"
},
"vertex_ai/claude-3-5-haiku@20241022": {
"display_name": "Claude Haiku 3.5",
"model_vendor": "anthropic",
"model_version": "20241022"
},
"vertex_ai/claude-3-5-sonnet": {
"display_name": "Claude Sonnet 3.5",
"model_vendor": "anthropic"

View File

@@ -1,225 +0,0 @@
import re
ENGLISH_STOPWORDS = [
"a",
"about",
"above",
"after",
"again",
"against",
"ain",
"all",
"am",
"an",
"and",
"any",
"are",
"aren",
"aren't",
"as",
"at",
"be",
"because",
"been",
"before",
"being",
"below",
"between",
"both",
"but",
"by",
"can",
"couldn",
"couldn't",
"d",
"did",
"didn",
"didn't",
"do",
"does",
"doesn",
"doesn't",
"doing",
"don",
"don't",
"down",
"during",
"each",
"few",
"for",
"from",
"further",
"had",
"hadn",
"hadn't",
"has",
"hasn",
"hasn't",
"have",
"haven",
"haven't",
"having",
"he",
"he'd",
"he'll",
"he's",
"her",
"here",
"hers",
"herself",
"him",
"himself",
"his",
"how",
"i",
"i'd",
"i'll",
"i'm",
"i've",
"if",
"in",
"into",
"is",
"isn",
"isn't",
"it",
"it'd",
"it'll",
"it's",
"its",
"itself",
"just",
"ll",
"m",
"ma",
"me",
"mightn",
"mightn't",
"more",
"most",
"mustn",
"mustn't",
"my",
"myself",
"needn",
"needn't",
"no",
"nor",
"not",
"now",
"o",
"of",
"off",
"on",
"once",
"only",
"or",
"other",
"our",
"ours",
"ourselves",
"out",
"over",
"own",
"re",
"s",
"same",
"shan",
"shan't",
"she",
"she'd",
"she'll",
"she's",
"should",
"should've",
"shouldn",
"shouldn't",
"so",
"some",
"such",
"t",
"than",
"that",
"that'll",
"the",
"their",
"theirs",
"them",
"themselves",
"then",
"there",
"these",
"they",
"they'd",
"they'll",
"they're",
"they've",
"this",
"those",
"through",
"to",
"too",
"under",
"until",
"up",
"ve",
"very",
"was",
"wasn",
"wasn't",
"we",
"we'd",
"we'll",
"we're",
"we've",
"were",
"weren",
"weren't",
"what",
"when",
"where",
"which",
"while",
"who",
"whom",
"why",
"will",
"with",
"won",
"won't",
"wouldn",
"wouldn't",
"y",
"you",
"you'd",
"you'll",
"you're",
"you've",
"your",
"yours",
"yourself",
"yourselves",
]
ENGLISH_STOPWORDS_SET = frozenset(ENGLISH_STOPWORDS)
def strip_stopwords(text: str) -> list[str]:
"""Remove English stopwords from text.
Matching is case-insensitive and ignores leading/trailing punctuation
on each word. Internal punctuation (like apostrophes in contractions)
is preserved for matching, so "you're" matches the stopword "you're"
but "youre" would not.
"""
words = text.split()
result = []
for word in words:
# Strip leading/trailing punctuation to get the core word for comparison
# This preserves internal punctuation like apostrophes
core = re.sub(r"^[^\w']+|[^\w']+$", "", word)
if core.lower() not in ENGLISH_STOPWORDS_SET:
result.append(word)
return result

View File

@@ -0,0 +1,287 @@
# Discord Bot Multitenant Architecture
This document analyzes how the Discord cache manager and API client coordinate to handle multitenant API keys from a single Discord client.
## Overview
The Discord bot uses a **single-client, multi-tenant** architecture where one `OnyxDiscordClient` instance serves multiple tenants (organizations) simultaneously. Tenant isolation is achieved through:
- **Cache Manager**: Maps Discord guilds to tenants and stores per-tenant API keys
- **API Client**: Stateless HTTP client that accepts dynamic API keys per request
```
┌─────────────────────────────────────────────────────────────────────┐
│ OnyxDiscordClient │
│ │
│ ┌─────────────────────────┐ ┌─────────────────────────────┐ │
│ │ DiscordCacheManager │ │ OnyxAPIClient │ │
│ │ │ │ │ │
│ │ guild_id → tenant_id │───▶│ send_chat_message( │ │
│ │ tenant_id → api_key │ │ message, │ │
│ │ │ │ api_key=<per-tenant>, │ │
│ └─────────────────────────┘ │ persona_id=... │ │
│ │ ) │ │
│ └─────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────┘
```
---
## Component Details
### 1. Cache Manager (`backend/onyx/onyxbot/discord/cache.py`)
The `DiscordCacheManager` maintains two critical in-memory mappings:
```python
class DiscordCacheManager:
_guild_tenants: dict[int, str] # guild_id → tenant_id
_api_keys: dict[str, str] # tenant_id → api_key
_lock: asyncio.Lock # Concurrency control
```
#### Key Responsibilities
| Function | Purpose |
|----------|---------|
| `get_tenant(guild_id)` | O(1) lookup: guild → tenant |
| `get_api_key(tenant_id)` | O(1) lookup: tenant → API key |
| `refresh_all()` | Full cache rebuild from database |
| `refresh_guild()` | Incremental update for single guild |
#### API Key Provisioning Strategy
API keys are **lazily provisioned** - only created when first needed:
```python
async def _load_tenant_data(self, tenant_id: str) -> tuple[list[int], str | None]:
needs_key = tenant_id not in self._api_keys
with get_session_with_tenant(tenant_id) as db:
# Load guild configs
configs = get_discord_bot_configs(db)
guild_ids = [c.guild_id for c in configs if c.enabled]
# Only provision API key if not already cached
api_key = None
if needs_key:
api_key = get_or_create_discord_service_api_key(db, tenant_id)
return guild_ids, api_key
```
This optimization avoids repeated database calls for API key generation.
#### Concurrency Control
All write operations acquire an async lock to prevent race conditions:
```python
async def refresh_all(self) -> None:
async with self._lock:
# Safe to modify _guild_tenants and _api_keys
for tenant_id in get_all_tenant_ids():
guild_ids, api_key = await self._load_tenant_data(tenant_id)
# Update mappings...
```
Read operations (`get_tenant`, `get_api_key`) are lock-free: a single Python dict lookup is atomic under the GIL, so no lock is needed for reads.
---
### 2. API Client (`backend/onyx/onyxbot/discord/api_client.py`)
The `OnyxAPIClient` is a **stateless async HTTP client** that communicates with Onyx API pods.
#### Key Design: Per-Request API Key Injection
```python
class OnyxAPIClient:
async def send_chat_message(
self,
message: str,
api_key: str, # Injected per-request
persona_id: int | None,
...
) -> ChatFullResponse:
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}", # Tenant-specific auth
}
# Make request...
```
The client accepts `api_key` as a parameter to each method, enabling **dynamic tenant selection at request time**. This design allows a single client instance to serve multiple tenants:
```python
# Same client, different tenants
await api_client.send_chat_message(msg, api_key=key_for_tenant_1, ...)
await api_client.send_chat_message(msg, api_key=key_for_tenant_2, ...)
```
---
## Coordination Flow
### Message Processing Pipeline
When a Discord message arrives, the client coordinates cache and API client:
```python
async def on_message(self, message: Message) -> None:
guild_id = message.guild.id
# Step 1: Cache lookup - guild → tenant
tenant_id = self.cache.get_tenant(guild_id)
if not tenant_id:
return # Guild not registered
# Step 2: Cache lookup - tenant → API key
api_key = self.cache.get_api_key(tenant_id)
if not api_key:
logger.warning(f"No API key for tenant {tenant_id}")
return
# Step 3: API call with tenant-specific credentials
await process_chat_message(
message=message,
api_key=api_key, # Tenant-specific
persona_id=persona_id, # Tenant-specific
api_client=self.api_client,
)
```
### Startup Sequence
```python
async def setup_hook(self) -> None:
# 1. Initialize API client (create aiohttp session)
await self.api_client.initialize()
# 2. Populate cache with all tenants
await self.cache.refresh_all()
# 3. Start background refresh task
self._cache_refresh_task = self.loop.create_task(
self._periodic_cache_refresh() # Every 60 seconds
)
```
### Shutdown Sequence
```python
async def close(self) -> None:
# 1. Cancel background refresh
if self._cache_refresh_task:
self._cache_refresh_task.cancel()
# 2. Close Discord connection
await super().close()
# 3. Close API client session
await self.api_client.close()
# 4. Clear cache
self.cache.clear()
```
---
## Tenant Isolation Mechanisms
### 1. Per-Tenant API Keys
Each tenant has a dedicated service API key:
```python
# backend/onyx/db/discord_bot.py
def get_or_create_discord_service_api_key(db_session: Session, tenant_id: str) -> str:
existing = get_discord_service_api_key(db_session)
if existing:
return regenerate_key(existing)
# Create LIMITED role key (chat-only permissions)
return insert_api_key(
db_session=db_session,
api_key_args=APIKeyArgs(
name=DISCORD_SERVICE_API_KEY_NAME,
role=UserRole.LIMITED, # Minimal permissions
),
user_id=None, # Service account (system-owned)
).api_key
```
### 2. Database Context Variables
The cache uses context variables for proper tenant-scoped DB sessions:
```python
context_token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
with get_session_with_tenant(tenant_id) as db:
# All DB operations scoped to this tenant
...
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(context_token)
```
### 3. Enterprise Gating Support
Gated tenants are filtered during cache refresh:
```python
gated_tenants = fetch_ee_implementation_or_noop(
"onyx.server.tenants.product_gating",
"get_gated_tenants",
set(),
)()
for tenant_id in get_all_tenant_ids():
if tenant_id in gated_tenants:
continue # Skip gated tenants
```
---
## Cache Refresh Strategy
| Trigger | Method | Scope |
|---------|--------|-------|
| Startup | `refresh_all()` | All tenants |
| Periodic (60s) | `refresh_all()` | All tenants |
| Guild registration | `refresh_guild()` | Single tenant |
### Error Handling
- **Tenant-level errors**: Logged and skipped (doesn't stop other tenants)
- **Missing API key**: Bot silently ignores messages from that guild
- **Network errors**: Logged, cache continues with stale data until next refresh
---
## Key Design Insights
1. **Single Client, Multiple Tenants**: A single `OnyxAPIClient` and a single `DiscordCacheManager` instance serve all tenants via dynamic API key injection.
2. **Cache-First Architecture**: Guild lookups are O(1) in-memory; API keys are cached after first provisioning to avoid repeated DB calls.
3. **Graceful Degradation**: If an API key is missing or stale, the bot simply doesn't respond (no crash or error propagation).
4. **Thread Safety Without Blocking**: `asyncio.Lock` prevents race conditions while maintaining async concurrency for reads.
5. **Lazy Provisioning**: API keys are only created when first needed, then cached for performance.
6. **Stateless API Client**: The HTTP client holds no tenant state - all tenant context is injected per-request via the `api_key` parameter.
---
## File References
| Component | Path |
|-----------|------|
| Cache Manager | `backend/onyx/onyxbot/discord/cache.py` |
| API Client | `backend/onyx/onyxbot/discord/api_client.py` |
| Discord Client | `backend/onyx/onyxbot/discord/client.py` |
| API Key DB Operations | `backend/onyx/db/discord_bot.py` |
| Cache Manager Tests | `backend/tests/unit/onyx/onyxbot/discord/test_cache_manager.py` |
| API Client Tests | `backend/tests/unit/onyx/onyxbot/discord/test_api_client.py` |

View File

@@ -592,11 +592,8 @@ def build_slack_response_blocks(
)
citations_blocks = []
document_blocks = []
if answer.citation_info:
citations_blocks = _build_citations_blocks(answer)
else:
document_blocks = _priority_ordered_documents_blocks(answer)
citations_divider = [DividerBlock()] if citations_blocks else []
buttons_divider = [DividerBlock()] if web_follow_up_block or follow_up_block else []
@@ -608,7 +605,6 @@ def build_slack_response_blocks(
+ ai_feedback_block
+ citations_divider
+ citations_blocks
+ document_blocks
+ buttons_divider
+ web_follow_up_block
+ follow_up_block

View File

@@ -1,65 +1,270 @@
from mistune import Markdown # type: ignore[import-untyped]
from mistune import Renderer
import re
from collections.abc import Callable
from typing import Any
from mistune import create_markdown
from mistune import HTMLRenderer
# Tags that should be replaced with a newline (line-break and block-level elements).
_HTML_NEWLINE_TAG_PATTERN = re.compile(
    r"<br\s*/?>|</(?:p|div|li|h[1-6]|tr|blockquote|section|article)>",
    re.IGNORECASE,
)
# Strips HTML tags but excludes autolinks like <https://...> and <mailto:...>
# (the negative lookahead keeps those intact so Slack still renders them as links).
_HTML_TAG_PATTERN = re.compile(
    r"<(?!https?://|mailto:)/?[a-zA-Z][^>]*>",
)
# Matches fenced code blocks (``` ... ```) so we can skip sanitization inside them.
_FENCED_CODE_BLOCK_PATTERN = re.compile(r"```[\s\S]*?```")
# Matches the start of any markdown link: [text]( or [[n]](
# The inner group handles nested brackets for citation links like [[1]](.
_MARKDOWN_LINK_PATTERN = re.compile(r"\[(?:[^\[\]]|\[[^\]]*\])*\]\(")
# Matches Slack-style links <url|text> that LLMs sometimes output directly.
# Mistune doesn't recognise this syntax, so text() would escape the angle
# brackets and Slack would render them as literal text instead of links.
_SLACK_LINK_PATTERN = re.compile(r"<(https?://[^|>]+)\|([^>]+)>")
def _sanitize_html(text: str) -> str:
    """Remove HTML markup from a text fragment.

    <br> and closing block-level tags become newlines; every other tag is
    dropped entirely. Autolinks such as <https://...> and <mailto:...> are
    left untouched (the tag pattern explicitly excludes them).
    """
    with_newlines = _HTML_NEWLINE_TAG_PATTERN.sub("\n", text)
    return _HTML_TAG_PATTERN.sub("", with_newlines)
def _transform_outside_code_blocks(
    message: str, transform: Callable[[str], str]
) -> str:
    """Apply *transform* to every span of *message* outside ``` fences.

    Fenced code blocks are spliced back in verbatim so their contents are
    never altered by the transformation.
    """
    outside_spans = _FENCED_CODE_BLOCK_PATTERN.split(message)
    fenced_spans = _FENCED_CODE_BLOCK_PATTERN.findall(message)
    # re.split (no capturing groups) yields exactly len(fenced_spans) + 1 parts,
    # so zip pairs each fence with the outside text that follows it.
    pieces: list[str] = [transform(outside_spans[0])]
    for fence, following in zip(fenced_spans, outside_spans[1:]):
        pieces.append(fence)
        pieces.append(transform(following))
    return "".join(pieces)
def _extract_link_destination(message: str, start_idx: int) -> tuple[str, int | None]:
"""Extract markdown link destination, allowing nested parentheses in the URL."""
depth = 0
i = start_idx
while i < len(message):
curr = message[i]
if curr == "\\":
i += 2
continue
if curr == "(":
depth += 1
elif curr == ")":
if depth == 0:
return message[start_idx:i], i
depth -= 1
i += 1
return message[start_idx:], None
def _normalize_link_destinations(message: str) -> str:
    """Wrap markdown link URLs in angle brackets for safe parsing.

    ``[text](url)`` breaks when the URL contains unescaped parentheses,
    spaces, or other special characters. Rewriting it as ``[text](<url>)``
    tells the parser to treat everything inside as a literal URL. Applies to
    every link, not just citations.
    """
    if "](" not in message:
        return message
    pieces: list[str] = []
    pos = 0
    while True:
        match = _MARKDOWN_LINK_PATTERN.search(message, pos)
        if match is None:
            break
        pieces.append(message[pos : match.end()])
        dest_start = match.end()
        destination, close_idx = _extract_link_destination(message, dest_start)
        if close_idx is None:
            # Unterminated link: emit the remainder untouched.
            pieces.append(message[dest_start:])
            return "".join(pieces)
        wrapped = destination.startswith("<") and destination.endswith(">")
        if destination and not wrapped:
            destination = f"<{destination}>"
        pieces.append(destination)
        pieces.append(")")
        pos = close_idx + 1
    pieces.append(message[pos:])
    return "".join(pieces)
def _convert_slack_links_to_markdown(message: str) -> str:
    """Rewrite Slack mrkdwn links ``<url|text>`` as markdown ``[text](url)``.

    LLMs occasionally emit Slack's native link syntax directly. Mistune does
    not parse it, so without this rewrite the angle brackets would be escaped
    by text() and Slack would show the link as literal text.
    """

    def _rewrite(fragment: str) -> str:
        return _SLACK_LINK_PATTERN.sub(r"[\2](\1)", fragment)

    return _transform_outside_code_blocks(message, _rewrite)
def format_slack_message(message: str | None) -> str:
    """Render an LLM markdown *message* as Slack mrkdwn.

    Returns "" for ``None``. Outside fenced code blocks, HTML is stripped and
    Slack-style <url|text> links are converted to markdown; link destinations
    are then wrapped in angle brackets before rendering with mistune.

    Fix: removed a stale ``return Markdown(...).render(message)`` line left
    over from the pre-mistune-3 implementation — it made the real body
    unreachable and referenced the no-longer-imported ``Markdown`` class.
    """
    if message is None:
        return ""
    message = _transform_outside_code_blocks(message, _sanitize_html)
    message = _convert_slack_links_to_markdown(message)
    normalized_message = _normalize_link_destinations(message)
    md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough", "table"])
    result = md(normalized_message)
    # With HTMLRenderer, result is always str (not AST list)
    assert isinstance(result, str)
    return result.rstrip("\n")
class SlackRenderer(Renderer):
class SlackRenderer(HTMLRenderer):
"""Renders markdown as Slack mrkdwn format instead of HTML.
Overrides all HTMLRenderer methods that produce HTML tags to ensure
no raw HTML ever appears in Slack messages.
"""
SPECIALS: dict[str, str] = {"&": "&amp;", "<": "&lt;", ">": "&gt;"}
def __init__(self) -> None:
super().__init__()
self._table_headers: list[str] = []
self._current_row_cells: list[str] = []
def escape_special(self, text: str) -> str:
for special, replacement in self.SPECIALS.items():
text = text.replace(special, replacement)
return text
def header(self, text: str, level: int, raw: str | None = None) -> str:
return f"*{text}*\n"
def heading(self, text: str, level: int, **attrs: Any) -> str: # noqa: ARG002
return f"*{text}*\n\n"
def emphasis(self, text: str) -> str:
return f"_{text}_"
def double_emphasis(self, text: str) -> str:
def strong(self, text: str) -> str:
return f"*{text}*"
def strikethrough(self, text: str) -> str:
return f"~{text}~"
def list(self, body: str, ordered: bool = True) -> str:
lines = body.split("\n")
def list(self, text: str, ordered: bool, **attrs: Any) -> str:
lines = text.split("\n")
count = 0
for i, line in enumerate(lines):
if line.startswith("li: "):
count += 1
prefix = f"{count}. " if ordered else ""
lines[i] = f"{prefix}{line[4:]}"
return "\n".join(lines)
return "\n".join(lines) + "\n"
def list_item(self, text: str) -> str:
return f"li: {text}\n"
def link(self, link: str, title: str | None, content: str | None) -> str:
escaped_link = self.escape_special(link)
if content:
return f"<{escaped_link}|{content}>"
def link(self, text: str, url: str, title: str | None = None) -> str:
escaped_url = self.escape_special(url)
if text:
return f"<{escaped_url}|{text}>"
if title:
return f"<{escaped_link}|{title}>"
return f"<{escaped_link}>"
return f"<{escaped_url}|{title}>"
return f"<{escaped_url}>"
def image(self, src: str, title: str | None, text: str | None) -> str:
escaped_src = self.escape_special(src)
def image(self, text: str, url: str, title: str | None = None) -> str:
escaped_url = self.escape_special(url)
display_text = title or text
return f"<{escaped_src}|{display_text}>" if display_text else f"<{escaped_src}>"
return f"<{escaped_url}|{display_text}>" if display_text else f"<{escaped_url}>"
def codespan(self, text: str) -> str:
return f"`{text}`"
def block_code(self, text: str, lang: str | None) -> str:
return f"```\n{text}\n```\n"
def block_code(self, code: str, info: str | None = None) -> str: # noqa: ARG002
return f"```\n{code.rstrip(chr(10))}\n```\n\n"
def linebreak(self) -> str:
return "\n"
def thematic_break(self) -> str:
return "---\n\n"
def block_quote(self, text: str) -> str:
lines = text.strip().split("\n")
quoted = "\n".join(f">{line}" for line in lines)
return quoted + "\n\n"
def block_html(self, html: str) -> str:
return _sanitize_html(html) + "\n\n"
def block_error(self, text: str) -> str:
return f"```\n{text}\n```\n\n"
def text(self, text: str) -> str:
# Only escape the three entities Slack recognizes: & < >
# HTMLRenderer.text() also escapes " to &quot; which Slack renders
# as literal &quot; text since Slack doesn't recognize that entity.
return self.escape_special(text)
# -- Table rendering (converts markdown tables to vertical cards) --
def table_cell(
self, text: str, align: str | None = None, head: bool = False # noqa: ARG002
) -> str:
if head:
self._table_headers.append(text.strip())
else:
self._current_row_cells.append(text.strip())
return ""
def table_head(self, text: str) -> str: # noqa: ARG002
self._current_row_cells = []
return ""
def table_row(self, text: str) -> str: # noqa: ARG002
cells = self._current_row_cells
self._current_row_cells = []
# First column becomes the bold title, remaining columns are bulleted fields
lines: list[str] = []
if cells:
title = cells[0]
if title:
# Avoid double-wrapping if cell already contains bold markup
if title.startswith("*") and title.endswith("*") and len(title) > 1:
lines.append(title)
else:
lines.append(f"*{title}*")
for i, cell in enumerate(cells[1:], start=1):
if i < len(self._table_headers):
lines.append(f"{self._table_headers[i]}: {cell}")
else:
lines.append(f"{cell}")
return "\n".join(lines) + "\n\n"
def table_body(self, text: str) -> str:
return text
def table(self, text: str) -> str:
self._table_headers = []
self._current_row_cells = []
return text + "\n"
def paragraph(self, text: str) -> str:
return f"{text}\n"
def autolink(self, link: str, is_email: bool) -> str:
return link if is_email else self.link(link, None, None)
return f"{text}\n\n"

View File

@@ -32,6 +32,9 @@ from onyx.configs.constants import MessageType
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.onyxbot_configs import NOTIFY_SLACKBOT_NO_ANSWER
from onyx.connectors.slack.utils import expert_info_from_slack_id
from onyx.context.search.retrieval.search_runner import (
download_nltk_data,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.engine.sql_engine import SqlEngine
@@ -1126,6 +1129,9 @@ if __name__ == "__main__":
set_is_ee_based_on_env_variable()
logger.info("Verifying query preprocessing (NLTK) data is downloaded")
download_nltk_data()
try:
# Keep the main thread alive
while tenant_handler.running:

View File

@@ -96,7 +96,7 @@ ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."
CHAT_NAMING_SYSTEM_PROMPT = """
Given the conversation history, provide a SHORT name for the conversation. Focus the name on the important keywords to convey the topic of the conversation. \
Make sure the name is in the same language as the user's first message.
Make sure the name is in the same language as the user's language.
IMPORTANT: DO NOT OUTPUT ANYTHING ASIDE FROM THE NAME. MAKE IT AS CONCISE AS POSSIBLE. NEVER USE MORE THAN 5 WORDS, LESS IS FINE.
""".strip()

View File

@@ -19,7 +19,7 @@ If you need to ask questions, follow these guidelines:
- Be concise and do not ask more than 5 questions.
- If there are ambiguous terms or questions, ask the user to clarify.
- Your questions should be a numbered list for clarity.
- Respond in the same language as the user's query.
- Respond in the user's language.
- Make sure to gather all the information needed to carry out the research task in a concise, well-structured manner.{{internal_search_clarification_guidance}}
- Wrap up with a quick sentence on what the clarification will help with, it's ok to reference the user query closely here.
""".strip()
@@ -44,9 +44,9 @@ For context, the date is {current_datetime}.
The research plan should be formatted as a numbered list of steps and have 6 or less individual steps.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps. The plan should be in the same language as the user's query.
Each step should be a standalone exploration question or topic that can be researched independently but may build on previous steps.
Output only the numbered list of steps with no additional prefix or suffix.
Output only the numbered list of steps with no additional prefix or suffix. Respond in the user's language.
""".strip()
@@ -76,11 +76,10 @@ You have currently used {{current_cycle_count}} of {{max_cycles}} max research c
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, other research agents, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}. The research task should be in the user's language.{{internal_search_research_task_guidance}}
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.
@@ -130,7 +129,7 @@ For context, the date is {current_datetime}.
Users have explicitly selected the deep research mode and will expect a long and detailed answer. It is ok and encouraged that your response is several pages long.
You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible.
You use different text styles and formatting to make the response easier to read. You may use markdown rarely when necessary to make the response more digestible. Respond in the user's language.
Not every fact retrieved will be relevant to the user's query.
@@ -166,11 +165,10 @@ You have currently used {{current_cycle_count}} of {{max_cycles}} max research c
## {RESEARCH_AGENT_TOOL_NAME}
The research task provided to the {RESEARCH_AGENT_TOOL_NAME} should be reasonably high level with a clear direction for investigation. \
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation. \
The research task should be in the same language as the overall research plan.
It should not be a single short query, rather it should be 1 (or 2 if necessary) descriptive sentences that outline the direction of the investigation.
CRITICAL - the {RESEARCH_AGENT_TOOL_NAME} only receives the task and has no additional context about the user's query, research plan, or message history. \
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}.{{internal_search_research_task_guidance}}
You absolutely must provide all of the context needed to complete the task in the argument to the {RESEARCH_AGENT_TOOL_NAME}. The research task should be in the user's language.{{internal_search_research_task_guidance}}
You should call the {RESEARCH_AGENT_TOOL_NAME} MANY times before completing with the {GENERATE_REPORT_TOOL_NAME} tool.

View File

@@ -48,7 +48,7 @@ Do not use the "site:" operator in your web search queries.
OPEN_URLS_GUIDANCE = """
## open_url
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your web searches or user specified URLs.
Use the `open_url` tool to read the content of one or more URLs. Use this tool to access the contents of the most promising web pages from your searches.
You can open many URLs at once by passing multiple URLs in the array if multiple pages seem promising. Prioritize the most promising pages and reputable sources.
You should almost always use open_url after a web_search call. Use this tool when a user asks about a specific provided URL.
"""

View File

@@ -109,6 +109,7 @@ class TenantRedis(redis.Redis):
"unlock",
"get",
"set",
"setex",
"delete",
"exists",
"incrby",

View File

@@ -0,0 +1,24 @@
# Default prompt shortcuts ("input prompts" on the backend) available to all users.
# NOTE(review): negative ids appear to be reserved for these built-in defaults —
# confirm against the loader that seeds them.
input_prompts:
  - id: -5
    prompt: "Elaborate"
    content: "Elaborate on the above, give me a more in depth explanation."
    active: true
    is_public: true
  - id: -4
    prompt: "Reword"
    content: "Help me rewrite the following politely and concisely for professional communication:\n"
    active: true
    is_public: true
  - id: -3
    prompt: "Email"
    content: "Write a professional email for me including a subject line, signature, etc. Template the parts that need editing with [ ]. The email should cover the following points:\n"
    active: true
    is_public: true
  - id: -2
    prompt: "Debug"
    content: "Provide step-by-step troubleshooting instructions for the following issue:\n"
    active: true
    is_public: true

View File

@@ -0,0 +1,40 @@
import yaml
from sqlalchemy.orm import Session
from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.db.input_prompt import insert_input_prompt_if_not_exists
from onyx.utils.logger import setup_logger
logger = setup_logger()
def load_input_prompts_from_yaml(
    db_session: Session, input_prompts_yaml: str = INPUT_PROMPT_YAML
) -> None:
    """Seed the database with the prompt shortcuts defined in the YAML file.

    Rows that already exist are left untouched (insert-if-not-exists), and
    each insert is committed immediately.
    """
    with open(input_prompts_yaml, "r") as yaml_file:
        parsed = yaml.safe_load(yaml_file)

    for prompt_spec in parsed.get("input_prompts", []):
        # If these prompts are deleted (a hard delete in the DB), they get
        # recreated on server startup; users can simply deactivate them
        # instead, so this is only a light inconvenience.
        insert_input_prompt_if_not_exists(
            user=None,
            input_prompt_id=prompt_spec.get("id"),
            prompt=prompt_spec["prompt"],
            content=prompt_spec["content"],
            is_public=prompt_spec["is_public"],
            active=prompt_spec.get("active", True),
            db_session=db_session,
            commit=True,
        )
def load_chat_yamls(
    db_session: Session,
    input_prompts_yaml: str = INPUT_PROMPT_YAML,
) -> None:
    """Load every chat-related YAML configuration.

    Currently this is only the prompt shortcuts, which the backend calls
    "input prompts".
    """
    load_input_prompts_from_yaml(db_session, input_prompts_yaml)

View File

@@ -32,7 +32,6 @@ def get_document_info(
db_session: Session = Depends(get_session),
) -> DocumentInfo:
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
user_acl_filters = build_access_filters_for_user(user, db_session)
@@ -77,7 +76,6 @@ def get_chunk_info(
db_session: Session = Depends(get_session),
) -> ChunkInfo:
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
user_acl_filters = build_access_filters_for_user(user, db_session)

View File

@@ -821,36 +821,20 @@ def _ensure_mcp_server_owner_or_admin(server: DbMCPServer, user: User | None) ->
def _db_mcp_server_to_api_mcp_server(
db_server: DbMCPServer,
db: Session,
request_user: User | None,
include_auth_config: bool = False,
db_server: DbMCPServer, email: str, db: Session, include_auth_config: bool = False
) -> MCPServer:
"""Convert database MCP server to API model"""
email = request_user.email if request_user else ""
# Check if user has authentication configured and extract credentials
auth_performer = db_server.auth_performer
user_authenticated: bool | None = None
user_credentials = None
admin_credentials = None
can_view_admin_credentials = bool(include_auth_config) and (
request_user is not None
and (
request_user.role == UserRole.ADMIN
or (request_user.email and request_user.email == db_server.owner)
)
)
if db_server.auth_type == MCPAuthenticationType.NONE:
user_authenticated = True # No auth required
elif auth_performer == MCPAuthenticationPerformer.ADMIN:
user_authenticated = db_server.admin_connection_config is not None
if (
can_view_admin_credentials
and db_server.admin_connection_config is not None
and include_auth_config
):
if include_auth_config and db_server.admin_connection_config is not None:
if db_server.auth_type == MCPAuthenticationType.API_TOKEN:
admin_credentials = {
"api_key": db_server.admin_connection_config.config["headers"][
@@ -906,12 +890,11 @@ def _db_mcp_server_to_api_mcp_server(
if client_info:
if not client_info.client_id or not client_info.client_secret:
raise ValueError("Stored client info had empty client ID or secret")
if can_view_admin_credentials:
admin_credentials = {
"client_id": client_info.client_id,
"client_secret": client_info.client_secret,
}
elif can_view_admin_credentials:
admin_credentials = {
"client_id": client_info.client_id,
"client_secret": client_info.client_secret,
}
else:
admin_credentials = {}
logger.warning(f"No client info found for server {db_server.name}")
@@ -978,13 +961,14 @@ def get_mcp_servers_for_assistant(
logger.info(f"Fetching MCP servers for assistant: {assistant_id}")
email = user.email if user else ""
try:
persona_id = int(assistant_id)
db_mcp_servers = get_mcp_servers_for_persona(persona_id, db, user)
# Convert to API model format with opportunistic token refresh for OAuth
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
_db_mcp_server_to_api_mcp_server(db_server, email, db)
for db_server in db_mcp_servers
]
@@ -997,25 +981,6 @@ def get_mcp_servers_for_assistant(
raise HTTPException(status_code=500, detail="Failed to fetch MCP servers")
@router.get("/servers", response_model=MCPServersResponse)
def get_mcp_servers_for_user(
db: Session = Depends(get_session),
user: User | None = Depends(current_user),
) -> MCPServersResponse:
"""List all MCP servers for use in agent configuration and chat UI.
This endpoint is intentionally available to all authenticated users so they
can attach MCP actions to assistants. Sensitive admin credentials are never
returned.
"""
db_mcp_servers = get_all_mcp_servers(db)
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
for db_server in db_mcp_servers
]
return MCPServersResponse(mcp_servers=mcp_servers)
def _get_connection_config(
mcp_server: DbMCPServer, is_admin: bool, user: User | None, db_session: Session
) -> MCPConnectionConfig | None:
@@ -1563,6 +1528,8 @@ def get_mcp_server_detail(
_ensure_mcp_server_owner_or_admin(server, user)
email = user.email if user else ""
# TODO: user permissions per mcp server not yet implemented, for now
# permissions are based on access to assistants
# # Quick permission check admin or user has access
@@ -1570,10 +1537,7 @@ def get_mcp_server_detail(
# raise HTTPException(status_code=403, detail="Forbidden")
return _db_mcp_server_to_api_mcp_server(
server,
db_session,
include_auth_config=True,
request_user=user,
server, email, db_session, include_auth_config=True
)
@@ -1632,12 +1596,13 @@ def get_mcp_servers_for_admin(
logger.info("Fetching all MCP servers for admin display")
email = user.email if user else ""
try:
db_mcp_servers = get_all_mcp_servers(db)
# Convert to API model format
mcp_servers = [
_db_mcp_server_to_api_mcp_server(db_server, db, request_user=user)
_db_mcp_server_to_api_mcp_server(db_server, email, db)
for db_server in db_mcp_servers
]
@@ -1880,9 +1845,7 @@ def update_mcp_server_simple(
db_session.commit()
# Return the updated server in API format
return _db_mcp_server_to_api_mcp_server(
updated_server, db_session, request_user=user
)
return _db_mcp_server_to_api_mcp_server(updated_server, user.email, db_session)
@admin_router.delete("/server/{server_id}")

View File

@@ -13,7 +13,6 @@ from onyx.configs.app_configs import PASSWORD_MIN_LENGTH
from onyx.configs.constants import DEV_VERSION_PATTERN
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import STABLE_VERSION_PATTERN
from onyx.db.auth import get_user_count
from onyx.server.manage.models import AllVersions
from onyx.server.manage.models import AuthTypeResponse
from onyx.server.manage.models import ContainerVersions
@@ -29,14 +28,12 @@ def healthcheck() -> StatusResponse:
@router.get("/auth/type", tags=PUBLIC_API_TAGS)
async def get_auth_type() -> AuthTypeResponse:
user_count = await get_user_count()
def get_auth_type() -> AuthTypeResponse:
return AuthTypeResponse(
auth_type=AUTH_TYPE,
requires_verification=user_needs_to_be_verified(),
anonymous_user_enabled=anonymous_user_enabled(),
password_min_length=PASSWORD_MIN_LENGTH,
has_users=user_count > 0,
)

View File

@@ -410,26 +410,20 @@ def list_llm_provider_basics(
all_providers = fetch_existing_llm_providers(db_session)
user_group_ids = fetch_user_group_ids(db_session, user) if user else set()
is_admin = user and user.role == UserRole.ADMIN
is_admin = user is not None and user.role == UserRole.ADMIN
accessible_providers = []
for provider in all_providers:
# Include all public providers
if provider.is_public:
accessible_providers.append(LLMProviderDescriptor.from_model(provider))
continue
# Include restricted providers user has access to via groups
if is_admin:
# Admins see all providers
accessible_providers.append(LLMProviderDescriptor.from_model(provider))
elif provider.groups:
# User must be in at least one of the provider's groups
if user_group_ids.intersection({g.id for g in provider.groups}):
accessible_providers.append(LLMProviderDescriptor.from_model(provider))
elif not provider.personas:
# No restrictions = accessible
# Use centralized access control logic with persona=None since we're
# listing providers without a specific persona context. This correctly:
# - Includes all public providers
# - Includes providers user can access via group membership
# - Excludes persona-only restricted providers (requires specific persona)
# - Excludes non-public providers with no restrictions (admin-only)
if can_user_access_llm_provider(
provider, user_group_ids, persona=None, is_admin=is_admin
):
accessible_providers.append(LLMProviderDescriptor.from_model(provider))
end_time = datetime.now(timezone.utc)

View File

@@ -44,8 +44,6 @@ class AuthTypeResponse(BaseModel):
requires_verification: bool
anonymous_user_enabled: bool | None = None
password_min_length: int
# whether there are any users in the system
has_users: bool = True
class UserSpecificAssistantPreference(BaseModel):
@@ -67,7 +65,6 @@ class UserPreferences(BaseModel):
auto_scroll: bool | None = None
temperature_override_enabled: bool | None = None
theme_preference: ThemePreference | None = None
chat_background: str | None = None
# controls which tools are enabled for the user for a specific assistant
assistant_specific_configs: UserSpecificAssistantPreferences | None = None
@@ -139,7 +136,6 @@ class UserInfo(BaseModel):
auto_scroll=user.auto_scroll,
temperature_override_enabled=user.temperature_override_enabled,
theme_preference=user.theme_preference,
chat_background=user.chat_background,
assistant_specific_configs=assistant_specific_configs,
)
),
@@ -203,10 +199,6 @@ class ThemePreferenceRequest(BaseModel):
theme_preference: ThemePreference
class ChatBackgroundRequest(BaseModel):
chat_background: str | None
class PersonalizationUpdateRequest(BaseModel):
name: str | None = None
role: str | None = None

View File

@@ -6,25 +6,33 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.engine.sql_engine import get_session
from onyx.db.index_attempt import expire_index_attempts
from onyx.db.models import IndexModelStatus
from onyx.db.models import User
from onyx.db.search_settings import create_search_settings
from onyx.db.search_settings import delete_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_embedding_provider_from_provider_type
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.document_index_utils import get_multipass_config
from onyx.document_index.factory import get_default_document_index
from onyx.file_processing.unstructured import delete_unstructured_api_key
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import update_unstructured_api_key
from onyx.natural_language_processing.search_nlp_models import clean_model_name
from onyx.server.manage.embedding.models import SearchSettingsDeleteRequest
from onyx.server.manage.models import FullModelVersionResponse
from onyx.server.models import IdReturn
from onyx.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MULTI_TENANT
router = APIRouter(prefix="/search-settings")
@@ -40,97 +48,91 @@ def set_new_search_settings(
"""Creates a new EmbeddingModel row and cancels the previous secondary indexing if any
Gives an error if the same model name is used as the current or secondary index
"""
# TODO(andrei): Re-enable.
logger.error("Setting new search settings is temporarily disabled.")
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="Setting new search settings is temporarily disabled.",
if search_settings_new.index_name:
logger.warning("Index name was specified by request, this is not suggested")
# Disallow contextual RAG for cloud deployments
if MULTI_TENANT and search_settings_new.enable_contextual_rag:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Contextual RAG disabled in Onyx Cloud",
)
# Validate cloud provider exists or create new LiteLLM provider
if search_settings_new.provider_type is not None:
cloud_provider = get_embedding_provider_from_provider_type(
db_session, provider_type=search_settings_new.provider_type
)
if cloud_provider is None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
)
search_settings = get_current_search_settings(db_session)
if search_settings_new.index_name is None:
# We define index name here
index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
if (
search_settings_new.model_name == search_settings.model_name
and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
):
index_name += ALT_INDEX_SUFFIX
search_values = search_settings_new.model_dump()
search_values["index_name"] = index_name
new_search_settings_request = SavedSearchSettings(**search_values)
else:
new_search_settings_request = SavedSearchSettings(
**search_settings_new.model_dump()
)
secondary_search_settings = get_secondary_search_settings(db_session)
if secondary_search_settings:
# Cancel any background indexing jobs
expire_index_attempts(
search_settings_id=secondary_search_settings.id, db_session=db_session
)
# Mark previous model as a past model directly
update_search_settings_status(
search_settings=secondary_search_settings,
new_status=IndexModelStatus.PAST,
db_session=db_session,
)
new_search_settings = create_search_settings(
search_settings=new_search_settings_request, db_session=db_session
)
# if search_settings_new.index_name:
# logger.warning("Index name was specified by request, this is not suggested")
# # Disallow contextual RAG for cloud deployments
# if MULTI_TENANT and search_settings_new.enable_contextual_rag:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail="Contextual RAG disabled in Onyx Cloud",
# )
# Ensure Vespa has the new index immediately
get_multipass_config(search_settings)
get_multipass_config(new_search_settings)
document_index = get_default_document_index(search_settings, new_search_settings)
# # Validate cloud provider exists or create new LiteLLM provider
# if search_settings_new.provider_type is not None:
# cloud_provider = get_embedding_provider_from_provider_type(
# db_session, provider_type=search_settings_new.provider_type
# )
document_index.ensure_indices_exist(
primary_embedding_dim=search_settings.final_embedding_dim,
primary_embedding_precision=search_settings.embedding_precision,
secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
secondary_index_embedding_precision=new_search_settings.embedding_precision,
)
# if cloud_provider is None:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
# )
# Pause index attempts for the currently in use index to preserve resources
if DISABLE_INDEX_UPDATE_ON_SWAP:
expire_index_attempts(
search_settings_id=search_settings.id, db_session=db_session
)
for cc_pair in get_connector_credential_pairs(db_session):
resync_cc_pair(
cc_pair=cc_pair,
search_settings_id=new_search_settings.id,
db_session=db_session,
)
# search_settings = get_current_search_settings(db_session)
# if search_settings_new.index_name is None:
# # We define index name here
# index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
# if (
# search_settings_new.model_name == search_settings.model_name
# and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
# ):
# index_name += ALT_INDEX_SUFFIX
# search_values = search_settings_new.model_dump()
# search_values["index_name"] = index_name
# new_search_settings_request = SavedSearchSettings(**search_values)
# else:
# new_search_settings_request = SavedSearchSettings(
# **search_settings_new.model_dump()
# )
# secondary_search_settings = get_secondary_search_settings(db_session)
# if secondary_search_settings:
# # Cancel any background indexing jobs
# expire_index_attempts(
# search_settings_id=secondary_search_settings.id, db_session=db_session
# )
# # Mark previous model as a past model directly
# update_search_settings_status(
# search_settings=secondary_search_settings,
# new_status=IndexModelStatus.PAST,
# db_session=db_session,
# )
# new_search_settings = create_search_settings(
# search_settings=new_search_settings_request, db_session=db_session
# )
# # Ensure Vespa has the new index immediately
# get_multipass_config(search_settings)
# get_multipass_config(new_search_settings)
# document_index = get_default_document_index(search_settings, new_search_settings)
# document_index.ensure_indices_exist(
# primary_embedding_dim=search_settings.final_embedding_dim,
# primary_embedding_precision=search_settings.embedding_precision,
# secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
# secondary_index_embedding_precision=new_search_settings.embedding_precision,
# )
# # Pause index attempts for the currently in use index to preserve resources
# if DISABLE_INDEX_UPDATE_ON_SWAP:
# expire_index_attempts(
# search_settings_id=search_settings.id, db_session=db_session
# )
# for cc_pair in get_connector_credential_pairs(db_session):
# resync_cc_pair(
# cc_pair=cc_pair,
# search_settings_id=new_search_settings.id,
# db_session=db_session,
# )
# db_session.commit()
# return IdReturn(id=new_search_settings.id)
db_session.commit()
return IdReturn(id=new_search_settings.id)
@router.post("/cancel-new-embedding")

View File

@@ -56,7 +56,6 @@ from onyx.db.user_preferences import get_latest_access_token_for_user
from onyx.db.user_preferences import update_assistant_preferences
from onyx.db.user_preferences import update_user_assistant_visibility
from onyx.db.user_preferences import update_user_auto_scroll
from onyx.db.user_preferences import update_user_chat_background
from onyx.db.user_preferences import update_user_default_model
from onyx.db.user_preferences import update_user_personalization
from onyx.db.user_preferences import update_user_pinned_assistants
@@ -76,7 +75,6 @@ from onyx.server.documents.models import PaginatedReturn
from onyx.server.features.projects.models import UserFileSnapshot
from onyx.server.manage.models import AllUsersResponse
from onyx.server.manage.models import AutoScrollRequest
from onyx.server.manage.models import ChatBackgroundRequest
from onyx.server.manage.models import PersonalizationUpdateRequest
from onyx.server.manage.models import TenantInfo
from onyx.server.manage.models import TenantSnapshot
@@ -786,25 +784,6 @@ def update_user_theme_preference_api(
update_user_theme_preference(user.id, request.theme_preference, db_session)
@router.patch("/user/chat-background")
def update_user_chat_background_api(
request: ChatBackgroundRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
if user is None:
if AUTH_TYPE == AuthType.DISABLED:
store = get_kv_store()
no_auth_user = fetch_no_auth_user(store)
no_auth_user.preferences.chat_background = request.chat_background
set_no_auth_user_preferences(store, no_auth_user.preferences)
return
else:
raise RuntimeError("This should never happen")
update_user_chat_background(user.id, request.chat_background, db_session)
@router.patch("/user/default-model")
def update_user_default_model_api(
request: ChosenDefaultModelRequest,

View File

@@ -22,7 +22,7 @@ from onyx.db.models import User
from onyx.db.search_settings import get_active_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.indexing.adapters.document_indexing_adapter import (
DocumentIndexingBatchAdapter,
)
@@ -103,11 +103,9 @@ def upsert_ingestion_doc(
# Need to index for both the primary and secondary index if possible
active_search_settings = get_active_search_settings(db_session)
# This flow is for indexing so we get all indices.
document_indices = get_all_document_indices(
curr_doc_index = get_default_document_index(
active_search_settings.primary,
None,
None,
)
search_settings = get_current_search_settings(db_session)
@@ -130,7 +128,7 @@ def upsert_ingestion_doc(
indexing_pipeline_result = run_indexing_pipeline(
embedder=index_embedding_model,
document_indices=document_indices,
document_index=curr_doc_index,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -153,14 +151,13 @@ def upsert_ingestion_doc(
search_settings=sec_search_settings
)
# This flow is for indexing so we get all indices.
sec_document_indices = get_all_document_indices(
active_search_settings.secondary, None, None
sec_doc_index = get_default_document_index(
active_search_settings.secondary, None
)
run_indexing_pipeline(
embedder=new_index_embedding_model,
document_indices=sec_document_indices,
document_index=sec_doc_index,
ignore_time_skip=True,
db_session=db_session,
tenant_id=tenant_id,
@@ -195,18 +192,15 @@ def delete_ingestion_doc(
)
active_search_settings = get_active_search_settings(db_session)
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
doc_index = get_default_document_index(
active_search_settings.primary,
active_search_settings.secondary,
None,
)
for document_index in document_indices:
document_index.delete_single(
doc_id=document_id,
tenant_id=tenant_id,
chunk_count=document.chunk_count,
)
doc_index.delete_single(
doc_id=document_id,
tenant_id=tenant_id,
chunk_count=document.chunk_count,
)
# Delete from database
delete_documents_complete__no_commit(db_session, [document_id])

View File

@@ -58,6 +58,7 @@ from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.feedback import create_chat_message_feedback
from onyx.db.feedback import remove_chat_message_feedback
from onyx.db.models import ChatSessionSharedStatus
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
@@ -266,7 +267,35 @@ def get_chat_session(
include_deleted=include_deleted,
)
except ValueError:
raise ValueError("Chat session does not exist or has been deleted")
try:
# If we failed to get a chat session, try to retrieve the session with
# less restrictive filters in order to identify what exactly mismatched
# so we can bubble up an accurate error code and message.
existing_chat_session = get_chat_session_by_id(
chat_session_id=session_id,
user_id=None,
db_session=db_session,
is_shared=False,
include_deleted=True,
)
except ValueError:
raise HTTPException(status_code=404, detail="Chat session not found")
if not include_deleted and existing_chat_session.deleted:
raise HTTPException(status_code=404, detail="Chat session has been deleted")
if is_shared:
if existing_chat_session.shared_status != ChatSessionSharedStatus.PUBLIC:
raise HTTPException(
status_code=403, detail="Chat session is not shared"
)
elif user_id is not None and existing_chat_session.user_id not in (
user_id,
None,
):
raise HTTPException(status_code=403, detail="Access denied")
raise HTTPException(status_code=404, detail="Chat session not found")
# for chat-seeding: if the session is unassigned, assign it now. This is done here
# to avoid another back and forth between FE -> BE before starting the first
@@ -530,30 +559,7 @@ def handle_new_chat_message(
return StreamingResponse(stream_generator(), media_type="text/event-stream")
@router.post(
"/send-chat-message",
response_model=ChatFullResponse,
tags=PUBLIC_API_TAGS,
responses={
200: {
"description": (
"If `stream=true`, returns `text/event-stream`.\n"
"If `stream=false`, returns `application/json` (ChatFullResponse)."
),
"content": {
"text/event-stream": {
"schema": {"type": "string"},
"examples": {
"stream": {
"summary": "Stream of NDJSON AnswerStreamPart's",
"value": "string",
}
},
},
},
}
},
)
@router.post("/send-chat-message", response_model=None, tags=PUBLIC_API_TAGS)
def handle_send_chat_message(
chat_message_req: SendMessageRequest,
request: Request,

View File

@@ -51,7 +51,6 @@ def admin_search(
tenant_id=tenant_id,
)
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
if not isinstance(document_index, VespaIndex):

View File

@@ -4,7 +4,6 @@ from typing import cast
from sqlalchemy.orm import Session
from onyx.chat.citation_utils import extract_citation_order_from_text
from onyx.configs.constants import MessageType
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
@@ -522,13 +521,6 @@ def translate_assistant_message_to_packets(
)
)
# Sort citations by order of appearance in message text
citation_order = extract_citation_order_from_text(chat_message.message or "")
order_map = {num: idx for idx, num in enumerate(citation_order)}
citation_info_list.sort(
key=lambda c: order_map.get(c.citation_number, float("inf"))
)
# Message comes after tool calls, with optional reasoning step beforehand
message_turn_index = max_tool_turn + 1
if chat_message.reasoning_tokens:

View File

@@ -6,6 +6,7 @@ from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
from onyx.configs.chat_configs import INPUT_PROMPT_YAML
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.constants import KV_SEARCH_SETTINGS
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
@@ -13,6 +14,9 @@ from onyx.configs.embedding_configs import SupportedEmbeddingModel
from onyx.configs.model_configs import GEN_AI_API_KEY
from onyx.configs.model_configs import GEN_AI_MODEL_VERSION
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.retrieval.search_runner import (
download_nltk_data,
)
from onyx.db.connector import check_connectors_exist
from onyx.db.connector import create_initial_default_connector
from onyx.db.connector_credential_pair import associate_default_cc_pair
@@ -32,7 +36,7 @@ from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
@@ -42,6 +46,7 @@ from onyx.llm.constants import LlmProviderNames
from onyx.llm.well_known_providers.llm_provider_options import get_openai_model_names
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
from onyx.seeding.load_yamls import load_input_prompts_from_yaml
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.settings.store import load_settings
@@ -111,6 +116,9 @@ def setup_onyx(
f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}."
)
logger.notice("Verifying query preprocessing (NLTK) data is downloaded")
download_nltk_data()
# setup Postgres with default credential, llm providers, etc.
setup_postgres(db_session)
@@ -124,15 +132,13 @@ def setup_onyx(
# Ensure Vespa is setup correctly, this step is relatively near the end because Vespa
# takes a bit of time to start up
logger.notice("Verifying Document Index(s) is/are available.")
# This flow is for setting up the document index so we get all indices here.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
search_settings,
secondary_search_settings,
None,
)
success = setup_document_indices(
document_indices,
success = setup_vespa(
document_index,
IndexingSetting.from_db_model(search_settings),
(
IndexingSetting.from_db_model(secondary_search_settings)
@@ -141,9 +147,7 @@ def setup_onyx(
),
)
if not success:
raise RuntimeError(
"Could not connect to a document index within the specified timeout."
)
raise RuntimeError("Could not connect to Vespa within the specified timeout.")
logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}")
if search_settings.provider_type is None:
@@ -225,62 +229,44 @@ def mark_reindex_flag(db_session: Session) -> None:
kv_store.store(KV_REINDEX_KEY, False)
def setup_document_indices(
document_indices: list[DocumentIndex],
def setup_vespa(
document_index: DocumentIndex,
index_setting: IndexingSetting,
secondary_index_setting: IndexingSetting | None,
num_attempts: int = VESPA_NUM_ATTEMPTS_ON_STARTUP,
) -> bool:
"""Sets up all input document indices.
If any document index setup fails, the function will return False. Otherwise
returns True.
"""
for document_index in document_indices:
# Document index startup is a bit slow, so give it a few seconds.
WAIT_SECONDS = 5
document_index_setup_success = False
for x in range(num_attempts):
try:
logger.notice(
f"Setting up document index {document_index.__class__.__name__} (attempt {x+1}/{num_attempts})..."
)
document_index.ensure_indices_exist(
primary_embedding_dim=index_setting.final_embedding_dim,
primary_embedding_precision=index_setting.embedding_precision,
secondary_index_embedding_dim=(
secondary_index_setting.final_embedding_dim
if secondary_index_setting
else None
),
secondary_index_embedding_precision=(
secondary_index_setting.embedding_precision
if secondary_index_setting
else None
),
)
logger.notice(
f"Document index {document_index.__class__.__name__} setup complete."
)
document_index_setup_success = True
break
except Exception:
logger.exception(
f"Document index {document_index.__class__.__name__} setup did not succeed. "
"The relevant service may not be ready yet. "
f"Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
if not document_index_setup_success:
logger.error(
f"Document index {document_index.__class__.__name__} setup did not succeed. "
f"Attempt limit reached. ({num_attempts})"
# Vespa startup is a bit slow, so give it a few seconds
WAIT_SECONDS = 5
for x in range(num_attempts):
try:
logger.notice(f"Setting up Vespa (attempt {x+1}/{num_attempts})...")
document_index.ensure_indices_exist(
primary_embedding_dim=index_setting.final_embedding_dim,
primary_embedding_precision=index_setting.embedding_precision,
secondary_index_embedding_dim=(
secondary_index_setting.final_embedding_dim
if secondary_index_setting
else None
),
secondary_index_embedding_precision=(
secondary_index_setting.embedding_precision
if secondary_index_setting
else None
),
)
return False
return True
logger.notice("Vespa setup complete.")
return True
except Exception:
logger.exception(
f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
)
time.sleep(WAIT_SECONDS)
logger.error(
f"Vespa setup did not succeed. Attempt limit reached. ({num_attempts})"
)
return False
def setup_postgres(db_session: Session) -> None:
@@ -289,6 +275,10 @@ def setup_postgres(db_session: Session) -> None:
create_initial_default_connector(db_session)
associate_default_cc_pair(db_session)
# Load input prompts and user folders from YAML
logger.notice("Loading input prompts and user folders")
load_input_prompts_from_yaml(db_session, INPUT_PROMPT_YAML)
if GEN_AI_API_KEY and fetch_default_provider(db_session) is None:
# Only for dev flows
logger.notice("Setting up default OpenAI LLM for dev.")
@@ -357,8 +347,6 @@ def setup_multitenant_onyx() -> None:
def setup_vespa_multitenant(supported_indices: list[SupportedEmbeddingModel]) -> bool:
# TODO(andrei): We don't yet support OpenSearch for multi-tenant instances
# so this function remains unchanged.
# This is for local testing
WAIT_SECONDS = 5
VESPA_ATTEMPTS = 5

View File

@@ -60,7 +60,6 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
from onyx.tools.tool_runner import run_tool_calls
from onyx.tools.utils import generate_tools_description
@@ -432,14 +431,6 @@ def run_research_agent_call(
max_concurrent_tools=1,
# May be better to not do this step, hard to say, needs to be tested
skip_search_query_expansion=False,
url_snippet_map=extract_url_snippet_map(
[
search_doc
for tool_call in state_container.get_tool_calls()
if tool_call.search_docs
for search_doc in tool_call.search_docs
]
),
)
tool_responses = parallel_tool_call_results.tool_responses
citation_mapping = (
@@ -474,14 +465,8 @@ def run_research_agent_call(
)
search_docs = None
displayed_docs = None
if isinstance(tool_response.rich_response, SearchDocsResponse):
search_docs = tool_response.rich_response.search_docs
displayed_docs = tool_response.rich_response.displayed_docs
# Add ALL search docs to state container for DB persistence
if search_docs:
state_container.add_search_docs(search_docs)
# This is used for the Open URL reminder in the next cycle
# only do this if the web search tool yielded results
@@ -514,7 +499,7 @@ def run_research_agent_call(
or most_recent_reasoning,
tool_call_arguments=tool_call.tool_args,
tool_call_response=tool_response.llm_facing_response,
search_docs=displayed_docs or search_docs,
search_docs=search_docs,
generated_images=None,
)
state_container.add_tool_call(tool_call_info)

View File

@@ -36,15 +36,6 @@ class ToolCallException(Exception):
self.llm_facing_message = llm_facing_message
class ToolExecutionException(Exception):
"""Exception raise for errors during tool execution."""
def __init__(self, message: str, emit_error_packet: bool = False):
super().__init__(message)
self.emit_error_packet = emit_error_packet
class SearchToolUsage(str, Enum):
DISABLED = "disabled"
ENABLED = "enabled"
@@ -151,7 +142,6 @@ class OpenURLToolOverrideKwargs(BaseModel):
# To know what citation number to start at for constructing the string to the LLM
starting_citation_num: int
citation_mapping: dict[str, int]
url_snippet_map: dict[str, str]
# None indicates that the default value should be used

View File

@@ -19,6 +19,7 @@ from onyx.db.oauth_config import get_oauth_config
from onyx.db.search_settings import get_current_search_settings
from onyx.db.tools import get_builtin_tool
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMConfig
@@ -119,9 +120,18 @@ def construct_tools(
if user and user.oauth_accounts:
user_oauth_token = user.oauth_accounts[0].access_token
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
document_index_cache: DocumentIndex | None = None
search_settings_cache = None
def _get_document_index() -> DocumentIndex:
nonlocal document_index_cache, search_settings_cache
if document_index_cache is None:
if search_settings_cache is None:
search_settings_cache = get_current_search_settings(db_session)
document_index_cache = get_default_document_index(
search_settings_cache, None
)
return document_index_cache
added_search_tool = False
for db_tool_model in persona.tools:
@@ -164,7 +174,7 @@ def construct_tools(
user=user,
persona=persona,
llm=llm,
document_index=document_index,
document_index=_get_document_index(),
user_selected_filters=search_tool_config.user_selected_filters,
project_id=search_tool_config.project_id,
bypass_acl=search_tool_config.bypass_acl,
@@ -218,7 +228,7 @@ def construct_tools(
OpenURLTool(
tool_id=db_tool_model.id,
emitter=emitter,
document_index=document_index,
document_index=_get_document_index(),
user=user,
)
]
@@ -377,6 +387,9 @@ def construct_tools(
if not search_tool_config:
search_tool_config = SearchToolConfig()
search_settings = get_current_search_settings(db_session)
document_index = get_default_document_index(search_settings, None)
search_tool = SearchTool(
tool_id=search_tool_db_model.id,
db_session=db_session,

View File

@@ -23,7 +23,6 @@ from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeart
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.tools.interface import Tool
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.tool_implementations.images.models import (
FinalImageGenerationResponse,
@@ -189,9 +188,7 @@ class ImageGenerationTool(Tool[None]):
except requests.RequestException as e:
logger.error(f"Error fetching or converting image: {e}")
raise ToolExecutionException(
"Failed to fetch or convert the generated image", emit_error_packet=True
)
raise ValueError("Failed to fetch or convert the generated image")
except Exception as e:
logger.debug(f"Error occurred during image generation: {e}")
@@ -201,27 +198,18 @@ class ImageGenerationTool(Tool[None]):
"Your request was rejected as a result of our safety system"
in error_message
):
raise ToolExecutionException(
(
"The image generation request was rejected due to OpenAI's content policy. "
"Please try a different prompt."
),
emit_error_packet=True,
raise ValueError(
"The image generation request was rejected due to OpenAI's content policy. Please try a different prompt."
)
elif "Invalid image URL" in error_message:
raise ToolExecutionException(
"Invalid image URL provided for image generation.",
emit_error_packet=True,
)
raise ValueError("Invalid image URL provided for image generation.")
elif "invalid_request_error" in error_message:
raise ToolExecutionException(
"Invalid request for image generation. Please check your input.",
emit_error_packet=True,
raise ValueError(
"Invalid request for image generation. Please check your input."
)
raise ToolExecutionException(
f"An error occurred during image generation. error={error_message}",
emit_error_packet=True,
raise ValueError(
"An error occurred during image generation. Please try again later."
)
def run(

View File

@@ -492,7 +492,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
indexed_result, crawled_result = run_functions_tuples_in_parallel(
[
(_retrieve_indexed_with_filters, (all_requests,)),
(self._fetch_web_content, (urls, override_kwargs.url_snippet_map)),
(self._fetch_web_content, (urls,)),
],
allow_failures=True,
timeout=OPEN_URL_TIMEOUT_SECONDS,
@@ -800,7 +800,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
return merged_sections
def _fetch_web_content(
self, urls: list[str], url_snippet_map: dict[str, str]
self, urls: list[str]
) -> tuple[list[InferenceSection], list[str]]:
if not urls:
return [], []
@@ -831,11 +831,7 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
and content.full_content
and not is_insufficient
):
sections.append(
inference_section_from_internet_page_scrape(
content, url_snippet_map.get(content.link, "")
)
)
sections.append(inference_section_from_internet_page_scrape(content))
else:
# TODO: Slight improvement - if failed URL reasons are passed back to the LLM
# for example, if it tries to crawl Reddit and fails, it should know (probably) that this error would

View File

@@ -1,239 +0,0 @@
import unicodedata
from pydantic import BaseModel
from rapidfuzz import fuzz
from rapidfuzz import utils
from onyx.utils.text_processing import is_zero_width_char
from onyx.utils.text_processing import normalize_char
class SnippetMatchResult(BaseModel):
    # Whether the snippet was located anywhere in the content.
    snippet_located: bool
    # Inclusive start/end indices of the match within the ORIGINAL (pre-
    # normalization) content string; both stay -1 when no match was found.
    start_idx: int = -1
    end_idx: int = -1
# Shared sentinel returned for every "no match" outcome.
# NOTE(review): this is a single shared instance — callers must treat it as
# read-only; mutating it would corrupt results for all other callers.
NegativeSnippetMatchResult = SnippetMatchResult(snippet_located=False)
def find_snippet_in_content(content: str, snippet: str) -> SnippetMatchResult:
    """
    Locate `snippet` within `content` and report the matched index range.

    Strategies, tried in order until one succeeds:
    1. Normalize both strings and attempt a direct substring match.
    2. Fall back to a token-based fuzzy search.

    If the snippet occurs multiple times, the first normalized occurrence
    is the one reported.
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult
    for strategy in (_normalize_and_match, _token_based_match):
        outcome = strategy(content, snippet)
        if outcome.snippet_located:
            return outcome
    return NegativeSnippetMatchResult
def _normalize_and_match(content: str, snippet: str) -> SnippetMatchResult:
    """
    Normalizes the snippet & content, then performs a direct string match.

    Both strings are normalized with position maps so that a match found in
    normalized space can be translated back to inclusive start/end indices
    in the ORIGINAL content. Characters stripped from the snippet's edges
    during normalization are re-included from the content where possible.
    """
    normalized_content, content_map = _normalize_text_with_mapping(content)
    # NOTE(review): despite its name, `url_snippet_map` is the snippet's
    # normalized-position -> original-position map (same shape as
    # `content_map`), not a URL mapping — consider renaming.
    normalized_snippet, url_snippet_map = _normalize_text_with_mapping(snippet)
    if not normalized_content or not normalized_snippet:
        return NegativeSnippetMatchResult
    pos = normalized_content.find(normalized_snippet)
    if pos != -1:
        # Translate the normalized match start back to the original content.
        original_start = content_map[pos]
        # Account for leading characters stripped from snippet during normalization
        # (e.g., leading punctuation like "[![]![]]" that was removed)
        if url_snippet_map:
            first_snippet_orig_pos = url_snippet_map[0]
            if first_snippet_orig_pos > 0:
                # There were leading characters stripped from snippet
                # Extend start position backwards to include them from content
                original_start = max(original_start - first_snippet_orig_pos, 0)
        # Determine end position, including any trailing characters that were
        # normalized away (e.g., punctuation)
        match_end_norm = pos + len(normalized_snippet)
        if match_end_norm >= len(content_map):
            # Match extends to end of normalized content - include all trailing chars
            original_end = len(content) - 1
        else:
            # Match is in the middle - end at character before next normalized char
            original_end = content_map[match_end_norm] - 1
        # Account for trailing characters stripped from snippet during normalization
        # (e.g., trailing punctuation like "\n[" that was removed)
        if url_snippet_map:
            last_snippet_orig_pos = url_snippet_map[-1]
            trailing_stripped = len(snippet) - last_snippet_orig_pos - 1
            if trailing_stripped > 0:
                # Extend end position to include trailing characters from content
                # that correspond to the stripped trailing snippet characters
                original_end = min(original_end + trailing_stripped, len(content) - 1)
        return SnippetMatchResult(
            snippet_located=True,
            start_idx=original_start,
            end_idx=original_end,
        )
    return NegativeSnippetMatchResult
def _normalize_text_with_mapping(text: str) -> tuple[str, list[int]]:
    """
    Text normalization that maintains position mapping.

    Normalization steps applied, in order: NFC unicode normalization,
    HTML-entity replacement, zero-width character removal, per-character
    normalization via normalize_char, and whitespace collapsing (runs of
    spaces become a single space; leading/trailing spaces are dropped).

    Returns:
        tuple: (normalized_text, position_map)
        - position_map[i] gives the original position for normalized position i
    """
    if not text:
        return "", []
    original_text = text
    # Step 1: NFC normalization with position mapping
    nfc_text = unicodedata.normalize("NFC", text)
    # Build mapping from NFC positions to original start positions
    # (NFC can merge several original code points into one character).
    nfc_to_orig: list[int] = []
    orig_idx = 0
    for nfc_char in nfc_text:
        nfc_to_orig.append(orig_idx)
        # Find how many original chars contributed to this NFC char
        # NOTE(review): this inner scan is linear per character, so the
        # worst case here is quadratic — fine for snippet-sized inputs,
        # but worth confirming for very large documents.
        for length in range(1, len(original_text) - orig_idx + 1):
            substr = original_text[orig_idx : orig_idx + length]
            if unicodedata.normalize("NFC", substr) == nfc_char:
                orig_idx += length
                break
        else:
            orig_idx += 1  # Fallback: advance one char if no prefix matched
    # Work with NFC text from here
    text = nfc_text
    # Common HTML entities mapped to their plain-text equivalents.
    html_entities = {
        "&nbsp;": " ",
        "&#160;": " ",
        "&amp;": "&",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&apos;": "'",
        "&#39;": "'",
        "&#x27;": "'",
        "&ndash;": "-",
        "&mdash;": "-",
        "&hellip;": "...",
        "&#xB0;": "°",
        "&#xBA;": "°",  # NOTE(review): ordinal º folded to ° — confirm intended
        "&zwj;": "",
    }
    # Sort entities by length (longest first) for greedy matching
    sorted_entities = sorted(html_entities.keys(), key=len, reverse=True)
    result_chars = []
    result_map = []
    i = 0
    last_was_space = True  # Track to avoid leading spaces
    while i < len(text):
        # Convert NFC position to original position
        orig_pos = nfc_to_orig[i] if i < len(nfc_to_orig) else len(original_text) - 1
        char = text[i]
        output = None
        step = 1
        # Check for HTML entities first (greedy match)
        for entity in sorted_entities:
            if text[i : i + len(entity)] == entity:
                output = html_entities[entity]
                step = len(entity)
                break
        # If no entity matched, process single character
        if output is None:
            # Skip zero-width characters
            if is_zero_width_char(char):
                i += 1
                continue
            output = normalize_char(char)
        # Add output to result, normalizing each character from entity output
        if output:
            for out_char in output:
                # Normalize entity output the same way as regular chars
                normalized = normalize_char(out_char)
                # Handle whitespace collapsing
                if normalized == " ":
                    if not last_was_space:
                        result_chars.append(" ")
                        result_map.append(orig_pos)
                    last_was_space = True
                else:
                    result_chars.append(normalized)
                    result_map.append(orig_pos)
                    last_was_space = False
        i += step
    # Remove trailing space if present
    if result_chars and result_chars[-1] == " ":
        result_chars.pop()
        result_map.pop()
    return "".join(result_chars), result_map
def _token_based_match(
    content: str,
    snippet: str,
    min_threshold: float = 0.8,
) -> SnippetMatchResult:
    """
    Fuzzy, token-based search for `snippet` inside `content`.

    min_threshold is a similarity ratio in [0, 1]; rapidfuzz reports scores
    in [0, 100], so the threshold is scaled before comparison.
    """
    if not content or not snippet:
        return NegativeSnippetMatchResult
    alignment = fuzz.partial_ratio_alignment(
        content, snippet, processor=utils.default_process
    )
    if not alignment:
        return NegativeSnippetMatchResult
    if alignment.score >= min_threshold * 100:
        # src_start/src_end index into `content` (the first argument).
        return SnippetMatchResult(
            snippet_located=True,
            start_idx=alignment.src_start,
            end_idx=alignment.src_end,
        )
    return NegativeSnippetMatchResult

View File

@@ -832,7 +832,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
top_sections=merged_sections,
citation_start=override_kwargs.starting_citation_num,
limit=override_kwargs.max_llm_chunks,
include_document_id=False,
include_document_id=True,
)
# End overall timing
@@ -844,12 +844,12 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
f"document expansion: {document_expansion_elapsed:.3f}s)"
)
# TODO: extension - this can include the smaller set of approved docs to be saved/displayed in the UI
# for replaying. Currently the full set is returned and saved.
return ToolResponse(
# Typically the rich response will give more docs in case it needs to be displayed in the UI
rich_response=SearchDocsResponse(
search_docs=search_docs,
citation_mapping=citation_mapping,
displayed_docs=final_ui_docs or None,
search_docs=search_docs, citation_mapping=citation_mapping
),
# The LLM facing response typically includes less docs to cut down on noise and token usage
llm_facing_response=docs_str,

View File

@@ -73,7 +73,7 @@ def convert_inference_sections_to_llm_string(
link = next(iter(chunk.source_links.values()), None)
if link:
result["url"] = link
if include_document_id:
if include_document_id and "url" not in result:
result["document_identifier"] = chunk.document_id
if chunk.metadata:
result["metadata"] = json.dumps(chunk.metadata)

View File

@@ -1,19 +1,11 @@
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDoc
from onyx.tools.tool_implementations.open_url.models import WebContent
from onyx.tools.tool_implementations.open_url.snippet_matcher import (
find_snippet_in_content,
)
from onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX
from onyx.tools.tool_implementations.web_search.models import WebSearchResult
TRUNCATED_CONTENT_SUFFIX = " [...truncated]"
TRUNCATED_CONTENT_PREFIX = "[...truncated] "
def filter_web_search_results_with_no_title_or_snippet(
results: list[WebSearchResult],
) -> list[WebSearchResult]:
@@ -34,99 +26,14 @@ def truncate_search_result_content(content: str, max_chars: int = 15000) -> str:
"""Truncate search result content to a maximum number of characters"""
if len(content) <= max_chars:
return content
return content[:max_chars] + TRUNCATED_CONTENT_SUFFIX
def _truncate_content_around_snippet(
    content: str, snippet: str, max_chars: int = 15000
) -> str:
    """
    Truncate `content` to at most ~max_chars characters, centered on the
    location of `snippet`.

    Returns "" when the snippet cannot be located. Truncation markers are
    prepended/appended when content was cut at the start/end respectively.
    """
    match = find_snippet_in_content(content, snippet)
    if not match.snippet_located:
        return ""
    window_start, window_end = _expand_range_centered(
        match.start_idx, match.end_idx + 1, len(content), max_chars
    )
    window = content[window_start:window_end]
    # Mark any side that was actually cut off.
    prefix = TRUNCATED_CONTENT_PREFIX if window_start > 0 else ""
    suffix = TRUNCATED_CONTENT_SUFFIX if window_end < len(content) else ""
    return prefix + window + suffix
def _expand_range_centered(
start_idx: int, end_idx: int, N: int, target_size: int
) -> tuple[int, int]:
"""
Expands a range [start_idx, end_idx) to be centered within a list of size N
Args:
start_idx: Starting index (inclusive)
end_idx: Ending index (exclusive)
N: Size of the list
target_size: Target size of the range
Returns:
Tuple of (new start index, new end index)
"""
current_size = end_idx - start_idx
if current_size >= target_size:
return start_idx, end_idx
padding_needed = target_size - current_size
padding_top = padding_needed // 2
padding_bottom = padding_needed - padding_top
# Try expand symmetrically
new_start = start_idx - padding_top
new_end = end_idx + padding_bottom
# Handle overflow
if new_start < 0:
overflow = -new_start
new_start = 0
new_end = min(N, new_end + overflow)
if new_end > N:
overflow = new_end - N
new_end = N
new_start = max(0, new_start - overflow)
return new_start, new_end
return content[:max_chars] + " [...truncated]"
def inference_section_from_internet_page_scrape(
result: WebContent,
snippet: str,
rank: int = 0,
) -> InferenceSection:
# truncate the content around snippet if snippet exists
truncated_content = ""
if snippet:
truncated_content = _truncate_content_around_snippet(
result.full_content, snippet
)
# Fallback if no snippet exists or we failed to find it
if not truncated_content:
truncated_content = truncate_search_result_content(result.full_content)
truncated_content = truncate_search_result_content(result.full_content)
# Calculate score using reciprocal rank to preserve ordering
score = 1.0 / (rank + 1)
@@ -190,14 +97,3 @@ def inference_section_from_internet_search_result(
chunks=[chunk],
combined_content=result.snippet,
)
def extract_url_snippet_map(documents: list[SearchDoc]) -> dict[str, str]:
    """
    Given a list of SearchDocs, build the url -> blurb (summary) map.

    Only web-sourced documents that have a link are included; if the same
    link appears more than once, the last document's blurb wins.
    """
    return {
        doc.link: doc.blurb
        for doc in documents
        if doc.source_type == DocumentSource.WEB and doc.link
    }

View File

@@ -7,7 +7,6 @@ from onyx.chat.models import ChatMessageSimple
from onyx.configs.constants import MessageType
from onyx.context.search.models import SearchDocsResponse
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PacketException
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.tools.interface import Tool
from onyx.tools.models import ChatMinimalTextMessage
@@ -16,7 +15,6 @@ from onyx.tools.models import ParallelToolCallResponse
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.models import ToolCallException
from onyx.tools.models import ToolCallKickoff
from onyx.tools.models import ToolExecutionException
from onyx.tools.models import ToolResponse
from onyx.tools.models import WebSearchToolOverrideKwargs
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
@@ -154,33 +152,6 @@ def _safe_run_single_tool(
},
)
)
except ToolExecutionException as e:
# Unexpected error during tool execution
logger.error(f"Unexpected error running tool {tool.name}: {e}")
tool_response = ToolResponse(
rich_response=None,
llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),
)
_error_tracing.attach_error_to_current_span(
SpanError(
message="Tool execution error (unexpected)",
data={
"tool_name": tool.name,
"tool_call_id": tool_call.tool_call_id,
"tool_args": tool_call.tool_args,
"error": str(e),
"stack_trace": traceback.format_exc(),
"error_type": type(e).__name__,
},
)
)
if e.emit_error_packet:
tool.emitter.emit(
Packet(
placement=tool_call.placement,
obj=PacketException(exception=e),
)
)
except Exception as e:
# Unexpected error during tool execution
logger.error(f"Unexpected error running tool {tool.name}: {e}")
@@ -229,8 +200,6 @@ def run_tool_calls(
max_concurrent_tools: int | None = None,
# Skip query expansion for repeat search tool calls
skip_search_query_expansion: bool = False,
# A map of url -> summary for passing web results to open url tool
url_snippet_map: dict[str, str] = {},
) -> ParallelToolCallResponse:
"""Run (optionally merged) tool calls in parallel and update citation mappings.
@@ -361,7 +330,6 @@ def run_tool_calls(
override_kwargs = OpenURLToolOverrideKwargs(
starting_citation_num=starting_citation_num,
citation_mapping=url_to_citation,
url_snippet_map=url_snippet_map,
)
starting_citation_num += 100

View File

@@ -9,36 +9,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger(__name__)
# Mapping of curly/smart quotes to straight quotes
CURLY_TO_STRAIGHT_QUOTES: dict[str, str] = {
    "\u2019": "'",  # Right single quotation mark
    "\u2018": "'",  # Left single quotation mark
    "\u201c": '"',  # Left double quotation mark
    "\u201d": '"',  # Right double quotation mark
}
# Zero-width characters that should typically be removed during text normalization
ZERO_WIDTH_CHARS: set[str] = {
    "\u200b",  # Zero-width space
    "\u200c",  # Zero-width non-joiner
    "\u200d",  # Zero-width joiner
    "\ufeff",  # Byte order mark / zero-width no-break space
    "\u2060",  # Word joiner
}
def normalize_curly_quotes(text: str) -> str:
    """Convert curly/smart quotes to straight quotes."""
    # All mappings are single char -> single char, so translate() is exact.
    return text.translate(str.maketrans(CURLY_TO_STRAIGHT_QUOTES))
def is_zero_width_char(c: str) -> bool:
    """Check if a character is a zero-width character."""
    return c in ZERO_WIDTH_CHARS
ESCAPE_SEQUENCE_RE = re.compile(
r"""
( \\U........ # 8-digit hex escapes
@@ -287,15 +257,3 @@ def remove_invalid_unicode_chars(text: str) -> str:
- Unicode non-characters
"""
return _INVALID_UNICODE_CHARS_RE.sub("", text)
def normalize_char(c: str) -> str:
    """Normalize a single character (curly quotes, whitespace, punctuation)."""
    # Map curly/smart quotes to their straight equivalents first.
    char = CURLY_TO_STRAIGHT_QUOTES.get(c, c)
    # Whitespace and punctuation (anything that is not a word char,
    # whitespace, or an apostrophe) both collapse to a single space.
    if char.isspace() or re.match(r"[^\w\s\']", char):
        return " "
    return char.lower()

View File

@@ -255,11 +255,11 @@ fastapi==0.116.1
# onyx
fastapi-limiter==0.1.6
# via onyx
fastapi-users==15.0.2
fastapi-users==14.0.1
# via
# fastapi-users-db-sqlalchemy
# onyx
fastapi-users-db-sqlalchemy==7.0.0
fastapi-users-db-sqlalchemy==5.0.0
# via onyx
fastavro==1.12.1
# via cohere
@@ -573,7 +573,7 @@ mcp==1.25.0
# onyx
mdurl==0.1.2
# via markdown-it-py
mistune==0.8.4
mistune==3.2.0
# via onyx
more-itertools==10.8.0
# via
@@ -608,7 +608,9 @@ mypy-extensions==1.0.0
nest-asyncio==1.6.0
# via onyx
nltk==3.9.1
# via unstructured
# via
# onyx
# unstructured
numpy==2.4.1
# via
# magika
@@ -782,7 +784,7 @@ psycopg2-binary==2.9.9
# via onyx
puremagic==1.28
# via onyx
pwdlib==0.3.0
pwdlib==0.2.1
# via fastapi-users
py==1.11.0
# via retry
@@ -902,7 +904,7 @@ python-json-logger==4.0.0
# via pydocket
python-magic==0.4.27
# via unstructured
python-multipart==0.0.21
python-multipart==0.0.20
# via
# fastapi-users
# mcp

View File

@@ -298,7 +298,7 @@ numpy==2.4.1
# pandas-stubs
# shapely
# voyageai
onyx-devtools==0.4.0
onyx-devtools==0.6.2
# via onyx
openai==2.14.0
# via

View File

@@ -45,9 +45,7 @@ from onyx.db.connector_credential_pair import (
get_connector_credential_pair,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.document_index.factory import (
get_all_document_indices,
)
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.file_store import get_default_file_store
# pylint: enable=E402
@@ -61,7 +59,7 @@ _DELETION_BATCH_SIZE = 1000
def _unsafe_deletion(
db_session: Session,
document_indices: list[DocumentIndex],
document_index: DocumentIndex,
cc_pair: ConnectorCredentialPair,
pair_id: int,
) -> int:
@@ -82,12 +80,11 @@ def _unsafe_deletion(
break
for document in documents:
for document_index in document_indices:
document_index.delete_single(
doc_id=document.id,
tenant_id=POSTGRES_DEFAULT_SCHEMA,
chunk_count=document.chunk_count,
)
document_index.delete_single(
doc_id=document.id,
tenant_id=POSTGRES_DEFAULT_SCHEMA,
chunk_count=document.chunk_count,
)
delete_documents_complete__no_commit(
db_session=db_session,
@@ -214,16 +211,14 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None:
try:
logger.notice("Deleting information from Vespa and Postgres")
active_search_settings = get_active_search_settings(db_session)
# This flow is for deletion so we get all indices.
document_indices = get_all_document_indices(
document_index = get_default_document_index(
active_search_settings.primary,
active_search_settings.secondary,
None,
)
files_deleted_count = _unsafe_deletion(
db_session=db_session,
document_indices=document_indices,
document_index=document_index,
cc_pair=cc_pair,
pair_id=cc_pair_id,
)

View File

@@ -3,8 +3,28 @@
# We get OPENSEARCH_ADMIN_PASSWORD from the repo .env file.
source "$(dirname "$0")/../../.vscode/.env"
cd "$(dirname "$0")/../../deployment/docker_compose"
OPENSEARCH_CONTAINER_NAME="onyx-opensearch"
OPENSEARCH_IMAGE="opensearchproject/opensearch:3.4.0"
# First check the env for OPENSEARCH_REST_API_PORT, else hardcode to 9200.
OPENSEARCH_REST_API_PORT=${OPENSEARCH_REST_API_PORT:-9200}
OPENSEARCH_PERFORMANCE_ANALYZER_PORT=9600
# Start OpenSearch.
echo "Forcefully starting fresh OpenSearch container..."
docker compose -f docker-compose.opensearch.yml up --force-recreate -d opensearch
# Tear down any previous OpenSearch container so we always start fresh.
# Failures are ignored (|| true) because the container may not exist yet.
function stop_and_remove_opensearch_container() {
echo "Stopping and removing the existing OpenSearch container..."
docker stop "$OPENSEARCH_CONTAINER_NAME" 2>/dev/null || true
docker rm "$OPENSEARCH_CONTAINER_NAME" 2>/dev/null || true
}
# Set OPENSEARCH_ADMIN_PASSWORD=<some password> in your .env file.
# Fail fast with a clear message if the admin password was not provided.
if [ -z "$OPENSEARCH_ADMIN_PASSWORD" ]; then
echo "Error: OPENSEARCH_ADMIN_PASSWORD environment variable is not set." >&2
echo "Please set OPENSEARCH_ADMIN_PASSWORD=<some password> in your .env file." >&2
exit 1
fi
# Stop and remove the existing container.
stop_and_remove_opensearch_container
# Start the OpenSearch container.
# Runs single-node (no cluster discovery); publishes the REST API and
# performance-analyzer ports configured above.
echo "Starting OpenSearch container..."
docker run --detach --name "$OPENSEARCH_CONTAINER_NAME" --publish "$OPENSEARCH_REST_API_PORT:9200" --publish "$OPENSEARCH_PERFORMANCE_ANALYZER_PORT:9600" -e "discovery.type=single-node" -e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_ADMIN_PASSWORD" "$OPENSEARCH_IMAGE"

Some files were not shown because too many files have changed in this diff Show More