mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-12 03:02:43 +00:00
Compare commits
43 Commits
test-tests
...
v2.9.8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f1c30974f5 | ||
|
|
81bf07fb15 | ||
|
|
b565bf8291 | ||
|
|
b4da99cbdd | ||
|
|
f910feea0f | ||
|
|
e3af8c6c8a | ||
|
|
d6e46ed792 | ||
|
|
4ce1f4ecdd | ||
|
|
a4678884d7 | ||
|
|
c861ba68f1 | ||
|
|
b1d0e0bb0b | ||
|
|
0d78bf52e3 | ||
|
|
bd743282e6 | ||
|
|
d44d1d92b3 | ||
|
|
4cedcfee59 | ||
|
|
90a721a76e | ||
|
|
3ccd99e931 | ||
|
|
9076bf603f | ||
|
|
8c6e0a70c3 | ||
|
|
bebe9555d4 | ||
|
|
c530722c9f | ||
|
|
68380b4ddb | ||
|
|
b3380746ab | ||
|
|
56be114c87 | ||
|
|
54f467da5c | ||
|
|
8726b112fe | ||
|
|
92181d07b2 | ||
|
|
3a73f7fab2 | ||
|
|
7dabaca7cd | ||
|
|
dec4748825 | ||
|
|
072836cd86 | ||
|
|
2705b5fb0e | ||
|
|
37dcde4226 | ||
|
|
a765b5f622 | ||
|
|
5e093368d1 | ||
|
|
f945ab6b05 | ||
|
|
11b7a22404 | ||
|
|
8e34f944cc | ||
|
|
32606dc752 | ||
|
|
1f6c4b40bf | ||
|
|
1943f1c745 | ||
|
|
82460729a6 | ||
|
|
c445e6a8c0 |
24
.github/workflows/deployment.yml
vendored
24
.github/workflows/deployment.yml
vendored
@@ -404,7 +404,7 @@ jobs:
|
||||
latest=false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -477,7 +477,7 @@ jobs:
|
||||
latest=false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -537,7 +537,7 @@ jobs:
|
||||
parse-json-secrets: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -615,7 +615,7 @@ jobs:
|
||||
latest=false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -696,7 +696,7 @@ jobs:
|
||||
latest=false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -764,7 +764,7 @@ jobs:
|
||||
parse-json-secrets: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -839,7 +839,7 @@ jobs:
|
||||
latest=false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -911,7 +911,7 @@ jobs:
|
||||
latest=false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -970,7 +970,7 @@ jobs:
|
||||
parse-json-secrets: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
@@ -1049,7 +1049,7 @@ jobs:
|
||||
latest=false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
with:
|
||||
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
|
||||
|
||||
@@ -1128,7 +1128,7 @@ jobs:
|
||||
latest=false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
with:
|
||||
buildkitd-flags: ${{ vars.DOCKER_DEBUG == 'true' && '--debug' || '' }}
|
||||
|
||||
@@ -1193,7 +1193,7 @@ jobs:
|
||||
parse-json-secrets: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
|
||||
2
.github/workflows/docker-tag-beta.yml
vendored
2
.github/workflows/docker-tag-beta.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
|
||||
2
.github/workflows/docker-tag-latest.yml
vendored
2
.github/workflows/docker-tag-latest.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
|
||||
1
.github/workflows/helm-chart-releases.yml
vendored
1
.github/workflows/helm-chart-releases.yml
vendored
@@ -29,7 +29,6 @@ jobs:
|
||||
run: |
|
||||
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
|
||||
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
|
||||
helm repo add opensearch https://opensearch-project.github.io/helm-charts
|
||||
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
|
||||
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
|
||||
helm repo add minio https://charts.min.io/
|
||||
|
||||
2
.github/workflows/nightly-scan-licenses.yml
vendored
2
.github/workflows/nightly-scan-licenses.yml
vendored
@@ -94,7 +94,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
|
||||
@@ -45,9 +45,6 @@ env:
|
||||
# TODO: debug why this is failing and enable
|
||||
CODE_INTERPRETER_BASE_URL: http://localhost:8000
|
||||
|
||||
# OpenSearch
|
||||
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
|
||||
|
||||
jobs:
|
||||
discover-test-dirs:
|
||||
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
|
||||
@@ -128,13 +125,11 @@ jobs:
|
||||
docker compose \
|
||||
-f docker-compose.yml \
|
||||
-f docker-compose.dev.yml \
|
||||
-f docker-compose.opensearch.yml \
|
||||
up -d \
|
||||
minio \
|
||||
relational_db \
|
||||
cache \
|
||||
index \
|
||||
opensearch \
|
||||
code-interpreter
|
||||
|
||||
- name: Run migrations
|
||||
@@ -163,7 +158,7 @@ jobs:
|
||||
cd deployment/docker_compose
|
||||
|
||||
# Get list of running containers
|
||||
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
|
||||
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
|
||||
|
||||
# Collect logs from each container
|
||||
for container in $containers; do
|
||||
|
||||
8
.github/workflows/pr-helm-chart-testing.yml
vendored
8
.github/workflows/pr-helm-chart-testing.yml
vendored
@@ -88,7 +88,6 @@ jobs:
|
||||
echo "=== Adding Helm repositories ==="
|
||||
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
|
||||
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
|
||||
helm repo add opensearch https://opensearch-project.github.io/helm-charts
|
||||
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
|
||||
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
|
||||
helm repo add minio https://charts.min.io/
|
||||
@@ -181,11 +180,6 @@ jobs:
|
||||
trap cleanup EXIT
|
||||
|
||||
# Run the actual installation with detailed logging
|
||||
# Note that opensearch.enabled is true whereas others in this install
|
||||
# are false. There is some work that needs to be done to get this
|
||||
# entire step working in CI, enabling opensearch here is a small step
|
||||
# in that direction. If this is causing issues, disabling it in this
|
||||
# step should be ok in the short term.
|
||||
echo "=== Starting ct install ==="
|
||||
set +e
|
||||
ct install --all \
|
||||
@@ -193,8 +187,6 @@ jobs:
|
||||
--set=nginx.enabled=false \
|
||||
--set=minio.enabled=false \
|
||||
--set=vespa.enabled=false \
|
||||
--set=opensearch.enabled=true \
|
||||
--set=auth.opensearch.enabled=true \
|
||||
--set=slackbot.enabled=false \
|
||||
--set=postgresql.enabled=true \
|
||||
--set=postgresql.nameOverride=cloudnative-pg \
|
||||
|
||||
6
.github/workflows/pr-integration-tests.yml
vendored
6
.github/workflows/pr-integration-tests.yml
vendored
@@ -103,7 +103,7 @@ jobs:
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
@@ -208,7 +208,7 @@ jobs:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling openapitools/openapi-generator-cli
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
@@ -155,7 +155,7 @@ jobs:
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling Vespa, Redis, Postgres, and Minio images
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
@@ -214,7 +214,7 @@ jobs:
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling openapitools/openapi-generator-cli
|
||||
# otherwise, we hit the "Unauthenticated users" limit
|
||||
|
||||
6
.github/workflows/pr-playwright-tests.yml
vendored
6
.github/workflows/pr-playwright-tests.yml
vendored
@@ -85,7 +85,7 @@ jobs:
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
@@ -207,7 +207,7 @@ jobs:
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
|
||||
|
||||
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
|
||||
# https://docs.docker.com/docker-hub/usage/
|
||||
|
||||
3
.github/workflows/pr-python-checks.yml
vendored
3
.github/workflows/pr-python-checks.yml
vendored
@@ -50,8 +50,9 @@ jobs:
|
||||
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
|
||||
with:
|
||||
path: backend/.mypy_cache
|
||||
key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
|
||||
key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
|
||||
restore-keys: |
|
||||
mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-
|
||||
mypy-${{ runner.os }}-
|
||||
|
||||
- name: Run MyPy
|
||||
|
||||
138
.github/workflows/pr-python-model-tests.yml
vendored
138
.github/workflows/pr-python-model-tests.yml
vendored
@@ -5,6 +5,11 @@ on:
|
||||
# This cron expression runs the job daily at 16:00 UTC (9am PT)
|
||||
- cron: "0 16 * * *"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
branch:
|
||||
description: 'Branch to run the workflow on'
|
||||
required: false
|
||||
default: 'main'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -26,11 +31,7 @@ env:
|
||||
jobs:
|
||||
model-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on:
|
||||
- runs-on
|
||||
- runner=4cpu-linux-arm64
|
||||
- "run-id=${{ github.run_id }}-model-check"
|
||||
- "extras=ecr-cache"
|
||||
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
|
||||
timeout-minutes: 45
|
||||
|
||||
env:
|
||||
@@ -42,83 +43,104 @@ jobs:
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Setup Python and Install Dependencies
|
||||
uses: ./.github/actions/setup-python-and-install-dependencies
|
||||
with:
|
||||
requirements: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
|
||||
- name: Format branch name for cache
|
||||
id: format-branch
|
||||
env:
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
run: |
|
||||
if [ -n "${PR_NUMBER}" ]; then
|
||||
CACHE_SUFFIX="${PR_NUMBER}"
|
||||
else
|
||||
# shellcheck disable=SC2001
|
||||
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
|
||||
fi
|
||||
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
|
||||
# tag every docker image with "test" so that we can spin up the correct set
|
||||
# of images during testing
|
||||
|
||||
- name: Build and load
|
||||
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6
|
||||
env:
|
||||
TAG: model-server-${{ github.run_id }}
|
||||
# We don't need to build the Web Docker image since it's not yet used
|
||||
# in the integration tests. We have a separate action to verify that it builds
|
||||
# successfully.
|
||||
- name: Pull Model Server Docker image
|
||||
run: |
|
||||
docker pull onyxdotapp/onyx-model-server:latest
|
||||
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
|
||||
with:
|
||||
load: true
|
||||
targets: model-server
|
||||
set: |
|
||||
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
|
||||
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
|
||||
model-server.cache-from=type=registry,ref=onyxdotapp/onyx-model-server:latest
|
||||
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
|
||||
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
|
||||
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
backend/requirements/default.txt
|
||||
backend/requirements/dev.txt
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
|
||||
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
|
||||
|
||||
- name: Start Docker containers
|
||||
id: start_docker
|
||||
env:
|
||||
IMAGE_TAG: model-server-${{ github.run_id }}
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose \
|
||||
-f docker-compose.yml \
|
||||
-f docker-compose.dev.yml \
|
||||
up -d --wait \
|
||||
inference_model_server
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
AUTH_TYPE=basic \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
|
||||
while true; do
|
||||
current_time=$(date +%s)
|
||||
elapsed_time=$((current_time - start_time))
|
||||
|
||||
if [ $elapsed_time -ge $timeout ]; then
|
||||
echo "Timeout reached. Service did not become ready in 5 minutes."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Use curl with error handling to ignore specific exit code 56
|
||||
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
|
||||
|
||||
if [ "$response" = "200" ]; then
|
||||
echo "Service is ready!"
|
||||
break
|
||||
elif [ "$response" = "curl_error" ]; then
|
||||
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
|
||||
else
|
||||
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
|
||||
fi
|
||||
|
||||
sleep 5
|
||||
done
|
||||
echo "Finished waiting for service."
|
||||
|
||||
- name: Run Tests
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
run: |
|
||||
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
|
||||
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
|
||||
|
||||
- name: Alert on Failure
|
||||
if: failure() && github.event_name == 'schedule'
|
||||
uses: ./.github/actions/slack-notify
|
||||
with:
|
||||
webhook-url: ${{ secrets.SLACK_WEBHOOK }}
|
||||
failed-jobs: model-check
|
||||
title: "🚨 Scheduled Model Tests failed!"
|
||||
ref-name: ${{ github.ref_name }}
|
||||
env:
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO: ${{ github.repository }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
run: |
|
||||
curl -X POST \
|
||||
-H 'Content-type: application/json' \
|
||||
--data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
|
||||
$SLACK_WEBHOOK
|
||||
|
||||
- name: Dump all-container logs (optional)
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,8 +1,5 @@
|
||||
# editors
|
||||
.vscode
|
||||
!/.vscode/env_template.txt
|
||||
!/.vscode/launch.json
|
||||
!/.vscode/tasks.template.jsonc
|
||||
.zed
|
||||
.cursor
|
||||
|
||||
|
||||
@@ -74,13 +74,6 @@ repos:
|
||||
# pass_filenames: true
|
||||
# files: ^backend/.*\.py$
|
||||
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
name: Check for added large files
|
||||
args: ["--maxkb=1500"]
|
||||
|
||||
- repo: https://github.com/rhysd/actionlint
|
||||
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
|
||||
hooks:
|
||||
@@ -153,22 +146,6 @@ repos:
|
||||
pass_filenames: false
|
||||
files: \.tf$
|
||||
|
||||
- id: npm-install
|
||||
name: npm install
|
||||
description: "Automatically run 'npm install' after a checkout, pull or rebase"
|
||||
language: system
|
||||
entry: bash -c 'cd web && npm install --no-save'
|
||||
pass_filenames: false
|
||||
files: ^web/package(-lock)?\.json$
|
||||
stages: [post-checkout, post-merge, post-rewrite]
|
||||
- id: npm-install-check
|
||||
name: npm install --package-lock-only
|
||||
description: "Check the 'web/package-lock.json' is updated"
|
||||
language: system
|
||||
entry: bash -c 'cd web && npm install --package-lock-only'
|
||||
pass_filenames: false
|
||||
files: ^web/package(-lock)?\.json$
|
||||
|
||||
# Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
|
||||
# This is a preview package - if it breaks:
|
||||
# 1. Try updating: cd web && npm update @typescript/native-preview
|
||||
|
||||
6
.vscode/env_template.txt
vendored
6
.vscode/env_template.txt
vendored
@@ -17,6 +17,12 @@ LOG_ONYX_MODEL_INTERACTIONS=True
|
||||
LOG_LEVEL=debug
|
||||
|
||||
|
||||
# This passes top N results to LLM an additional time for reranking prior to
|
||||
# answer generation.
|
||||
# This step is quite heavy on token usage so we disable it for dev generally.
|
||||
DISABLE_LLM_DOC_RELEVANCE=False
|
||||
|
||||
|
||||
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).
|
||||
OAUTH_CLIENT_ID=<REPLACE THIS>
|
||||
OAUTH_CLIENT_SECRET=<REPLACE THIS>
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
|
||||
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
@@ -22,7 +24,7 @@
|
||||
"Slack Bot",
|
||||
"Celery primary",
|
||||
"Celery light",
|
||||
"Celery heavy",
|
||||
"Celery background",
|
||||
"Celery docfetching",
|
||||
"Celery docprocessing",
|
||||
"Celery beat"
|
||||
@@ -577,99 +579,6 @@
|
||||
"group": "3"
|
||||
}
|
||||
},
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Database ---",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"presentation": {
|
||||
"group": "4",
|
||||
"order": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Clean restore seeded database dump (destructive)",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "uv",
|
||||
"runtimeArgs": [
|
||||
"run",
|
||||
"--with",
|
||||
"onyx-devtools",
|
||||
"ods",
|
||||
"db",
|
||||
"restore",
|
||||
"--fetch-seeded",
|
||||
"--clean",
|
||||
"--yes"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"presentation": {
|
||||
"group": "4"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Create database snapshot",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "uv",
|
||||
"runtimeArgs": [
|
||||
"run",
|
||||
"--with",
|
||||
"onyx-devtools",
|
||||
"ods",
|
||||
"db",
|
||||
"dump",
|
||||
"backup.dump"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"presentation": {
|
||||
"group": "4"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Clean restore database snapshot (destructive)",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "uv",
|
||||
"runtimeArgs": [
|
||||
"run",
|
||||
"--with",
|
||||
"onyx-devtools",
|
||||
"ods",
|
||||
"db",
|
||||
"restore",
|
||||
"--clean",
|
||||
"--yes",
|
||||
"backup.dump"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"presentation": {
|
||||
"group": "4"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Upgrade database to head revision",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "uv",
|
||||
"runtimeArgs": [
|
||||
"run",
|
||||
"--with",
|
||||
"onyx-devtools",
|
||||
"ods",
|
||||
"db",
|
||||
"upgrade"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"presentation": {
|
||||
"group": "4"
|
||||
}
|
||||
},
|
||||
{
|
||||
// script to generate the openapi schema
|
||||
"name": "Onyx OpenAPI Schema Generator",
|
||||
263
CONTRIBUTING.md
263
CONTRIBUTING.md
@@ -1,31 +1,262 @@
|
||||
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
|
||||
|
||||
# Contributing to Onyx
|
||||
|
||||
Hey there! We are so excited that you're interested in Onyx.
|
||||
|
||||
As an open source project in a rapidly changing space, we welcome all contributions.
|
||||
|
||||
## Contribution Opportunities
|
||||
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to look for and share contribution ideas.
|
||||
## 💃 Guidelines
|
||||
|
||||
If you have your own feature that you would like to build please create an issue and community members can provide feedback and
|
||||
thumb it up if they feel a common need.
|
||||
### Contribution Opportunities
|
||||
|
||||
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
|
||||
|
||||
## Contributing Code
|
||||
Please reference the documents in contributing_guides folder to ensure that the code base is kept to a high standard.
|
||||
1. dev_setup.md (start here): gives you a guide to setting up a local development environment.
|
||||
2. contribution_process.md: how to ensure you are building valuable features that will get reviewed and merged.
|
||||
3. best_practices.md: before asking for reviews, ensure your changes meet the repo code quality standards.
|
||||
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
|
||||
via [Discord](https://discord.gg/4NA5SbzrWb) or [email](mailto:hello@onyx.app).
|
||||
|
||||
To contribute, please follow the
|
||||
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
|
||||
will be marked with the `approved by maintainers` label.
|
||||
Issues marked `good first issue` are an especially great place to start.
|
||||
|
||||
**Connectors** to other tools are another great place to contribute. For details on how, refer to this
|
||||
[README.md](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md).
|
||||
|
||||
If you have a new/different contribution in mind, we'd love to hear about it!
|
||||
Your input is vital to making sure that Onyx moves in the right direction.
|
||||
Before starting on implementation, please raise a GitHub issue.
|
||||
|
||||
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
|
||||
[Discord](https://discord.gg/4NA5SbzrWb) directly about anything at all.
|
||||
|
||||
### Contributing Code
|
||||
|
||||
To contribute to this project, please follow the
|
||||
["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
|
||||
When opening a pull request, mention related issues and feel free to tag relevant maintainers.
|
||||
|
||||
Before creating a pull request please make sure that the new changes conform to the formatting and linting requirements.
|
||||
See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.
|
||||
|
||||
### Getting Help 🙋
|
||||
|
||||
Our goal is to make contributing as easy as possible. If you run into any issues please don't hesitate to reach out.
|
||||
That way we can help future contributors and users can avoid the same issue.
|
||||
|
||||
We also have support channels and generally interesting discussions on our
|
||||
[Discord](https://discord.gg/4NA5SbzrWb).
|
||||
|
||||
We would love to see you there!
|
||||
|
||||
## Get Started 🚀
|
||||
|
||||
Onyx being a fully functional app, relies on some external software, specifically:
|
||||
|
||||
- [Postgres](https://www.postgresql.org/) (Relational DB)
|
||||
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
|
||||
- [Redis](https://redis.io/) (Cache)
|
||||
- [MinIO](https://min.io/) (File Store)
|
||||
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
|
||||
|
||||
> **Note:**
|
||||
> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software. We believe this combination is easier for
|
||||
> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Onyx stack within Docker below.
|
||||
|
||||
### Local Set Up
|
||||
|
||||
Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.
|
||||
|
||||
If using a lower version, modifications will have to be made to the code.
|
||||
If using a higher version, sometimes some libraries will not be available (i.e. we had problems with Tensorflow in the past with higher versions of python).
|
||||
|
||||
#### Backend: Python requirements
|
||||
|
||||
Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
|
||||
|
||||
For convenience here's a command for it:
|
||||
|
||||
```bash
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
_For Windows, activate the virtual environment using Command Prompt:_
|
||||
|
||||
```bash
|
||||
.venv\Scripts\activate
|
||||
```
|
||||
|
||||
If using PowerShell, the command slightly differs:
|
||||
|
||||
```powershell
|
||||
.venv\Scripts\Activate.ps1
|
||||
```
|
||||
|
||||
Install the required python dependencies:
|
||||
|
||||
```bash
|
||||
uv sync --all-extras
|
||||
```
|
||||
|
||||
Install Playwright for Python (headless browser required by the Web Connector):
|
||||
|
||||
```bash
|
||||
uv run playwright install
|
||||
```
|
||||
|
||||
#### Frontend: Node dependencies
|
||||
|
||||
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
|
||||
to manage your Node installations. Once installed, you can run
|
||||
|
||||
```bash
|
||||
nvm install 22 && nvm use 22
|
||||
node -v # verify your active version
|
||||
```
|
||||
|
||||
Navigate to `onyx/web` and run:
|
||||
|
||||
```bash
|
||||
npm i
|
||||
```
|
||||
|
||||
## Formatting and Linting
|
||||
|
||||
### Backend
|
||||
|
||||
For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
|
||||
|
||||
Then run:
|
||||
|
||||
```bash
|
||||
uv run pre-commit install
|
||||
```
|
||||
|
||||
Additionally, we use `mypy` for static type checking.
|
||||
Onyx is fully type-annotated, and we want to keep it that way!
|
||||
To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
|
||||
|
||||
### Web
|
||||
|
||||
We use `prettier` for formatting. The desired version will be installed via a `npm i` from the `onyx/web` directory.
|
||||
To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
|
||||
|
||||
Pre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail.
|
||||
Re-stage your changes and commit again.
|
||||
|
||||
# Running the application for development
|
||||
|
||||
## Developing using VSCode Debugger (recommended)
|
||||
|
||||
**We highly recommend using VSCode debugger for development.**
|
||||
See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
|
||||
|
||||
Otherwise, you can follow the instructions below to run the application for development.
|
||||
|
||||
## Manually running the application for development
|
||||
### Docker containers for external software
|
||||
|
||||
You will need Docker installed to run these containers.
|
||||
|
||||
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio
|
||||
```
|
||||
|
||||
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
|
||||
|
||||
### Running Onyx locally
|
||||
|
||||
To start the frontend, navigate to `onyx/web` and run:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Next, start the model server which runs the local NLP models.
|
||||
Navigate to `onyx/backend` and run:
|
||||
|
||||
```bash
|
||||
uvicorn model_server.main:app --reload --port 9000
|
||||
```
|
||||
|
||||
_For Windows (for compatibility with both PowerShell and Command Prompt):_
|
||||
|
||||
```bash
|
||||
powershell -Command "uvicorn model_server.main:app --reload --port 9000"
|
||||
```
|
||||
|
||||
The first time running Onyx, you will need to run the DB migrations for Postgres.
|
||||
After the first time, this is no longer required unless the DB models change.
|
||||
|
||||
Navigate to `onyx/backend` and with the venv active, run:
|
||||
|
||||
```bash
|
||||
alembic upgrade head
|
||||
```
|
||||
|
||||
Next, start the task queue which orchestrates the background jobs.
|
||||
Jobs that take more time are run async from the API server.
|
||||
|
||||
Still in `onyx/backend`, run:
|
||||
|
||||
```bash
|
||||
python ./scripts/dev_run_background_jobs.py
|
||||
```
|
||||
|
||||
To run the backend API server, navigate back to `onyx/backend` and run:
|
||||
|
||||
```bash
|
||||
AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
|
||||
```
|
||||
|
||||
_For Windows (for compatibility with both PowerShell and Command Prompt):_
|
||||
|
||||
```bash
|
||||
powershell -Command "
|
||||
$env:AUTH_TYPE='disabled'
|
||||
uvicorn onyx.main:app --reload --port 8080
|
||||
"
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
|
||||
|
||||
#### Wrapping up
|
||||
|
||||
You should now have 4 servers running:
|
||||
|
||||
- Web server
|
||||
- Backend API
|
||||
- Model server
|
||||
- Background jobs
|
||||
|
||||
Now, visit `http://localhost:3000` in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.
|
||||
|
||||
You've successfully set up a local Onyx instance! 🏁
|
||||
|
||||
#### Running the Onyx application in a container
|
||||
|
||||
You can run the full Onyx application stack from pre-built images including all external software dependencies.
|
||||
|
||||
Navigate to `onyx/deployment/docker_compose` and run:
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
|
||||
|
||||
If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
|
||||
|
||||
```bash
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
|
||||
## Getting Help 🙋
|
||||
We have support channels and generally interesting discussions on our [Discord](https://discord.gg/4NA5SbzrWb).
|
||||
### Release Process
|
||||
|
||||
See you there!
|
||||
|
||||
|
||||
## Release Process
|
||||
Onyx loosely follows the SemVer versioning standard.
|
||||
Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
|
||||
A set of Docker containers will be pushed automatically to DockerHub with every tag.
|
||||
|
||||
@@ -7,6 +7,8 @@ This guide explains how to set up and use VSCode's debugging capabilities with t
|
||||
1. **Environment Setup**:
|
||||
- Copy `.vscode/env_template.txt` to `.vscode/.env`
|
||||
- Fill in the necessary environment variables in `.vscode/.env`
|
||||
2. **launch.json**:
|
||||
- Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
|
||||
|
||||
## Using the Debugger
|
||||
|
||||
@@ -37,6 +37,10 @@ CVE-2023-50868
|
||||
CVE-2023-52425
|
||||
CVE-2024-28757
|
||||
|
||||
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
|
||||
# No impact in our settings
|
||||
CVE-2023-7104
|
||||
|
||||
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
|
||||
# Accept the risk
|
||||
CVE-2023-25193
|
||||
|
||||
@@ -89,6 +89,12 @@ RUN uv pip install --system --no-cache-dir --upgrade \
|
||||
RUN python -c "from tokenizers import Tokenizer; \
|
||||
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
|
||||
|
||||
# Pre-downloading NLTK for setups with limited egress
|
||||
RUN python -c "import nltk; \
|
||||
nltk.download('stopwords', quiet=True); \
|
||||
nltk.download('punkt_tab', quiet=True);"
|
||||
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
|
||||
|
||||
# Pre-downloading tiktoken for setups with limited egress
|
||||
RUN python -c "import tiktoken; \
|
||||
tiktoken.get_encoding('cl100k_base')"
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
"""add_unique_constraint_to_inputprompt_prompt_user_id
|
||||
|
||||
Revision ID: 2c2430828bdf
|
||||
Revises: fb80bdd256de
|
||||
Create Date: 2026-01-20 16:01:54.314805
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "2c2430828bdf"
|
||||
down_revision = "fb80bdd256de"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Create unique constraint on (prompt, user_id) for user-owned prompts
|
||||
# This ensures each user can only have one shortcut with a given name
|
||||
op.create_unique_constraint(
|
||||
"uq_inputprompt_prompt_user_id",
|
||||
"inputprompt",
|
||||
["prompt", "user_id"],
|
||||
)
|
||||
|
||||
# Create partial unique index for public prompts (where user_id IS NULL)
|
||||
# PostgreSQL unique constraints don't enforce uniqueness for NULL values,
|
||||
# so we need a partial index to ensure public prompt names are also unique
|
||||
op.execute(
|
||||
"""
|
||||
CREATE UNIQUE INDEX uq_inputprompt_prompt_public
|
||||
ON inputprompt (prompt)
|
||||
WHERE user_id IS NULL
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
|
||||
op.drop_constraint("uq_inputprompt_prompt_user_id", "inputprompt", type_="unique")
|
||||
@@ -1,29 +0,0 @@
|
||||
"""remove default prompt shortcuts
|
||||
|
||||
Revision ID: 41fa44bef321
|
||||
Revises: 2c2430828bdf
|
||||
Create Date: 2025-01-21
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "41fa44bef321"
|
||||
down_revision = "2c2430828bdf"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Delete any user associations for the default prompts first (foreign key constraint)
|
||||
op.execute(
|
||||
"DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)"
|
||||
)
|
||||
# Delete the pre-seeded default prompt shortcuts (they have negative IDs)
|
||||
op.execute("DELETE FROM inputprompt WHERE id < 0")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# We don't restore the default prompts on downgrade
|
||||
pass
|
||||
@@ -1,47 +0,0 @@
|
||||
"""add_search_query_table
|
||||
|
||||
Revision ID: 73e9983e5091
|
||||
Revises: d1b637d7050a
|
||||
Create Date: 2026-01-14 14:16:52.837489
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "73e9983e5091"
|
||||
down_revision = "d1b637d7050a"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.create_table(
|
||||
"search_query",
|
||||
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
|
||||
sa.Column(
|
||||
"user_id",
|
||||
postgresql.UUID(as_uuid=True),
|
||||
sa.ForeignKey("user.id"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("query", sa.String(), nullable=False),
|
||||
sa.Column("query_expansions", postgresql.ARRAY(sa.String()), nullable=True),
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
nullable=False,
|
||||
server_default=sa.func.now(),
|
||||
),
|
||||
)
|
||||
|
||||
op.create_index("ix_search_query_user_id", "search_query", ["user_id"])
|
||||
op.create_index("ix_search_query_created_at", "search_query", ["created_at"])
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_index("ix_search_query_created_at", table_name="search_query")
|
||||
op.drop_index("ix_search_query_user_id", table_name="search_query")
|
||||
op.drop_table("search_query")
|
||||
@@ -10,7 +10,8 @@ from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
from onyx.db.models import IndexModelStatus
|
||||
from onyx.context.search.enums import RecencyBiasSetting, SearchType
|
||||
from onyx.context.search.enums import RecencyBiasSetting
|
||||
from onyx.context.search.enums import SearchType
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "776b3bbe9092"
|
||||
|
||||
@@ -1,116 +0,0 @@
|
||||
"""Add Discord bot tables
|
||||
|
||||
Revision ID: 8b5ce697290e
|
||||
Revises: a1b2c3d4e5f7
|
||||
Create Date: 2025-01-14
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "8b5ce697290e"
|
||||
down_revision = "a1b2c3d4e5f7"
|
||||
branch_labels: None = None
|
||||
depends_on: None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# DiscordBotConfig (singleton table - one per tenant)
|
||||
op.create_table(
|
||||
"discord_bot_config",
|
||||
sa.Column(
|
||||
"id",
|
||||
sa.String(),
|
||||
primary_key=True,
|
||||
server_default=sa.text("'SINGLETON'"),
|
||||
),
|
||||
sa.Column("bot_token", sa.LargeBinary(), nullable=False), # EncryptedString
|
||||
sa.Column(
|
||||
"created_at",
|
||||
sa.DateTime(timezone=True),
|
||||
server_default=sa.func.now(),
|
||||
nullable=False,
|
||||
),
|
||||
sa.CheckConstraint("id = 'SINGLETON'", name="ck_discord_bot_config_singleton"),
|
||||
)
|
||||
|
||||
# DiscordGuildConfig
|
||||
op.create_table(
|
||||
"discord_guild_config",
|
||||
sa.Column("id", sa.Integer(), primary_key=True),
|
||||
sa.Column("guild_id", sa.BigInteger(), nullable=True, unique=True),
|
||||
sa.Column("guild_name", sa.String(), nullable=True),
|
||||
sa.Column("registration_key", sa.String(), nullable=False, unique=True),
|
||||
sa.Column("registered_at", sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column(
|
||||
"default_persona_id",
|
||||
sa.Integer(),
|
||||
sa.ForeignKey("persona.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
),
|
||||
sa.Column(
|
||||
"enabled", sa.Boolean(), server_default=sa.text("true"), nullable=False
|
||||
),
|
||||
)
|
||||
|
||||
# DiscordChannelConfig
|
||||
op.create_table(
|
||||
"discord_channel_config",
|
||||
sa.Column("id", sa.Integer(), primary_key=True),
|
||||
sa.Column(
|
||||
"guild_config_id",
|
||||
sa.Integer(),
|
||||
sa.ForeignKey("discord_guild_config.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column("channel_id", sa.BigInteger(), nullable=False),
|
||||
sa.Column("channel_name", sa.String(), nullable=False),
|
||||
sa.Column(
|
||||
"channel_type",
|
||||
sa.String(20),
|
||||
server_default=sa.text("'text'"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"is_private",
|
||||
sa.Boolean(),
|
||||
server_default=sa.text("false"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"thread_only_mode",
|
||||
sa.Boolean(),
|
||||
server_default=sa.text("false"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"require_bot_invocation",
|
||||
sa.Boolean(),
|
||||
server_default=sa.text("true"),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
"persona_override_id",
|
||||
sa.Integer(),
|
||||
sa.ForeignKey("persona.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
),
|
||||
sa.Column(
|
||||
"enabled", sa.Boolean(), server_default=sa.text("false"), nullable=False
|
||||
),
|
||||
)
|
||||
|
||||
# Unique constraint: one config per channel per guild
|
||||
op.create_unique_constraint(
|
||||
"uq_discord_channel_guild_channel",
|
||||
"discord_channel_config",
|
||||
["guild_config_id", "channel_id"],
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_table("discord_channel_config")
|
||||
op.drop_table("discord_guild_config")
|
||||
op.drop_table("discord_bot_config")
|
||||
@@ -1,47 +0,0 @@
|
||||
"""drop agent_search_metrics table
|
||||
|
||||
Revision ID: a1b2c3d4e5f7
|
||||
Revises: 73e9983e5091
|
||||
Create Date: 2026-01-17
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "a1b2c3d4e5f7"
|
||||
down_revision = "73e9983e5091"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.drop_table("agent__search_metrics")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.create_table(
|
||||
"agent__search_metrics",
|
||||
sa.Column("id", sa.Integer(), nullable=False),
|
||||
sa.Column("user_id", sa.UUID(), nullable=True),
|
||||
sa.Column("persona_id", sa.Integer(), nullable=True),
|
||||
sa.Column("agent_type", sa.String(), nullable=False),
|
||||
sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column("base_duration_s", sa.Float(), nullable=False),
|
||||
sa.Column("full_duration_s", sa.Float(), nullable=False),
|
||||
sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
|
||||
sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
|
||||
sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
|
||||
sa.ForeignKeyConstraint(
|
||||
["user_id"],
|
||||
["user.id"],
|
||||
ondelete="CASCADE",
|
||||
),
|
||||
sa.ForeignKeyConstraint(
|
||||
["persona_id"],
|
||||
["persona.id"],
|
||||
),
|
||||
sa.PrimaryKeyConstraint("id"),
|
||||
)
|
||||
@@ -1,31 +0,0 @@
|
||||
"""add chat_background to user
|
||||
|
||||
Revision ID: fb80bdd256de
|
||||
Revises: 8b5ce697290e
|
||||
Create Date: 2026-01-16 16:15:59.222617
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "fb80bdd256de"
|
||||
down_revision = "8b5ce697290e"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"user",
|
||||
sa.Column(
|
||||
"chat_background",
|
||||
sa.String(),
|
||||
nullable=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("user", "chat_background")
|
||||
@@ -128,8 +128,3 @@ MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")
|
||||
HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")
|
||||
|
||||
GATED_TENANTS_KEY = "gated_tenants"
|
||||
|
||||
# License enforcement - when True, blocks API access for gated/expired licenses
|
||||
LICENSE_ENFORCEMENT_ENABLED = (
|
||||
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "").lower() == "true"
|
||||
)
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
import uuid
|
||||
from datetime import timedelta
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.engine.time_utils import get_db_current_time
|
||||
from onyx.db.models import SearchQuery
|
||||
|
||||
|
||||
def create_search_query(
|
||||
db_session: Session,
|
||||
user_id: UUID,
|
||||
query: str,
|
||||
query_expansions: list[str] | None = None,
|
||||
) -> SearchQuery:
|
||||
"""Create and persist a `SearchQuery` row.
|
||||
|
||||
Notes:
|
||||
- `SearchQuery.id` is a UUID PK without a server-side default, so we generate it.
|
||||
- `created_at` is filled by the DB (server_default=now()).
|
||||
"""
|
||||
search_query = SearchQuery(
|
||||
id=uuid.uuid4(),
|
||||
user_id=user_id,
|
||||
query=query,
|
||||
query_expansions=query_expansions,
|
||||
)
|
||||
db_session.add(search_query)
|
||||
db_session.commit()
|
||||
db_session.refresh(search_query)
|
||||
return search_query
|
||||
|
||||
|
||||
def fetch_search_queries_for_user(
|
||||
db_session: Session,
|
||||
user_id: UUID,
|
||||
filter_days: int | None = None,
|
||||
limit: int | None = None,
|
||||
) -> list[SearchQuery]:
|
||||
"""Fetch `SearchQuery` rows for a user.
|
||||
|
||||
Args:
|
||||
user_id: User UUID.
|
||||
filter_days: Optional time filter. If provided, only rows created within
|
||||
the last `filter_days` days are returned.
|
||||
limit: Optional max number of rows to return.
|
||||
"""
|
||||
if filter_days is not None and filter_days <= 0:
|
||||
raise ValueError("filter_days must be > 0")
|
||||
|
||||
stmt = select(SearchQuery).where(SearchQuery.user_id == user_id)
|
||||
|
||||
if filter_days is not None and filter_days > 0:
|
||||
cutoff = get_db_current_time(db_session) - timedelta(days=filter_days)
|
||||
stmt = stmt.where(SearchQuery.created_at >= cutoff)
|
||||
|
||||
stmt = stmt.order_by(SearchQuery.created_at.desc())
|
||||
|
||||
if limit is not None:
|
||||
stmt = stmt.limit(limit)
|
||||
|
||||
return list(db_session.scalars(stmt).all())
|
||||
@@ -16,17 +16,16 @@ from ee.onyx.server.enterprise_settings.api import (
|
||||
from ee.onyx.server.evals.api import router as evals_router
|
||||
from ee.onyx.server.license.api import router as license_router
|
||||
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
|
||||
from ee.onyx.server.middleware.license_enforcement import (
|
||||
add_license_enforcement_middleware,
|
||||
)
|
||||
from ee.onyx.server.middleware.tenant_tracking import (
|
||||
add_api_server_tenant_id_middleware,
|
||||
)
|
||||
from ee.onyx.server.oauth.api import router as ee_oauth_router
|
||||
from ee.onyx.server.query_and_chat.chat_backend import (
|
||||
router as chat_router,
|
||||
)
|
||||
from ee.onyx.server.query_and_chat.query_backend import (
|
||||
basic_router as ee_query_router,
|
||||
)
|
||||
from ee.onyx.server.query_and_chat.search_backend import router as search_router
|
||||
from ee.onyx.server.query_history.api import router as query_history_router
|
||||
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
|
||||
from ee.onyx.server.seeding import seed_db
|
||||
@@ -86,10 +85,6 @@ def get_application() -> FastAPI:
|
||||
if MULTI_TENANT:
|
||||
add_api_server_tenant_id_middleware(application, logger)
|
||||
|
||||
# Add license enforcement middleware (runs after tenant tracking)
|
||||
# This blocks access when license is expired/gated
|
||||
add_license_enforcement_middleware(application, logger)
|
||||
|
||||
if AUTH_TYPE == AuthType.CLOUD:
|
||||
# For Google OAuth, refresh tokens are requested by:
|
||||
# 1. Adding the right scopes
|
||||
@@ -129,7 +124,7 @@ def get_application() -> FastAPI:
|
||||
# EE only backend APIs
|
||||
include_router_with_global_prefix_prepended(application, query_router)
|
||||
include_router_with_global_prefix_prepended(application, ee_query_router)
|
||||
include_router_with_global_prefix_prepended(application, search_router)
|
||||
include_router_with_global_prefix_prepended(application, chat_router)
|
||||
include_router_with_global_prefix_prepended(application, standard_answer_router)
|
||||
include_router_with_global_prefix_prepended(application, ee_oauth_router)
|
||||
include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
# Single message is likely most reliable and generally better for this task
|
||||
# No final reminders at the end since the user query is expected to be short
|
||||
# If it is not short, it should go into the chat flow so we do not need to account for this.
|
||||
KEYWORD_EXPANSION_PROMPT = """
|
||||
Generate a set of keyword-only queries to help find relevant documents for the provided query. \
|
||||
These queries will be passed to a bm25-based keyword search engine. \
|
||||
Provide a single query per line (where each query consists of one or more keywords). \
|
||||
The queries must be purely keywords and not contain any filler natural language. \
|
||||
The each query should have as few keywords as necessary to represent the user's search intent. \
|
||||
If there are no useful expansions, simply return the original query with no additional keyword queries. \
|
||||
CRITICAL: Do not include any additional formatting, comments, or anything aside from the keyword queries.
|
||||
|
||||
The user query is:
|
||||
{user_query}
|
||||
""".strip()
|
||||
|
||||
|
||||
QUERY_TYPE_PROMPT = """
|
||||
Determine if the provided query is better suited for a keyword search or a semantic search.
|
||||
Respond with "keyword" or "semantic" literally and nothing else.
|
||||
Do not provide any additional text or reasoning to your response.
|
||||
|
||||
CRITICAL: It must only be 1 single word - EITHER "keyword" or "semantic".
|
||||
|
||||
The user query is:
|
||||
{user_query}
|
||||
""".strip()
|
||||
@@ -1,42 +0,0 @@
|
||||
# ruff: noqa: E501, W605 start
|
||||
SEARCH_CLASS = "search"
|
||||
CHAT_CLASS = "chat"
|
||||
|
||||
# Will note that with many larger LLMs the latency on running this prompt via third party APIs is as high as 2 seconds which is too slow for many
|
||||
# use cases.
|
||||
SEARCH_CHAT_PROMPT = f"""
|
||||
Determine if the following query is better suited for a search UI or a chat UI. Respond with "{SEARCH_CLASS}" or "{CHAT_CLASS}" literally and nothing else. \
|
||||
Do not provide any additional text or reasoning to your response. CRITICAL, IT MUST ONLY BE 1 SINGLE WORD - EITHER "{SEARCH_CLASS}" or "{CHAT_CLASS}".
|
||||
|
||||
# Classification Guidelines:
|
||||
## {SEARCH_CLASS}
|
||||
- If the query consists entirely of keywords or query doesn't require any answer from the AI
|
||||
- If the query is a short statement that seems like a search query rather than a question
|
||||
- If the query feels nonsensical or is a short phrase that possibly describes a document or information that could be found in a internal document
|
||||
|
||||
### Examples of {SEARCH_CLASS} queries:
|
||||
- Find me the document that goes over the onboarding process for a new hire
|
||||
- Pull requests since last week
|
||||
- Sales Runbook AMEA Region
|
||||
- Procurement process
|
||||
- Retrieve the PRD for project X
|
||||
|
||||
## {CHAT_CLASS}
|
||||
- If the query is asking a question that requires an answer rather than a document
|
||||
- If the query is asking for a solution, suggestion, or general help
|
||||
- If the query is seeking information that is on the web and likely not in a company internal document
|
||||
- If the query should be answered without any context from additional documents or searches
|
||||
|
||||
### Examples of {CHAT_CLASS} queries:
|
||||
- What led us to win the deal with company X? (seeking answer)
|
||||
- Google Drive not sync-ing files to my computer (seeking solution)
|
||||
- Review my email: <whatever the email is> (general help)
|
||||
- Write me a script to... (general help)
|
||||
- Cheap flights Europe to Tokyo (information likely found on the web, not internal)
|
||||
|
||||
# User Query:
|
||||
{{user_query}}
|
||||
|
||||
REMEMBER TO ONLY RESPOND WITH "{SEARCH_CLASS}" OR "{CHAT_CLASS}" AND NOTHING ELSE.
|
||||
""".strip()
|
||||
# ruff: noqa: E501, W605 end
|
||||
@@ -1,286 +0,0 @@
|
||||
from collections.abc import Generator
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.db.search import create_search_query
|
||||
from ee.onyx.secondary_llm_flows.query_expansion import expand_keywords
|
||||
from ee.onyx.server.query_and_chat.models import SearchDocWithContent
|
||||
from ee.onyx.server.query_and_chat.models import SearchFullResponse
|
||||
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
|
||||
from ee.onyx.server.query_and_chat.streaming_models import LLMSelectedDocsPacket
|
||||
from ee.onyx.server.query_and_chat.streaming_models import SearchDocsPacket
|
||||
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
|
||||
from ee.onyx.server.query_and_chat.streaming_models import SearchQueriesPacket
|
||||
from onyx.context.search.models import BaseFilters
|
||||
from onyx.context.search.models import ChunkSearchRequest
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
from onyx.context.search.pipeline import merge_individual_chunks
|
||||
from onyx.context.search.pipeline import search_pipeline
|
||||
from onyx.db.models import User
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.document_index.interfaces import DocumentIndex
|
||||
from onyx.llm.factory import get_default_llm
|
||||
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
|
||||
from onyx.tools.tool_implementations.search.search_utils import (
|
||||
weighted_reciprocal_rank_fusion,
|
||||
)
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
# This is just a heuristic that also happens to work well for the UI/UX
|
||||
# Users would not find it useful to see a huge list of suggested docs
|
||||
# but more than 1 is also likely good as many questions may target more than 1 doc.
|
||||
TARGET_NUM_SECTIONS_FOR_LLM_SELECTION = 3
|
||||
|
||||
|
||||
def _run_single_search(
    query: str,
    filters: BaseFilters | None,
    document_index: DocumentIndex,
    user: User | None,
    db_session: Session,
    num_hits: int | None = None,
) -> list[InferenceChunk]:
    """Run one query through the standard search pipeline and return its chunks.

    Builds a ChunkSearchRequest from the given query, user-selected filters,
    and optional result limit, then delegates to search_pipeline. No persona
    is applied, since this path serves direct (non-chat) search.
    """
    search_request = ChunkSearchRequest(
        query=query,
        user_selected_filters=filters,
        limit=num_hits,
    )
    return search_pipeline(
        chunk_search_request=search_request,
        document_index=document_index,
        user=user,
        # Direct search carries no persona context
        persona=None,
        db_session=db_session,
    )
|
||||
|
||||
|
||||
def stream_search_query(
    request: SendSearchQueryRequest,
    user: User | None,
    db_session: Session,
) -> Generator[
    SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,
    None,
    None,
]:
    """
    Core search function that yields streaming packets.
    Used by both streaming and non-streaming endpoints.

    Packet order: SearchQueriesPacket first, then SearchDocsPacket, then
    (only when LLM selection was requested) LLMSelectedDocsPacket.
    NOTE(review): SearchErrorPacket appears in the yield type but is never
    yielded in this body — presumably emitted by a caller/wrapper; confirm.
    """
    # Get document index
    search_settings = get_current_search_settings(db_session)
    # This flow is for search so we do not get all indices.
    document_index = get_default_document_index(search_settings, None)

    # Determine queries to execute
    original_query = request.search_query
    keyword_expansions: list[str] = []

    if request.run_query_expansion:
        try:
            llm = get_default_llm()
            keyword_expansions = expand_keywords(
                user_query=original_query,
                llm=llm,
            )
            if keyword_expansions:
                logger.debug(
                    f"Query expansion generated {len(keyword_expansions)} keyword queries"
                )
        except Exception as e:
            # Expansion is best-effort; fall back to the original query alone.
            logger.warning(f"Query expansion failed: {e}; using original query only.")
            keyword_expansions = []

    # Build list of all executed queries for tracking
    all_executed_queries = [original_query] + keyword_expansions

    # Persist the query (and its expansions) for history/analytics.
    # TODO remove this check, user should not be None
    if user is not None:
        create_search_query(
            db_session=db_session,
            user_id=user.id,
            query=request.search_query,
            query_expansions=keyword_expansions if keyword_expansions else None,
        )

    # Execute search(es)
    if not keyword_expansions:
        # Single query (original only) - no threading needed
        chunks = _run_single_search(
            query=original_query,
            filters=request.filters,
            document_index=document_index,
            user=user,
            db_session=db_session,
            num_hits=request.num_hits,
        )
    else:
        # Multiple queries - run in parallel and merge with RRF
        # First query is the original (semantic), rest are keyword expansions
        search_functions = [
            (
                _run_single_search,
                (
                    query,
                    request.filters,
                    document_index,
                    user,
                    db_session,
                    request.num_hits,
                ),
            )
            for query in all_executed_queries
        ]

        # Run all searches in parallel
        all_search_results: list[list[InferenceChunk]] = (
            run_functions_tuples_in_parallel(
                search_functions,
                allow_failures=True,
            )
        )

        # Separate original query results from keyword expansion results
        # Note that in rare cases, the original query may have failed and so we may be
        # just overweighting one set of keyword results, should be not a big deal though.
        original_result = all_search_results[0] if all_search_results else []
        keyword_results = all_search_results[1:] if len(all_search_results) > 1 else []

        # Build valid results and weights
        # Original query (semantic): weight 2.0
        # Keyword expansions: weight 1.0 each
        valid_results: list[list[InferenceChunk]] = []
        weights: list[float] = []

        if original_result:
            valid_results.append(original_result)
            weights.append(2.0)

        for keyword_result in keyword_results:
            if keyword_result:
                valid_results.append(keyword_result)
                weights.append(1.0)

        if not valid_results:
            logger.warning("All parallel searches returned empty results")
            chunks = []
        else:
            # Fuse the ranked lists; chunks are identified by doc-id + chunk-id.
            chunks = weighted_reciprocal_rank_fusion(
                ranked_results=valid_results,
                weights=weights,
                id_extractor=lambda chunk: f"{chunk.document_id}_{chunk.chunk_id}",
            )

    # Merge chunks into sections
    sections = merge_individual_chunks(chunks)

    # Truncate to the requested number of hits
    sections = sections[: request.num_hits]

    # Apply LLM document selection if requested
    # num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
    # The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it
    # llm_selected_doc_ids will be:
    # - None if LLM selection was not requested or failed
    # - Empty list if LLM selection ran but selected nothing
    # - List of doc IDs if LLM selection succeeded
    run_llm_selection = (
        request.num_docs_fed_to_llm_selection is not None
        and request.num_docs_fed_to_llm_selection >= 1
    )
    llm_selected_doc_ids: list[str] | None = None
    llm_selection_failed = False
    if run_llm_selection and sections:
        try:
            llm = get_default_llm()
            sections_to_evaluate = sections[: request.num_docs_fed_to_llm_selection]
            selected_sections, _ = select_sections_for_expansion(
                sections=sections_to_evaluate,
                user_query=original_query,
                llm=llm,
                max_sections=TARGET_NUM_SECTIONS_FOR_LLM_SELECTION,
                try_to_fill_to_max=True,
            )
            # Extract unique document IDs from selected sections (may be empty)
            # dict.fromkeys preserves first-seen order while de-duplicating.
            llm_selected_doc_ids = list(
                dict.fromkeys(
                    section.center_chunk.document_id for section in selected_sections
                )
            )
            logger.debug(
                f"LLM document selection evaluated {len(sections_to_evaluate)} sections, "
                f"selected {len(selected_sections)} sections with doc IDs: {llm_selected_doc_ids}"
            )
        except Exception as e:
            # Allowing a blanket exception here as this step is not critical and the rest of the results are still valid
            logger.warning(f"LLM document selection failed: {e}")
            llm_selection_failed = True
    elif run_llm_selection and not sections:
        # LLM selection requested but no sections to evaluate
        llm_selected_doc_ids = []

    # Convert to SearchDocWithContent list, optionally including content
    search_docs = SearchDocWithContent.from_inference_sections(
        sections,
        include_content=request.include_content,
        is_internet=False,
    )

    # Yield queries packet
    yield SearchQueriesPacket(all_executed_queries=all_executed_queries)

    # Yield docs packet
    yield SearchDocsPacket(search_docs=search_docs)

    # Yield LLM selected docs packet if LLM selection was requested
    # - llm_selected_doc_ids is None if selection failed
    # - llm_selected_doc_ids is empty list if no docs were selected
    # - llm_selected_doc_ids is list of IDs if docs were selected
    if run_llm_selection:
        yield LLMSelectedDocsPacket(
            llm_selected_doc_ids=None if llm_selection_failed else llm_selected_doc_ids
        )
|
||||
|
||||
|
||||
def gather_search_stream(
    packets: Generator[
        SearchQueriesPacket
        | SearchDocsPacket
        | LLMSelectedDocsPacket
        | SearchErrorPacket,
        None,
        None,
    ],
) -> SearchFullResponse:
    """Drain a search packet stream and collapse it into one SearchFullResponse.

    Later packets of the same type overwrite earlier ones; fields for packet
    types that never arrive keep their defaults (empty list / None).
    """
    queries: list[str] = []
    docs: list[SearchDocWithContent] = []
    selected_ids: list[str] | None = None
    error_msg: str | None = None

    for pkt in packets:
        if isinstance(pkt, SearchQueriesPacket):
            queries = pkt.all_executed_queries
        elif isinstance(pkt, SearchDocsPacket):
            docs = pkt.search_docs
        elif isinstance(pkt, LLMSelectedDocsPacket):
            selected_ids = pkt.llm_selected_doc_ids
        elif isinstance(pkt, SearchErrorPacket):
            error_msg = pkt.error

    return SearchFullResponse(
        all_executed_queries=queries,
        search_docs=docs,
        doc_selection_reasoning=None,
        llm_selected_doc_ids=selected_ids,
        error=error_msg,
    )
|
||||
@@ -1,92 +0,0 @@
|
||||
import re
|
||||
|
||||
from ee.onyx.prompts.query_expansion import KEYWORD_EXPANSION_PROMPT
|
||||
from onyx.llm.interfaces import LLM
|
||||
from onyx.llm.models import LanguageModelInput
|
||||
from onyx.llm.models import ReasoningEffort
|
||||
from onyx.llm.models import UserMessage
|
||||
from onyx.llm.utils import llm_response_to_string
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Pattern to remove common LLM artifacts: brackets, quotes, list markers, etc.
|
||||
CLEANUP_PATTERN = re.compile(r'[\[\]"\'`]')
|
||||
|
||||
|
||||
def _clean_keyword_line(line: str) -> str:
|
||||
"""Clean a keyword line by removing common LLM artifacts.
|
||||
|
||||
Removes brackets, quotes, and other characters that LLMs may accidentally
|
||||
include in their output.
|
||||
"""
|
||||
# Remove common artifacts
|
||||
cleaned = CLEANUP_PATTERN.sub("", line)
|
||||
# Remove leading list markers like "1.", "2.", "-", "*"
|
||||
cleaned = re.sub(r"^\s*(?:\d+[\.\)]\s*|[-*]\s*)", "", cleaned)
|
||||
return cleaned.strip()
|
||||
|
||||
|
||||
def expand_keywords(
    user_query: str,
    llm: LLM,
) -> list[str]:
    """Generate keyword-only variants of a query for BM25-style retrieval.

    Prompts the LLM for keyword search queries that cover different facets of
    the user's search intent. The original query itself is never included in
    the returned list.

    Args:
        user_query: The original search query from the user
        llm: Language model to use for keyword expansion

    Returns:
        Expanded keyword queries only (original excluded). Empty list when
        expansion fails or produces nothing usable.
    """
    messages: LanguageModelInput = [
        UserMessage(content=KEYWORD_EXPANSION_PROMPT.format(user_query=user_query))
    ]

    try:
        response = llm.invoke(
            prompt=messages,
            reasoning_effort=ReasoningEffort.OFF,
            # Limit output - we only expect a few short keyword queries
            max_tokens=150,
        )

        content = llm_response_to_string(response).strip()
        if not content:
            logger.warning("Keyword expansion returned empty response.")
            return []

        # One keyword query per line; clean LLM artifacts and drop blanks.
        parsed_queries = [
            cleaned
            for cleaned in (_clean_keyword_line(raw) for raw in content.split("\n"))
            if cleaned
        ]
        if not parsed_queries:
            logger.warning("Keyword expansion parsing returned no queries.")
            return []

        # Case-insensitive de-duplication, also excluding the original query.
        seen_lower: set[str] = {user_query.lower()}
        expanded_queries: list[str] = []
        for candidate in parsed_queries:
            lowered = candidate.lower()
            if lowered in seen_lower:
                continue
            seen_lower.add(lowered)
            expanded_queries.append(candidate)

        logger.debug(f"Keyword expansion generated {len(expanded_queries)} queries")
        return expanded_queries

    except Exception as e:
        logger.warning(f"Keyword expansion failed: {e}")
        return []
|
||||
@@ -1,50 +0,0 @@
|
||||
from ee.onyx.prompts.search_flow_classification import CHAT_CLASS
|
||||
from ee.onyx.prompts.search_flow_classification import SEARCH_CHAT_PROMPT
|
||||
from ee.onyx.prompts.search_flow_classification import SEARCH_CLASS
|
||||
from onyx.llm.interfaces import LLM
|
||||
from onyx.llm.models import LanguageModelInput
|
||||
from onyx.llm.models import ReasoningEffort
|
||||
from onyx.llm.models import UserMessage
|
||||
from onyx.llm.utils import llm_response_to_string
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.timing import log_function_time
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
@log_function_time(print_only=True)
def classify_is_search_flow(
    query: str,
    llm: LLM,
) -> bool:
    """Classify whether a query belongs to the search flow (True) or chat flow (False).

    Defaults to the chat flow on empty or unrecognized LLM output, and prefers
    chat when the response mentions both class names.
    """
    messages: LanguageModelInput = [
        UserMessage(content=SEARCH_CHAT_PROMPT.format(user_query=query))
    ]
    response = llm.invoke(
        prompt=messages,
        reasoning_effort=ReasoningEffort.OFF,
        # Nothing can happen in the UI until this call finishes so we need to be aggressive with the timeout
        timeout_override=2,
        # Well more than necessary but just to ensure completion and in case it succeeds with classifying but
        # ends up rambling
        max_tokens=20,
    )

    normalized = llm_response_to_string(response).strip().lower()
    if not normalized:
        logger.warning(
            "Search flow classification returned empty response; defaulting to chat flow."
        )
        return False

    # Check CHAT_CLASS first so chat wins ties when both class names appear.
    for class_name, is_search in ((CHAT_CLASS, False), (SEARCH_CLASS, True)):
        if class_name in normalized:
            return is_search

    logger.warning(
        "Search flow classification returned unexpected response; defaulting to chat flow. Response=%r",
        normalized,
    )
    return False
|
||||
@@ -19,9 +19,9 @@ from ee.onyx.db.analytics import fetch_query_analytics
|
||||
from ee.onyx.db.analytics import user_can_view_assistant_stats
|
||||
from onyx.auth.users import current_admin_user
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.configs.constants import PUBLIC_API_TAGS
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import User
|
||||
from onyx.server.utils import PUBLIC_API_TAGS
|
||||
|
||||
router = APIRouter(prefix="/analytics", tags=PUBLIC_API_TAGS)
|
||||
|
||||
|
||||
@@ -10,8 +10,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
|
||||
("/enterprise-settings/logo", {"GET"}),
|
||||
("/enterprise-settings/logotype", {"GET"}),
|
||||
("/enterprise-settings/custom-analytics-script", {"GET"}),
|
||||
# Stripe publishable key is safe to expose publicly
|
||||
("/tenants/stripe-publishable-key", {"GET"}),
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
"""Middleware to enforce license status application-wide."""
|
||||
|
||||
import logging
|
||||
from collections.abc import Awaitable
|
||||
from collections.abc import Callable
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi import Request
|
||||
from fastapi import Response
|
||||
from fastapi.responses import JSONResponse
|
||||
from redis.exceptions import RedisError
|
||||
|
||||
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
|
||||
from ee.onyx.db.license import get_cached_license_metadata
|
||||
from ee.onyx.server.tenants.product_gating import is_tenant_gated
|
||||
from onyx.server.settings.models import ApplicationStatus
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
# Paths that are ALWAYS accessible, even when license is expired/gated.
|
||||
# These enable users to:
|
||||
# /auth - Log in/out (users can't fix billing if locked out of auth)
|
||||
# /license - Fetch, upload, or check license status
|
||||
# /health - Health checks for load balancers/orchestrators
|
||||
# /me - Basic user info needed for UI rendering
|
||||
# /settings, /enterprise-settings - View app status and branding
|
||||
# /tenants/billing-* - Manage subscription to resolve gating
|
||||
ALLOWED_PATH_PREFIXES = {
|
||||
"/auth",
|
||||
"/license",
|
||||
"/health",
|
||||
"/me",
|
||||
"/settings",
|
||||
"/enterprise-settings",
|
||||
"/tenants/billing-information",
|
||||
"/tenants/create-customer-portal-session",
|
||||
"/tenants/create-subscription-session",
|
||||
}
|
||||
|
||||
|
||||
def _is_path_allowed(path: str) -> bool:
|
||||
"""Check if path is in allowlist (prefix match)."""
|
||||
return any(path.startswith(prefix) for prefix in ALLOWED_PATH_PREFIXES)
|
||||
|
||||
|
||||
def add_license_enforcement_middleware(
    app: FastAPI, logger: logging.LoggerAdapter
) -> None:
    """Register an HTTP middleware on *app* that blocks gated/expired tenants.

    Requests to allow-listed paths (auth, license, health, billing, etc.) are
    never blocked; everything else returns 402 when the tenant is gated.
    """
    logger.info("License enforcement middleware registered")

    @app.middleware("http")
    async def enforce_license(
        request: Request, call_next: Callable[[Request], Awaitable[Response]]
    ) -> Response:
        """Block requests when license is expired/gated."""
        if not LICENSE_ENFORCEMENT_ENABLED:
            return await call_next(request)

        # Strip a leading "/api" so the allowlist matches both proxied and
        # direct request paths.
        path = request.url.path
        if path.startswith("/api"):
            path = path[4:]

        if _is_path_allowed(path):
            return await call_next(request)

        is_gated = False
        tenant_id = get_current_tenant_id()

        if MULTI_TENANT:
            # Cloud: gating state is tracked per tenant in Redis.
            try:
                is_gated = is_tenant_gated(tenant_id)
            except RedisError as e:
                logger.warning(f"Failed to check tenant gating status: {e}")
                # Fail open - don't block users due to Redis connectivity issues
                is_gated = False
        else:
            # Self-hosted EE: gate based on cached license metadata.
            try:
                metadata = get_cached_license_metadata(tenant_id)
                if metadata:
                    if metadata.status == ApplicationStatus.GATED_ACCESS:
                        is_gated = True
                else:
                    # No license metadata = gated for self-hosted EE
                    is_gated = True
            except RedisError as e:
                logger.warning(f"Failed to check license metadata: {e}")
                # Fail open - don't block users due to Redis connectivity issues
                is_gated = False

        if is_gated:
            # 402 Payment Required signals the frontend to show billing UI.
            logger.info(f"Blocking request for gated tenant: {tenant_id}, path={path}")
            return JSONResponse(
                status_code=402,
                content={
                    "detail": {
                        "error": "license_expired",
                        "message": "Your subscription has expired. Please update your billing.",
                    }
                },
            )

        return await call_next(request)
|
||||
217
backend/ee/onyx/server/query_and_chat/chat_backend.py
Normal file
217
backend/ee/onyx/server/query_and_chat/chat_backend.py
Normal file
@@ -0,0 +1,217 @@
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.server.query_and_chat.models import BasicCreateChatMessageRequest
|
||||
from ee.onyx.server.query_and_chat.models import (
|
||||
BasicCreateChatMessageWithHistoryRequest,
|
||||
)
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.chat.chat_utils import create_chat_history_chain
|
||||
from onyx.chat.models import ChatBasicResponse
|
||||
from onyx.chat.process_message import gather_stream
|
||||
from onyx.chat.process_message import stream_chat_message_objects
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.context.search.models import OptionalSearchSetting
|
||||
from onyx.context.search.models import RetrievalDetails
|
||||
from onyx.db.chat import create_chat_session
|
||||
from onyx.db.chat import create_new_chat_message
|
||||
from onyx.db.chat import get_or_create_root_message
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import User
|
||||
from onyx.llm.factory import get_llm_for_persona
|
||||
from onyx.natural_language_processing.utils import get_tokenizer
|
||||
from onyx.server.query_and_chat.models import CreateChatMessageRequest
|
||||
from onyx.server.query_and_chat.models import MessageOrigin
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
router = APIRouter(prefix="/chat")
|
||||
|
||||
|
||||
@router.post("/send-message-simple-api")
def handle_simplified_chat_message(
    chat_message_req: BasicCreateChatMessageRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ChatBasicResponse:
    """Non-streaming chat endpoint returning a minimal response.

    Creates a chat session on the fly when only a persona_id is supplied,
    appends the message to the end of the session's history chain, runs the
    full chat pipeline, and gathers the stream into one ChatBasicResponse.

    Raises:
        HTTPException(400): empty message, missing session/persona, or an
            invalid persona.
    """
    logger.notice(f"Received new simple api chat message: {chat_message_req.message}")

    if not chat_message_req.message:
        raise HTTPException(status_code=400, detail="Empty chat message is invalid")

    # Handle chat session creation if chat_session_id is not provided
    if chat_message_req.chat_session_id is None:
        if chat_message_req.persona_id is None:
            raise HTTPException(
                status_code=400,
                detail="Either chat_session_id or persona_id must be provided",
            )

        # Create a new chat session with the provided persona_id
        try:
            new_chat_session = create_chat_session(
                db_session=db_session,
                description="",  # Leave empty for simple API
                user_id=user.id if user else None,
                persona_id=chat_message_req.persona_id,
            )
            chat_session_id = new_chat_session.id
        except Exception as e:
            logger.exception(e)
            # Chain the original error so tracebacks show the real cause (B904).
            raise HTTPException(
                status_code=400, detail="Invalid Persona provided."
            ) from e
    else:
        chat_session_id = chat_message_req.chat_session_id

    # Use the latest message in the session as the parent; fall back to the
    # root message for a brand-new/empty session.
    try:
        parent_message = create_chat_history_chain(
            chat_session_id=chat_session_id, db_session=db_session
        )[-1]
    except Exception:
        parent_message = get_or_create_root_message(
            chat_session_id=chat_session_id, db_session=db_session
        )

    # Default to always running retrieval when the caller specified neither
    # retrieval options nor explicit documents.
    if (
        chat_message_req.retrieval_options is None
        and chat_message_req.search_doc_ids is None
    ):
        retrieval_options: RetrievalDetails | None = RetrievalDetails(
            run_search=OptionalSearchSetting.ALWAYS,
            real_time=False,
        )
    else:
        retrieval_options = chat_message_req.retrieval_options

    full_chat_msg_info = CreateChatMessageRequest(
        chat_session_id=chat_session_id,
        parent_message_id=parent_message.id,
        message=chat_message_req.message,
        file_descriptors=[],
        search_doc_ids=chat_message_req.search_doc_ids,
        retrieval_options=retrieval_options,
        # Simple API does not support reranking, hide complexity from user
        rerank_settings=None,
        query_override=chat_message_req.query_override,
        # Currently only applies to search flow not chat
        chunks_above=0,
        chunks_below=0,
        full_doc=chat_message_req.full_doc,
        structured_response_format=chat_message_req.structured_response_format,
        origin=MessageOrigin.API,
    )

    packets = stream_chat_message_objects(
        new_msg_req=full_chat_msg_info,
        user=user,
        db_session=db_session,
    )

    return gather_stream(packets)
|
||||
|
||||
|
||||
@router.post("/send-message-simple-with-history")
def handle_send_message_simple_with_history(
    req: BasicCreateChatMessageWithHistoryRequest,
    user: User | None = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> ChatBasicResponse:
    """This is a Non-Streaming version that only gives back a minimal set of information.

    Takes in chat history maintained by the caller and does query rephrasing
    similar to answer-with-quote. A fresh chat session is created per call and
    the supplied history is persisted into it before the final query runs.

    Raises:
        HTTPException(400): no messages, an empty message, or roles that do
            not alternate starting from a user message.
    """

    if len(req.messages) == 0:
        raise HTTPException(status_code=400, detail="Messages cannot be zero length")

    # This is a sanity check to make sure the chat history is valid
    # It must start with a user message and alternate between user and assistant
    expected_role = MessageType.USER
    for msg in req.messages:
        if not msg.message:
            raise HTTPException(
                status_code=400, detail="One or more chat messages were empty"
            )

        if msg.role != expected_role:
            raise HTTPException(
                status_code=400,
                detail="Message roles must start and end with MessageType.USER and alternate in-between.",
            )
        if expected_role == MessageType.USER:
            expected_role = MessageType.ASSISTANT
        else:
            expected_role = MessageType.USER

    # Last element is the new query; everything before it is prior context.
    query = req.messages[-1].message
    msg_history = req.messages[:-1]

    logger.notice(f"Received new simple with history chat message: {query}")

    user_id = user.id if user is not None else None
    chat_session = create_chat_session(
        db_session=db_session,
        description="handle_send_message_simple_with_history",
        user_id=user_id,
        persona_id=req.persona_id,
    )

    llm = get_llm_for_persona(persona=chat_session.persona, user=user)

    # Tokenizer is needed to record token counts on the persisted messages.
    llm_tokenizer = get_tokenizer(
        model_name=llm.config.model_name,
        provider_type=llm.config.model_provider,
    )

    # Every chat Session begins with an empty root message
    root_message = get_or_create_root_message(
        chat_session_id=chat_session.id, db_session=db_session
    )

    # Persist the caller-supplied history as a linear chain under the root;
    # commit once at the end rather than per message.
    chat_message = root_message
    for msg in msg_history:
        chat_message = create_new_chat_message(
            chat_session_id=chat_session.id,
            parent_message=chat_message,
            message=msg.message,
            token_count=len(llm_tokenizer.encode(msg.message)),
            message_type=msg.role,
            db_session=db_session,
            commit=False,
        )
    db_session.commit()

    # Default to always running retrieval when the caller specified neither
    # retrieval options nor explicit documents.
    if req.retrieval_options is None and req.search_doc_ids is None:
        retrieval_options: RetrievalDetails | None = RetrievalDetails(
            run_search=OptionalSearchSetting.ALWAYS,
            real_time=False,
        )
    else:
        retrieval_options = req.retrieval_options

    full_chat_msg_info = CreateChatMessageRequest(
        chat_session_id=chat_session.id,
        parent_message_id=chat_message.id,
        message=query,
        file_descriptors=[],
        search_doc_ids=req.search_doc_ids,
        retrieval_options=retrieval_options,
        # Simple API does not support reranking, hide complexity from user
        rerank_settings=None,
        query_override=None,
        chunks_above=0,
        chunks_below=0,
        full_doc=req.full_doc,
        structured_response_format=req.structured_response_format,
        origin=MessageOrigin.API,
    )

    packets = stream_chat_message_objects(
        new_msg_req=full_chat_msg_info,
        user=user,
        db_session=db_session,
    )

    return gather_stream(packets)
|
||||
@@ -1,12 +1,18 @@
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from collections import OrderedDict
|
||||
from typing import Literal
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
from pydantic import model_validator
|
||||
|
||||
from onyx.chat.models import ThreadMessage
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.context.search.models import BaseFilters
|
||||
from onyx.context.search.models import InferenceSection
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.context.search.models import BasicChunkRequest
|
||||
from onyx.context.search.models import ChunkContext
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
from onyx.context.search.models import RetrievalDetails
|
||||
from onyx.server.manage.models import StandardAnswer
|
||||
|
||||
|
||||
@@ -19,89 +25,119 @@ class StandardAnswerResponse(BaseModel):
|
||||
standard_answers: list[StandardAnswer] = Field(default_factory=list)
|
||||
|
||||
|
||||
class SearchFlowClassificationRequest(BaseModel):
|
||||
user_query: str
|
||||
class DocumentSearchRequest(BasicChunkRequest):
|
||||
user_selected_filters: BaseFilters | None = None
|
||||
|
||||
|
||||
class SearchFlowClassificationResponse(BaseModel):
|
||||
is_search_flow: bool
|
||||
class DocumentSearchResponse(BaseModel):
|
||||
top_documents: list[InferenceChunk]
|
||||
|
||||
|
||||
class SendSearchQueryRequest(BaseModel):
|
||||
search_query: str
|
||||
filters: BaseFilters | None = None
|
||||
num_docs_fed_to_llm_selection: int | None = None
|
||||
run_query_expansion: bool = False
|
||||
num_hits: int = 50
|
||||
class BasicCreateChatMessageRequest(ChunkContext):
|
||||
"""If a chat_session_id is not provided, a persona_id must be provided to automatically create a new chat session
|
||||
Note, for simplicity this option only allows for a single linear chain of messages
|
||||
"""
|
||||
|
||||
include_content: bool = False
|
||||
stream: bool = False
|
||||
chat_session_id: UUID | None = None
|
||||
# Optional persona_id to create a new chat session if chat_session_id is not provided
|
||||
persona_id: int | None = None
|
||||
# New message contents
|
||||
message: str
|
||||
# Defaults to using retrieval with no additional filters
|
||||
retrieval_options: RetrievalDetails | None = None
|
||||
# Allows the caller to specify the exact search query they want to use
|
||||
# will disable Query Rewording if specified
|
||||
query_override: str | None = None
|
||||
# If search_doc_ids provided, then retrieval options are unused
|
||||
search_doc_ids: list[int] | None = None
|
||||
# only works if using an OpenAI model. See the following for more details:
|
||||
# https://platform.openai.com/docs/guides/structured-outputs/introduction
|
||||
structured_response_format: dict | None = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_chat_session_or_persona(self) -> "BasicCreateChatMessageRequest":
|
||||
if self.chat_session_id is None and self.persona_id is None:
|
||||
raise ValueError("Either chat_session_id or persona_id must be provided")
|
||||
return self
|
||||
|
||||
|
||||
class SearchDocWithContent(SearchDoc):
|
||||
# Allows None because this is determined by a flag but the object used in code
|
||||
# of the search path uses this type
|
||||
content: str | None
|
||||
class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
|
||||
# Last element is the new query. All previous elements are historical context
|
||||
messages: list[ThreadMessage]
|
||||
persona_id: int
|
||||
retrieval_options: RetrievalDetails | None = None
|
||||
query_override: str | None = None
|
||||
skip_rerank: bool | None = None
|
||||
# If search_doc_ids provided, then retrieval options are unused
|
||||
search_doc_ids: list[int] | None = None
|
||||
# only works if using an OpenAI model. See the following for more details:
|
||||
# https://platform.openai.com/docs/guides/structured-outputs/introduction
|
||||
structured_response_format: dict | None = None
|
||||
|
||||
@classmethod
|
||||
def from_inference_sections(
|
||||
cls,
|
||||
sections: Sequence[InferenceSection],
|
||||
include_content: bool = False,
|
||||
is_internet: bool = False,
|
||||
) -> list["SearchDocWithContent"]:
|
||||
"""Convert InferenceSections to SearchDocWithContent objects.
|
||||
|
||||
Args:
|
||||
sections: Sequence of InferenceSection objects
|
||||
include_content: If True, populate content field with combined_content
|
||||
is_internet: Whether these are internet search results
|
||||
class SimpleDoc(BaseModel):
|
||||
id: str
|
||||
semantic_identifier: str
|
||||
link: str | None
|
||||
blurb: str
|
||||
match_highlights: list[str]
|
||||
source_type: DocumentSource
|
||||
metadata: dict | None
|
||||
|
||||
Returns:
|
||||
List of SearchDocWithContent with optional content
|
||||
|
||||
class AgentSubQuestion(BaseModel):
|
||||
sub_question: str
|
||||
document_ids: list[str]
|
||||
|
||||
|
||||
class AgentAnswer(BaseModel):
|
||||
answer: str
|
||||
answer_type: Literal["agent_sub_answer", "agent_level_answer"]
|
||||
|
||||
|
||||
class AgentSubQuery(BaseModel):
|
||||
sub_query: str
|
||||
query_id: int
|
||||
|
||||
@staticmethod
|
||||
def make_dict_by_level_and_question_index(
|
||||
original_dict: dict[tuple[int, int, int], "AgentSubQuery"],
|
||||
) -> dict[int, dict[int, list["AgentSubQuery"]]]:
|
||||
"""Takes a dict of tuple(level, question num, query_id) to sub queries.
|
||||
|
||||
returns a dict of level to dict[question num to list of query_id's]
|
||||
Ordering is asc for readability.
|
||||
"""
|
||||
if not sections:
|
||||
return []
|
||||
# In this function, when we sort int | None, we deliberately push None to the end
|
||||
|
||||
return [
|
||||
cls(
|
||||
document_id=(chunk := section.center_chunk).document_id,
|
||||
chunk_ind=chunk.chunk_id,
|
||||
semantic_identifier=chunk.semantic_identifier or "Unknown",
|
||||
link=chunk.source_links[0] if chunk.source_links else None,
|
||||
blurb=chunk.blurb,
|
||||
source_type=chunk.source_type,
|
||||
boost=chunk.boost,
|
||||
hidden=chunk.hidden,
|
||||
metadata=chunk.metadata,
|
||||
score=chunk.score,
|
||||
match_highlights=chunk.match_highlights,
|
||||
updated_at=chunk.updated_at,
|
||||
primary_owners=chunk.primary_owners,
|
||||
secondary_owners=chunk.secondary_owners,
|
||||
is_internet=is_internet,
|
||||
content=section.combined_content if include_content else None,
|
||||
# map entries to the level_question_dict
|
||||
level_question_dict: dict[int, dict[int, list["AgentSubQuery"]]] = {}
|
||||
for k1, obj in original_dict.items():
|
||||
level = k1[0]
|
||||
question = k1[1]
|
||||
|
||||
if level not in level_question_dict:
|
||||
level_question_dict[level] = {}
|
||||
|
||||
if question not in level_question_dict[level]:
|
||||
level_question_dict[level][question] = []
|
||||
|
||||
level_question_dict[level][question].append(obj)
|
||||
|
||||
# sort each query_id list and question_index
|
||||
for key1, obj1 in level_question_dict.items():
|
||||
for key2, value2 in obj1.items():
|
||||
# sort the query_id list of each question_index
|
||||
level_question_dict[key1][key2] = sorted(
|
||||
value2, key=lambda o: o.query_id
|
||||
)
|
||||
# sort the question_index dict of level
|
||||
level_question_dict[key1] = OrderedDict(
|
||||
sorted(level_question_dict[key1].items(), key=lambda x: (x is None, x))
|
||||
)
|
||||
for section in sections
|
||||
]
|
||||
|
||||
|
||||
class SearchFullResponse(BaseModel):
|
||||
all_executed_queries: list[str]
|
||||
search_docs: list[SearchDocWithContent]
|
||||
# Reasoning tokens output by the LLM for the document selection
|
||||
doc_selection_reasoning: str | None = None
|
||||
# This a list of document ids that are in the search_docs list
|
||||
llm_selected_doc_ids: list[str] | None = None
|
||||
# Error message if the search failed partway through
|
||||
error: str | None = None
|
||||
|
||||
|
||||
class SearchQueryResponse(BaseModel):
|
||||
query: str
|
||||
query_expansions: list[str] | None
|
||||
created_at: datetime
|
||||
|
||||
|
||||
class SearchHistoryResponse(BaseModel):
|
||||
search_queries: list[SearchQueryResponse]
|
||||
# sort the top dict of levels
|
||||
sorted_dict = OrderedDict(
|
||||
sorted(level_question_dict.items(), key=lambda x: (x is None, x))
|
||||
)
|
||||
return sorted_dict
|
||||
|
||||
@@ -1,170 +0,0 @@
|
||||
from collections.abc import Generator
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.db.search import fetch_search_queries_for_user
|
||||
from ee.onyx.search.process_search_query import gather_search_stream
|
||||
from ee.onyx.search.process_search_query import stream_search_query
|
||||
from ee.onyx.secondary_llm_flows.search_flow_classification import (
|
||||
classify_is_search_flow,
|
||||
)
|
||||
from ee.onyx.server.query_and_chat.models import SearchFlowClassificationRequest
|
||||
from ee.onyx.server.query_and_chat.models import SearchFlowClassificationResponse
|
||||
from ee.onyx.server.query_and_chat.models import SearchFullResponse
|
||||
from ee.onyx.server.query_and_chat.models import SearchHistoryResponse
|
||||
from ee.onyx.server.query_and_chat.models import SearchQueryResponse
|
||||
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
|
||||
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.models import User
|
||||
from onyx.llm.factory import get_default_llm
|
||||
from onyx.server.usage_limits import check_llm_cost_limit_for_provider
|
||||
from onyx.server.utils import get_json_line
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
router = APIRouter(prefix="/search")
|
||||
|
||||
|
||||
@router.post("/search-flow-classification")
|
||||
def search_flow_classification(
|
||||
request: SearchFlowClassificationRequest,
|
||||
# This is added just to ensure this endpoint isn't spammed by non-authorized users since there's an LLM call underneath it
|
||||
_: User | None = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> SearchFlowClassificationResponse:
|
||||
query = request.user_query
|
||||
# This is a heuristic that if the user is typing a lot of text, it's unlikely they're looking for some specific document
|
||||
# Most likely something needs to be done with the text included so we'll just classify it as a chat flow
|
||||
if len(query) > 200:
|
||||
return SearchFlowClassificationResponse(is_search_flow=False)
|
||||
|
||||
llm = get_default_llm()
|
||||
|
||||
check_llm_cost_limit_for_provider(
|
||||
db_session=db_session,
|
||||
tenant_id=get_current_tenant_id(),
|
||||
llm_provider_api_key=llm.config.api_key,
|
||||
)
|
||||
|
||||
try:
|
||||
is_search_flow = classify_is_search_flow(query=query, llm=llm)
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
"Search flow classification failed; defaulting to chat flow",
|
||||
exc_info=e,
|
||||
)
|
||||
is_search_flow = False
|
||||
|
||||
return SearchFlowClassificationResponse(is_search_flow=is_search_flow)
|
||||
|
||||
|
||||
@router.post("/send-search-message", response_model=None)
|
||||
def handle_send_search_message(
|
||||
request: SendSearchQueryRequest,
|
||||
user: User | None = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> StreamingResponse | SearchFullResponse:
|
||||
"""
|
||||
Execute a search query with optional streaming.
|
||||
|
||||
When stream=True: Returns StreamingResponse with SSE
|
||||
When stream=False: Returns SearchFullResponse
|
||||
"""
|
||||
logger.debug(f"Received search query: {request.search_query}")
|
||||
|
||||
# Non-streaming path
|
||||
if not request.stream:
|
||||
try:
|
||||
packets = stream_search_query(request, user, db_session)
|
||||
return gather_search_stream(packets)
|
||||
except NotImplementedError as e:
|
||||
return SearchFullResponse(
|
||||
all_executed_queries=[],
|
||||
search_docs=[],
|
||||
error=str(e),
|
||||
)
|
||||
|
||||
# Streaming path
|
||||
def stream_generator() -> Generator[str, None, None]:
|
||||
try:
|
||||
with get_session_with_current_tenant() as streaming_db_session:
|
||||
for packet in stream_search_query(request, user, streaming_db_session):
|
||||
yield get_json_line(packet.model_dump())
|
||||
except NotImplementedError as e:
|
||||
yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception("Error in search streaming")
|
||||
yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
|
||||
|
||||
return StreamingResponse(stream_generator(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@router.get("/search-history")
|
||||
def get_search_history(
|
||||
limit: int = 100,
|
||||
filter_days: int | None = None,
|
||||
user: User | None = Depends(current_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> SearchHistoryResponse:
|
||||
"""
|
||||
Fetch past search queries for the authenticated user.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of queries to return (default 100)
|
||||
filter_days: Only return queries from the last N days (optional)
|
||||
|
||||
Returns:
|
||||
SearchHistoryResponse with list of search queries, ordered by most recent first.
|
||||
"""
|
||||
# Validate limit
|
||||
if limit <= 0:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="limit must be greater than 0",
|
||||
)
|
||||
if limit > 1000:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="limit must be at most 1000",
|
||||
)
|
||||
|
||||
# Validate filter_days
|
||||
if filter_days is not None and filter_days <= 0:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="filter_days must be greater than 0",
|
||||
)
|
||||
|
||||
# TODO(yuhong) remove this
|
||||
if user is None:
|
||||
# Return empty list for unauthenticated users
|
||||
return SearchHistoryResponse(search_queries=[])
|
||||
|
||||
search_queries = fetch_search_queries_for_user(
|
||||
db_session=db_session,
|
||||
user_id=user.id,
|
||||
filter_days=filter_days,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
return SearchHistoryResponse(
|
||||
search_queries=[
|
||||
SearchQueryResponse(
|
||||
query=sq.query,
|
||||
query_expansions=sq.query_expansions,
|
||||
created_at=sq.created_at,
|
||||
)
|
||||
for sq in search_queries
|
||||
]
|
||||
)
|
||||
@@ -1,35 +0,0 @@
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import ConfigDict
|
||||
|
||||
from ee.onyx.server.query_and_chat.models import SearchDocWithContent
|
||||
|
||||
|
||||
class SearchQueriesPacket(BaseModel):
|
||||
model_config = ConfigDict(frozen=True)
|
||||
|
||||
type: Literal["search_queries"] = "search_queries"
|
||||
all_executed_queries: list[str]
|
||||
|
||||
|
||||
class SearchDocsPacket(BaseModel):
|
||||
model_config = ConfigDict(frozen=True)
|
||||
|
||||
type: Literal["search_docs"] = "search_docs"
|
||||
search_docs: list[SearchDocWithContent]
|
||||
|
||||
|
||||
class SearchErrorPacket(BaseModel):
|
||||
model_config = ConfigDict(frozen=True)
|
||||
|
||||
type: Literal["search_error"] = "search_error"
|
||||
error: str
|
||||
|
||||
|
||||
class LLMSelectedDocsPacket(BaseModel):
|
||||
model_config = ConfigDict(frozen=True)
|
||||
|
||||
type: Literal["llm_selected_docs"] = "llm_selected_docs"
|
||||
# None if LLM selection failed, empty list if no docs selected, list of IDs otherwise
|
||||
llm_selected_doc_ids: list[str] | None
|
||||
@@ -32,7 +32,6 @@ from onyx.configs.constants import MessageType
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import PUBLIC_API_TAGS
|
||||
from onyx.configs.constants import QAFeedbackType
|
||||
from onyx.configs.constants import QueryHistoryType
|
||||
from onyx.configs.constants import SessionType
|
||||
@@ -49,6 +48,7 @@ from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.server.documents.models import PaginatedReturn
|
||||
from onyx.server.query_and_chat.models import ChatSessionDetails
|
||||
from onyx.server.query_and_chat.models import ChatSessionsResponse
|
||||
from onyx.server.utils import PUBLIC_API_TAGS
|
||||
from onyx.utils.threadpool_concurrency import parallel_yield
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
"""EE Settings API - provides license-aware settings override."""
|
||||
|
||||
from redis.exceptions import RedisError
|
||||
|
||||
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
|
||||
from ee.onyx.db.license import get_cached_license_metadata
|
||||
from onyx.server.settings.models import ApplicationStatus
|
||||
from onyx.server.settings.models import Settings
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Statuses that indicate a billing/license problem - propagate these to settings
|
||||
_GATED_STATUSES = frozenset(
|
||||
{
|
||||
ApplicationStatus.GATED_ACCESS,
|
||||
ApplicationStatus.GRACE_PERIOD,
|
||||
ApplicationStatus.PAYMENT_REMINDER,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def apply_license_status_to_settings(settings: Settings) -> Settings:
|
||||
"""EE version: checks license status for self-hosted deployments.
|
||||
|
||||
For self-hosted, looks up license metadata and overrides application_status
|
||||
if the license is missing or indicates a problem (expired, grace period, etc.).
|
||||
|
||||
For multi-tenant (cloud), the settings already have the correct status
|
||||
from the control plane, so no override is needed.
|
||||
|
||||
If LICENSE_ENFORCEMENT_ENABLED is false, settings are returned unchanged,
|
||||
allowing the product to function normally without license checks.
|
||||
"""
|
||||
if not LICENSE_ENFORCEMENT_ENABLED:
|
||||
return settings
|
||||
|
||||
if MULTI_TENANT:
|
||||
return settings
|
||||
|
||||
tenant_id = get_current_tenant_id()
|
||||
try:
|
||||
metadata = get_cached_license_metadata(tenant_id)
|
||||
if metadata and metadata.status in _GATED_STATUSES:
|
||||
settings.application_status = metadata.status
|
||||
elif not metadata:
|
||||
# No license = gated access for self-hosted EE
|
||||
settings.application_status = ApplicationStatus.GATED_ACCESS
|
||||
except RedisError as e:
|
||||
logger.warning(f"Failed to check license metadata for settings: {e}")
|
||||
|
||||
return settings
|
||||
@@ -1,14 +1,10 @@
|
||||
"""Tenant-specific usage limit overrides from the control plane (EE version)."""
|
||||
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
from ee.onyx.server.tenants.access import generate_data_plane_token
|
||||
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
|
||||
from onyx.configs.app_configs import DEV_MODE
|
||||
from onyx.server.tenant_usage_limits import TenantUsageLimitOverrides
|
||||
from onyx.server.usage_limits import NO_LIMIT
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
@@ -16,12 +12,9 @@ logger = setup_logger()
|
||||
|
||||
# In-memory storage for tenant overrides (populated at startup)
|
||||
_tenant_usage_limit_overrides: dict[str, TenantUsageLimitOverrides] | None = None
|
||||
_last_fetch_time: float = 0.0
|
||||
_FETCH_INTERVAL = 60 * 60 * 24 # 24 hours
|
||||
_ERROR_FETCH_INTERVAL = 30 * 60 # 30 minutes (if the last fetch failed)
|
||||
|
||||
|
||||
def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides] | None:
|
||||
def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides]:
|
||||
"""
|
||||
Fetch tenant-specific usage limit overrides from the control plane.
|
||||
|
||||
@@ -52,52 +45,33 @@ def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides] | None
|
||||
f"Failed to parse usage limit overrides for tenant {tenant_id}: {e}"
|
||||
)
|
||||
|
||||
return (
|
||||
result or None
|
||||
) # if empty dictionary, something went wrong and we shouldn't enforce limits
|
||||
return result
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.warning(f"Failed to fetch usage limit overrides from control plane: {e}")
|
||||
return None
|
||||
return {}
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing usage limit overrides: {e}")
|
||||
return None
|
||||
return {}
|
||||
|
||||
|
||||
def load_usage_limit_overrides() -> None:
|
||||
def load_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides]:
|
||||
"""
|
||||
Load tenant usage limit overrides from the control plane.
|
||||
|
||||
Called at server startup to populate the in-memory cache.
|
||||
"""
|
||||
global _tenant_usage_limit_overrides
|
||||
global _last_fetch_time
|
||||
|
||||
logger.info("Loading tenant usage limit overrides from control plane...")
|
||||
overrides = fetch_usage_limit_overrides()
|
||||
|
||||
_last_fetch_time = time.time()
|
||||
|
||||
# use the new result if it exists, otherwise use the old result
|
||||
# (prevents us from updating to a failed fetch result)
|
||||
_tenant_usage_limit_overrides = overrides or _tenant_usage_limit_overrides
|
||||
_tenant_usage_limit_overrides = overrides
|
||||
|
||||
if overrides:
|
||||
logger.info(f"Loaded usage limit overrides for {len(overrides)} tenants")
|
||||
else:
|
||||
logger.info("No tenant-specific usage limit overrides found")
|
||||
|
||||
|
||||
def unlimited(tenant_id: str) -> TenantUsageLimitOverrides:
|
||||
return TenantUsageLimitOverrides(
|
||||
tenant_id=tenant_id,
|
||||
llm_cost_cents_trial=NO_LIMIT,
|
||||
llm_cost_cents_paid=NO_LIMIT,
|
||||
chunks_indexed_trial=NO_LIMIT,
|
||||
chunks_indexed_paid=NO_LIMIT,
|
||||
api_calls_trial=NO_LIMIT,
|
||||
api_calls_paid=NO_LIMIT,
|
||||
non_streaming_calls_trial=NO_LIMIT,
|
||||
non_streaming_calls_paid=NO_LIMIT,
|
||||
)
|
||||
return overrides
|
||||
|
||||
|
||||
def get_tenant_usage_limit_overrides(
|
||||
@@ -112,22 +86,7 @@ def get_tenant_usage_limit_overrides(
|
||||
Returns:
|
||||
TenantUsageLimitOverrides if the tenant has overrides, None otherwise.
|
||||
"""
|
||||
|
||||
if DEV_MODE: # in dev mode, we return unlimited limits for all tenants
|
||||
return unlimited(tenant_id)
|
||||
|
||||
global _tenant_usage_limit_overrides
|
||||
time_since = time.time() - _last_fetch_time
|
||||
if (
|
||||
_tenant_usage_limit_overrides is None and time_since > _ERROR_FETCH_INTERVAL
|
||||
) or (time_since > _FETCH_INTERVAL):
|
||||
logger.debug(
|
||||
f"Last fetch time: {_last_fetch_time}, time since last fetch: {time_since}"
|
||||
)
|
||||
|
||||
load_usage_limit_overrides()
|
||||
|
||||
# If we have failed to fetch from the control plane or we're in dev mode, don't usage limit anyone.
|
||||
if _tenant_usage_limit_overrides is None or DEV_MODE:
|
||||
return unlimited(tenant_id)
|
||||
if _tenant_usage_limit_overrides is None:
|
||||
_tenant_usage_limit_overrides = load_usage_limit_overrides()
|
||||
return _tenant_usage_limit_overrides.get(tenant_id)
|
||||
|
||||
@@ -76,26 +76,6 @@ def fetch_billing_information(
|
||||
return BillingInformation(**response_data)
|
||||
|
||||
|
||||
def fetch_customer_portal_session(tenant_id: str, return_url: str | None = None) -> str:
|
||||
"""
|
||||
Fetch a Stripe customer portal session URL from the control plane.
|
||||
NOTE: This is currently only used for multi-tenant (cloud) deployments.
|
||||
Self-hosted proxy endpoints will be added in a future phase.
|
||||
"""
|
||||
token = generate_data_plane_token()
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
url = f"{CONTROL_PLANE_API_BASE_URL}/create-customer-portal-session"
|
||||
payload = {"tenant_id": tenant_id}
|
||||
if return_url:
|
||||
payload["return_url"] = return_url
|
||||
response = requests.post(url, headers=headers, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()["url"]
|
||||
|
||||
|
||||
def register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription:
|
||||
"""
|
||||
Update the number of seats for a tenant's subscription.
|
||||
|
||||
@@ -1,41 +1,34 @@
|
||||
import asyncio
|
||||
|
||||
import httpx
|
||||
import stripe
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
|
||||
from ee.onyx.auth.users import current_admin_user
|
||||
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
|
||||
from ee.onyx.server.tenants.access import control_plane_dep
|
||||
from ee.onyx.server.tenants.billing import fetch_billing_information
|
||||
from ee.onyx.server.tenants.billing import fetch_customer_portal_session
|
||||
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
|
||||
from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
|
||||
from ee.onyx.server.tenants.models import BillingInformation
|
||||
from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
|
||||
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
|
||||
from ee.onyx.server.tenants.models import ProductGatingRequest
|
||||
from ee.onyx.server.tenants.models import ProductGatingResponse
|
||||
from ee.onyx.server.tenants.models import StripePublishableKeyResponse
|
||||
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
|
||||
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
|
||||
from ee.onyx.server.tenants.product_gating import overwrite_full_gated_set
|
||||
from ee.onyx.server.tenants.product_gating import store_product_gating
|
||||
from onyx.auth.users import User
|
||||
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
|
||||
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
|
||||
from onyx.configs.app_configs import WEB_DOMAIN
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
stripe.api_key = STRIPE_SECRET_KEY
|
||||
logger = setup_logger()
|
||||
|
||||
router = APIRouter(prefix="/tenants")
|
||||
|
||||
# Cache for Stripe publishable key to avoid hitting S3 on every request
|
||||
_stripe_publishable_key_cache: str | None = None
|
||||
_stripe_key_lock = asyncio.Lock()
|
||||
|
||||
|
||||
@router.post("/product-gating")
|
||||
def gate_product(
|
||||
@@ -90,17 +83,21 @@ async def billing_information(
|
||||
async def create_customer_portal_session(
|
||||
_: User = Depends(current_admin_user),
|
||||
) -> dict:
|
||||
"""
|
||||
Create a Stripe customer portal session via the control plane.
|
||||
NOTE: This is currently only used for multi-tenant (cloud) deployments.
|
||||
Self-hosted proxy endpoints will be added in a future phase.
|
||||
"""
|
||||
tenant_id = get_current_tenant_id()
|
||||
return_url = f"{WEB_DOMAIN}/admin/billing"
|
||||
|
||||
try:
|
||||
portal_url = fetch_customer_portal_session(tenant_id, return_url)
|
||||
return {"url": portal_url}
|
||||
stripe_info = fetch_tenant_stripe_information(tenant_id)
|
||||
stripe_customer_id = stripe_info.get("stripe_customer_id")
|
||||
if not stripe_customer_id:
|
||||
raise HTTPException(status_code=400, detail="Stripe customer ID not found")
|
||||
logger.info(stripe_customer_id)
|
||||
|
||||
portal_session = stripe.billing_portal.Session.create(
|
||||
customer=stripe_customer_id,
|
||||
return_url=f"{WEB_DOMAIN}/admin/billing",
|
||||
)
|
||||
logger.info(portal_session)
|
||||
return {"url": portal_session.url}
|
||||
except Exception as e:
|
||||
logger.exception("Failed to create customer portal session")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
@@ -123,67 +120,3 @@ async def create_subscription_session(
|
||||
except Exception as e:
|
||||
logger.exception("Failed to create subscription session")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/stripe-publishable-key")
|
||||
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
|
||||
"""
|
||||
Fetch the Stripe publishable key.
|
||||
Priority: env var override (for testing) > S3 bucket (production).
|
||||
This endpoint is public (no auth required) since publishable keys are safe to expose.
|
||||
The key is cached in memory to avoid hitting S3 on every request.
|
||||
"""
|
||||
global _stripe_publishable_key_cache
|
||||
|
||||
# Fast path: return cached value without lock
|
||||
if _stripe_publishable_key_cache:
|
||||
return StripePublishableKeyResponse(
|
||||
publishable_key=_stripe_publishable_key_cache
|
||||
)
|
||||
|
||||
# Use lock to prevent concurrent S3 requests
|
||||
async with _stripe_key_lock:
|
||||
# Double-check after acquiring lock (another request may have populated cache)
|
||||
if _stripe_publishable_key_cache:
|
||||
return StripePublishableKeyResponse(
|
||||
publishable_key=_stripe_publishable_key_cache
|
||||
)
|
||||
|
||||
# Check for env var override first (for local testing with pk_test_* keys)
|
||||
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
|
||||
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
|
||||
if not key.startswith("pk_"):
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Invalid Stripe publishable key format",
|
||||
)
|
||||
_stripe_publishable_key_cache = key
|
||||
return StripePublishableKeyResponse(publishable_key=key)
|
||||
|
||||
# Fall back to S3 bucket
|
||||
if not STRIPE_PUBLISHABLE_KEY_URL:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Stripe publishable key is not configured",
|
||||
)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
|
||||
response.raise_for_status()
|
||||
key = response.text.strip()
|
||||
|
||||
# Validate key format
|
||||
if not key.startswith("pk_"):
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Invalid Stripe publishable key format",
|
||||
)
|
||||
|
||||
_stripe_publishable_key_cache = key
|
||||
return StripePublishableKeyResponse(publishable_key=key)
|
||||
except httpx.HTTPError:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Failed to fetch Stripe publishable key",
|
||||
)
|
||||
|
||||
@@ -105,7 +105,3 @@ class PendingUserSnapshot(BaseModel):
|
||||
|
||||
class ApproveUserRequest(BaseModel):
|
||||
email: str
|
||||
|
||||
|
||||
class StripePublishableKeyResponse(BaseModel):
|
||||
publishable_key: str
|
||||
|
||||
@@ -65,9 +65,3 @@ def get_gated_tenants() -> set[str]:
|
||||
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
|
||||
gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
|
||||
return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
|
||||
|
||||
|
||||
def is_tenant_gated(tenant_id: str) -> bool:
|
||||
"""Fast O(1) check if tenant is in gated set (multi-tenant only)."""
|
||||
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
|
||||
return bool(redis_client.sismember(GATED_TENANTS_KEY, tenant_id))
|
||||
|
||||
@@ -9,7 +9,6 @@ from ee.onyx.db.token_limit import fetch_user_group_token_rate_limits_for_user
|
||||
from ee.onyx.db.token_limit import insert_user_group_token_rate_limit
|
||||
from onyx.auth.users import current_admin_user
|
||||
from onyx.auth.users import current_curator_or_admin_user
|
||||
from onyx.configs.constants import PUBLIC_API_TAGS
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import User
|
||||
from onyx.db.token_limit import fetch_all_user_token_rate_limits
|
||||
@@ -17,6 +16,7 @@ from onyx.db.token_limit import insert_user_token_rate_limit
|
||||
from onyx.server.query_and_chat.token_limit import any_rate_limit_exists
|
||||
from onyx.server.token_rate_limits.models import TokenRateLimitArgs
|
||||
from onyx.server.token_rate_limits.models import TokenRateLimitDisplay
|
||||
from onyx.server.utils import PUBLIC_API_TAGS
|
||||
|
||||
router = APIRouter(prefix="/admin/token-rate-limits", tags=PUBLIC_API_TAGS)
|
||||
|
||||
|
||||
@@ -18,10 +18,10 @@ from ee.onyx.server.user_group.models import UserGroupCreate
|
||||
from ee.onyx.server.user_group.models import UserGroupUpdate
|
||||
from onyx.auth.users import current_admin_user
|
||||
from onyx.auth.users import current_curator_or_admin_user
|
||||
from onyx.configs.constants import PUBLIC_API_TAGS
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import User
|
||||
from onyx.db.models import UserRole
|
||||
from onyx.server.utils import PUBLIC_API_TAGS
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -11,7 +11,6 @@ from typing import Any
|
||||
from typing import cast
|
||||
from typing import Dict
|
||||
from typing import List
|
||||
from typing import Literal
|
||||
from typing import Optional
|
||||
from typing import Protocol
|
||||
from typing import Tuple
|
||||
@@ -1457,9 +1456,6 @@ def get_default_admin_user_emails_() -> list[str]:
|
||||
|
||||
|
||||
STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
|
||||
STATE_TOKEN_LIFETIME_SECONDS = 3600
|
||||
CSRF_TOKEN_KEY = "csrftoken"
|
||||
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
|
||||
|
||||
|
||||
class OAuth2AuthorizeResponse(BaseModel):
|
||||
@@ -1467,19 +1463,13 @@ class OAuth2AuthorizeResponse(BaseModel):
|
||||
|
||||
|
||||
def generate_state_token(
|
||||
data: Dict[str, str],
|
||||
secret: SecretType,
|
||||
lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,
|
||||
data: Dict[str, str], secret: SecretType, lifetime_seconds: int = 3600
|
||||
) -> str:
|
||||
data["aud"] = STATE_TOKEN_AUDIENCE
|
||||
|
||||
return generate_jwt(data, secret, lifetime_seconds)
|
||||
|
||||
|
||||
def generate_csrf_token() -> str:
|
||||
return secrets.token_urlsafe(32)
|
||||
|
||||
|
||||
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
|
||||
def create_onyx_oauth_router(
|
||||
oauth_client: BaseOAuth2,
|
||||
@@ -1508,13 +1498,6 @@ def get_oauth_router(
|
||||
redirect_url: Optional[str] = None,
|
||||
associate_by_email: bool = False,
|
||||
is_verified_by_default: bool = False,
|
||||
*,
|
||||
csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,
|
||||
csrf_token_cookie_path: str = "/",
|
||||
csrf_token_cookie_domain: Optional[str] = None,
|
||||
csrf_token_cookie_secure: Optional[bool] = None,
|
||||
csrf_token_cookie_httponly: bool = True,
|
||||
csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
|
||||
) -> APIRouter:
|
||||
"""Generate a router with the OAuth routes."""
|
||||
router = APIRouter()
|
||||
@@ -1531,9 +1514,6 @@ def get_oauth_router(
|
||||
route_name=callback_route_name,
|
||||
)
|
||||
|
||||
if csrf_token_cookie_secure is None:
|
||||
csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")
|
||||
|
||||
@router.get(
|
||||
"/authorize",
|
||||
name=f"oauth:{oauth_client.name}.{backend.name}.authorize",
|
||||
@@ -1541,10 +1521,8 @@ def get_oauth_router(
|
||||
)
|
||||
async def authorize(
|
||||
request: Request,
|
||||
response: Response,
|
||||
redirect: bool = Query(False),
|
||||
scopes: List[str] = Query(None),
|
||||
) -> Response | OAuth2AuthorizeResponse:
|
||||
) -> OAuth2AuthorizeResponse:
|
||||
referral_source = request.cookies.get("referral_source", None)
|
||||
|
||||
if redirect_url is not None:
|
||||
@@ -1554,11 +1532,9 @@ def get_oauth_router(
|
||||
|
||||
next_url = request.query_params.get("next", "/")
|
||||
|
||||
csrf_token = generate_csrf_token()
|
||||
state_data: Dict[str, str] = {
|
||||
"next_url": next_url,
|
||||
"referral_source": referral_source or "default_referral",
|
||||
CSRF_TOKEN_KEY: csrf_token,
|
||||
}
|
||||
state = generate_state_token(state_data, state_secret)
|
||||
|
||||
@@ -1575,31 +1551,6 @@ def get_oauth_router(
|
||||
authorization_url, {"access_type": "offline", "prompt": "consent"}
|
||||
)
|
||||
|
||||
if redirect:
|
||||
redirect_response = RedirectResponse(authorization_url, status_code=302)
|
||||
redirect_response.set_cookie(
|
||||
key=csrf_token_cookie_name,
|
||||
value=csrf_token,
|
||||
max_age=STATE_TOKEN_LIFETIME_SECONDS,
|
||||
path=csrf_token_cookie_path,
|
||||
domain=csrf_token_cookie_domain,
|
||||
secure=csrf_token_cookie_secure,
|
||||
httponly=csrf_token_cookie_httponly,
|
||||
samesite=csrf_token_cookie_samesite,
|
||||
)
|
||||
return redirect_response
|
||||
|
||||
response.set_cookie(
|
||||
key=csrf_token_cookie_name,
|
||||
value=csrf_token,
|
||||
max_age=STATE_TOKEN_LIFETIME_SECONDS,
|
||||
path=csrf_token_cookie_path,
|
||||
domain=csrf_token_cookie_domain,
|
||||
secure=csrf_token_cookie_secure,
|
||||
httponly=csrf_token_cookie_httponly,
|
||||
samesite=csrf_token_cookie_samesite,
|
||||
)
|
||||
|
||||
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
|
||||
|
||||
@log_function_time(print_only=True)
|
||||
@@ -1649,33 +1600,7 @@ def get_oauth_router(
|
||||
try:
|
||||
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
|
||||
except jwt.DecodeError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=getattr(
|
||||
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
|
||||
),
|
||||
)
|
||||
except jwt.ExpiredSignatureError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=getattr(
|
||||
ErrorCode,
|
||||
"ACCESS_TOKEN_ALREADY_EXPIRED",
|
||||
"ACCESS_TOKEN_ALREADY_EXPIRED",
|
||||
),
|
||||
)
|
||||
|
||||
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
|
||||
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
|
||||
if (
|
||||
not cookie_csrf_token
|
||||
or not state_csrf_token
|
||||
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
|
||||
)
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
next_url = state_data.get("next_url", "/")
|
||||
referral_source = state_data.get("referral_source", None)
|
||||
|
||||
@@ -26,13 +26,10 @@ from onyx.background.celery.celery_utils import celery_is_worker_primary
|
||||
from onyx.background.celery.celery_utils import make_probe_path
|
||||
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX
|
||||
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
|
||||
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
|
||||
from onyx.document_index.opensearch.client import (
|
||||
wait_for_opensearch_with_timeout,
|
||||
)
|
||||
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.redis.redis_connector import RedisConnector
|
||||
@@ -519,17 +516,14 @@ def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
|
||||
"""Waits for Vespa to become ready subject to a timeout.
|
||||
Raises WorkerShutdown if the timeout is reached."""
|
||||
|
||||
if ENABLE_OPENSEARCH_FOR_ONYX:
|
||||
return
|
||||
|
||||
if not wait_for_vespa_with_timeout():
|
||||
msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
|
||||
msg = "Vespa: Readiness probe did not succeed within the timeout. Exiting..."
|
||||
logger.error(msg)
|
||||
raise WorkerShutdown(msg)
|
||||
|
||||
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
|
||||
if not wait_for_opensearch_with_timeout():
|
||||
msg = "[OpenSearch] Readiness probe did not succeed within the timeout. Exiting..."
|
||||
logger.error(msg)
|
||||
raise WorkerShutdown(msg)
|
||||
|
||||
|
||||
# File for validating worker liveness
|
||||
class LivenessProbe(bootsteps.StartStopStep):
|
||||
|
||||
@@ -87,7 +87,7 @@ from onyx.db.models import SearchSettings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.search_settings import get_secondary_search_settings
|
||||
from onyx.db.swap_index import check_and_perform_index_swap
|
||||
from onyx.document_index.factory import get_all_document_indices
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.file_store.document_batch_storage import DocumentBatchStorage
|
||||
from onyx.file_store.document_batch_storage import get_document_batch_storage
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
@@ -1436,7 +1436,7 @@ def _docprocessing_task(
|
||||
callback=callback,
|
||||
)
|
||||
|
||||
document_indices = get_all_document_indices(
|
||||
document_index = get_default_document_index(
|
||||
index_attempt.search_settings,
|
||||
None,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
@@ -1473,7 +1473,7 @@ def _docprocessing_task(
|
||||
# real work happens here!
|
||||
index_pipeline_result = run_indexing_pipeline(
|
||||
embedder=embedding_model,
|
||||
document_indices=document_indices,
|
||||
document_index=document_index,
|
||||
ignore_time_skip=True, # Documents are already filtered during extraction
|
||||
db_session=db_session,
|
||||
tenant_id=tenant_id,
|
||||
|
||||
@@ -25,7 +25,7 @@ from onyx.db.document_set import fetch_document_sets_for_document
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.relationships import delete_document_references_from_kg
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.document_index.factory import get_all_document_indices
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.document_index.interfaces import VespaDocumentFields
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
@@ -97,17 +97,13 @@ def document_by_cc_pair_cleanup_task(
|
||||
action = "skip"
|
||||
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
# This flow is for updates and deletion so we get all indices.
|
||||
document_indices = get_all_document_indices(
|
||||
doc_index = get_default_document_index(
|
||||
active_search_settings.primary,
|
||||
active_search_settings.secondary,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
)
|
||||
|
||||
retry_document_indices: list[RetryDocumentIndex] = [
|
||||
RetryDocumentIndex(document_index)
|
||||
for document_index in document_indices
|
||||
]
|
||||
retry_index = RetryDocumentIndex(doc_index)
|
||||
|
||||
count = get_document_connector_count(db_session, document_id)
|
||||
if count == 1:
|
||||
@@ -117,12 +113,11 @@ def document_by_cc_pair_cleanup_task(
|
||||
|
||||
chunk_count = fetch_chunk_count_for_document(document_id, db_session)
|
||||
|
||||
for retry_document_index in retry_document_indices:
|
||||
_ = retry_document_index.delete_single(
|
||||
document_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=chunk_count,
|
||||
)
|
||||
_ = retry_index.delete_single(
|
||||
document_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=chunk_count,
|
||||
)
|
||||
|
||||
delete_document_references_from_kg(
|
||||
db_session=db_session,
|
||||
@@ -160,18 +155,14 @@ def document_by_cc_pair_cleanup_task(
|
||||
hidden=doc.hidden,
|
||||
)
|
||||
|
||||
for retry_document_index in retry_document_indices:
|
||||
# TODO(andrei): Previously there was a comment here saying
|
||||
# it was ok if a doc did not exist in the document index. I
|
||||
# don't agree with that claim, so keep an eye on this task
|
||||
# to see if this raises.
|
||||
retry_document_index.update_single(
|
||||
document_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=doc.chunk_count,
|
||||
fields=fields,
|
||||
user_fields=None,
|
||||
)
|
||||
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
|
||||
retry_index.update_single(
|
||||
document_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=doc.chunk_count,
|
||||
fields=fields,
|
||||
user_fields=None,
|
||||
)
|
||||
|
||||
# there are still other cc_pair references to the doc, so just resync to Vespa
|
||||
delete_document_by_connector_credential_pair__no_commit(
|
||||
|
||||
@@ -12,6 +12,7 @@ from retry import retry
|
||||
from sqlalchemy import select
|
||||
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.background.celery.celery_redis import celery_get_queue_length
|
||||
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
|
||||
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
|
||||
from onyx.configs.app_configs import MANAGED_VESPA
|
||||
@@ -19,12 +20,14 @@ from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
|
||||
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
|
||||
from onyx.connectors.file.connector import LocalFileConnector
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
@@ -32,7 +35,7 @@ from onyx.db.enums import UserFileStatus
|
||||
from onyx.db.models import UserFile
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.db.search_settings import get_active_search_settings_list
|
||||
from onyx.document_index.factory import get_all_document_indices
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.document_index.interfaces import VespaDocumentUserFields
|
||||
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
@@ -53,6 +56,17 @@ def _user_file_lock_key(user_file_id: str | UUID) -> str:
|
||||
return f"{OnyxRedisLocks.USER_FILE_PROCESSING_LOCK_PREFIX}:{user_file_id}"
|
||||
|
||||
|
||||
def _user_file_queued_key(user_file_id: str | UUID) -> str:
|
||||
"""Key that exists while a process_single_user_file task is sitting in the queue.
|
||||
|
||||
The beat generator sets this with a TTL equal to CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
|
||||
before enqueuing and the worker deletes it as its first action. This prevents
|
||||
the beat from adding duplicate tasks for files that already have a live task
|
||||
in flight.
|
||||
"""
|
||||
return f"{OnyxRedisLocks.USER_FILE_QUEUED_PREFIX}:{user_file_id}"
|
||||
|
||||
|
||||
def _user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
|
||||
return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}"
|
||||
|
||||
@@ -116,7 +130,24 @@ def _get_document_chunk_count(
|
||||
def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
|
||||
"""Scan for user files with PROCESSING status and enqueue per-file tasks.
|
||||
|
||||
Uses direct Redis locks to avoid overlapping runs.
|
||||
Three mechanisms prevent queue runaway:
|
||||
|
||||
1. **Queue depth backpressure** – if the broker queue already has more than
|
||||
USER_FILE_PROCESSING_MAX_QUEUE_DEPTH items we skip this beat cycle
|
||||
entirely. Workers are clearly behind; adding more tasks would only make
|
||||
the backlog worse.
|
||||
|
||||
2. **Per-file queued guard** – before enqueuing a task we set a short-lived
|
||||
Redis key (TTL = CELERY_USER_FILE_PROCESSING_TASK_EXPIRES). If that key
|
||||
already exists the file already has a live task in the queue, so we skip
|
||||
it. The worker deletes the key the moment it picks up the task so the
|
||||
next beat cycle can re-enqueue if the file is still PROCESSING.
|
||||
|
||||
3. **Task expiry** – every enqueued task carries an `expires` value equal to
|
||||
CELERY_USER_FILE_PROCESSING_TASK_EXPIRES. If a task is still sitting in
|
||||
the queue after that deadline, Celery discards it without touching the DB.
|
||||
This is a belt-and-suspenders defence: even if the guard key is lost (e.g.
|
||||
Redis restart), stale tasks evict themselves rather than piling up forever.
|
||||
"""
|
||||
task_logger.info("check_user_file_processing - Starting")
|
||||
|
||||
@@ -131,7 +162,21 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
|
||||
return None
|
||||
|
||||
enqueued = 0
|
||||
skipped_guard = 0
|
||||
try:
|
||||
# --- Protection 1: queue depth backpressure ---
|
||||
r_celery = self.app.broker_connection().channel().client # type: ignore
|
||||
queue_len = celery_get_queue_length(
|
||||
OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
|
||||
)
|
||||
if queue_len > USER_FILE_PROCESSING_MAX_QUEUE_DEPTH:
|
||||
task_logger.warning(
|
||||
f"check_user_file_processing - Queue depth {queue_len} exceeds "
|
||||
f"{USER_FILE_PROCESSING_MAX_QUEUE_DEPTH}, skipping enqueue for "
|
||||
f"tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
user_file_ids = (
|
||||
db_session.execute(
|
||||
@@ -144,12 +189,35 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
|
||||
)
|
||||
|
||||
for user_file_id in user_file_ids:
|
||||
self.app.send_task(
|
||||
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
|
||||
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
|
||||
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
# --- Protection 2: per-file queued guard ---
|
||||
queued_key = _user_file_queued_key(user_file_id)
|
||||
guard_set = redis_client.set(
|
||||
queued_key,
|
||||
1,
|
||||
ex=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
|
||||
nx=True,
|
||||
)
|
||||
if not guard_set:
|
||||
skipped_guard += 1
|
||||
continue
|
||||
|
||||
# --- Protection 3: task expiry ---
|
||||
# If task submission fails, clear the guard immediately so the
|
||||
# next beat cycle can retry enqueuing this file.
|
||||
try:
|
||||
self.app.send_task(
|
||||
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
|
||||
kwargs={
|
||||
"user_file_id": str(user_file_id),
|
||||
"tenant_id": tenant_id,
|
||||
},
|
||||
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
expires=CELERY_USER_FILE_PROCESSING_TASK_EXPIRES,
|
||||
)
|
||||
except Exception:
|
||||
redis_client.delete(queued_key)
|
||||
raise
|
||||
enqueued += 1
|
||||
|
||||
finally:
|
||||
@@ -157,7 +225,8 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"check_user_file_processing - Enqueued {enqueued} tasks for tenant={tenant_id}"
|
||||
f"check_user_file_processing - Enqueued {enqueued} skipped_guard={skipped_guard} "
|
||||
f"tasks for tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -172,6 +241,12 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
|
||||
start = time.monotonic()
|
||||
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
|
||||
# Clear the "queued" guard set by the beat generator so that the next beat
|
||||
# cycle can re-enqueue this file if it is still in PROCESSING state after
|
||||
# this task completes or fails.
|
||||
redis_client.delete(_user_file_queued_key(user_file_id))
|
||||
|
||||
file_lock: RedisLock = redis_client.lock(
|
||||
_user_file_lock_key(user_file_id),
|
||||
timeout=CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT,
|
||||
@@ -244,8 +319,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
|
||||
search_settings=current_search_settings,
|
||||
)
|
||||
|
||||
# This flow is for indexing so we get all indices.
|
||||
document_indices = get_all_document_indices(
|
||||
document_index = get_default_document_index(
|
||||
current_search_settings,
|
||||
None,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
@@ -259,7 +333,7 @@ def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -
|
||||
# real work happens here!
|
||||
index_pipeline_result = run_indexing_pipeline(
|
||||
embedder=embedding_model,
|
||||
document_indices=document_indices,
|
||||
document_index=document_index,
|
||||
ignore_time_skip=True,
|
||||
db_session=db_session,
|
||||
tenant_id=tenant_id,
|
||||
@@ -413,16 +487,12 @@ def process_single_user_file_delete(
|
||||
httpx_init_vespa_pool(20)
|
||||
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
# This flow is for deletion so we get all indices.
|
||||
document_indices = get_all_document_indices(
|
||||
document_index = get_default_document_index(
|
||||
search_settings=active_search_settings.primary,
|
||||
secondary_search_settings=active_search_settings.secondary,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
)
|
||||
retry_document_indices: list[RetryDocumentIndex] = [
|
||||
RetryDocumentIndex(document_index)
|
||||
for document_index in document_indices
|
||||
]
|
||||
retry_index = RetryDocumentIndex(document_index)
|
||||
index_name = active_search_settings.primary.index_name
|
||||
selection = f"{index_name}.document_id=='{user_file_id}'"
|
||||
|
||||
@@ -443,12 +513,11 @@ def process_single_user_file_delete(
|
||||
else:
|
||||
chunk_count = user_file.chunk_count
|
||||
|
||||
for retry_document_index in retry_document_indices:
|
||||
retry_document_index.delete_single(
|
||||
doc_id=user_file_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=chunk_count,
|
||||
)
|
||||
retry_index.delete_single(
|
||||
doc_id=user_file_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=chunk_count,
|
||||
)
|
||||
|
||||
# 2) Delete the user-uploaded file content from filestore (blob + metadata)
|
||||
file_store = get_default_file_store()
|
||||
@@ -570,16 +639,12 @@ def process_single_user_file_project_sync(
|
||||
httpx_init_vespa_pool(20)
|
||||
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
# This flow is for updates so we get all indices.
|
||||
document_indices = get_all_document_indices(
|
||||
doc_index = get_default_document_index(
|
||||
search_settings=active_search_settings.primary,
|
||||
secondary_search_settings=active_search_settings.secondary,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
)
|
||||
retry_document_indices: list[RetryDocumentIndex] = [
|
||||
RetryDocumentIndex(document_index)
|
||||
for document_index in document_indices
|
||||
]
|
||||
retry_index = RetryDocumentIndex(doc_index)
|
||||
|
||||
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
|
||||
if not user_file:
|
||||
@@ -589,14 +654,13 @@ def process_single_user_file_project_sync(
|
||||
return None
|
||||
|
||||
project_ids = [project.id for project in user_file.projects]
|
||||
for retry_document_index in retry_document_indices:
|
||||
retry_document_index.update_single(
|
||||
doc_id=str(user_file.id),
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=user_file.chunk_count,
|
||||
fields=None,
|
||||
user_fields=VespaDocumentUserFields(user_projects=project_ids),
|
||||
)
|
||||
retry_index.update_single(
|
||||
doc_id=str(user_file.id),
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=user_file.chunk_count,
|
||||
fields=None,
|
||||
user_fields=VespaDocumentUserFields(user_projects=project_ids),
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"process_single_user_file_project_sync - User file id={user_file_id}"
|
||||
|
||||
@@ -49,7 +49,7 @@ from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.db.sync_record import cleanup_sync_records
|
||||
from onyx.db.sync_record import insert_sync_record
|
||||
from onyx.db.sync_record import update_sync_record_status
|
||||
from onyx.document_index.factory import get_all_document_indices
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.document_index.interfaces import VespaDocumentFields
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.redis.redis_document_set import RedisDocumentSet
|
||||
@@ -70,8 +70,6 @@ logger = setup_logger()
|
||||
|
||||
# celery auto associates tasks created inside another task,
|
||||
# which bloats the result metadata considerably. trail=False prevents this.
|
||||
# TODO(andrei): Rename all these kinds of functions from *vespa* to a more
|
||||
# generic *document_index*.
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
|
||||
ignore_result=True,
|
||||
@@ -467,17 +465,13 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
|
||||
try:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
# This flow is for updates so we get all indices.
|
||||
document_indices = get_all_document_indices(
|
||||
doc_index = get_default_document_index(
|
||||
search_settings=active_search_settings.primary,
|
||||
secondary_search_settings=active_search_settings.secondary,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
)
|
||||
|
||||
retry_document_indices: list[RetryDocumentIndex] = [
|
||||
RetryDocumentIndex(document_index)
|
||||
for document_index in document_indices
|
||||
]
|
||||
retry_index = RetryDocumentIndex(doc_index)
|
||||
|
||||
doc = get_document(document_id, db_session)
|
||||
if not doc:
|
||||
@@ -506,18 +500,14 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
|
||||
# aggregated_boost_factor=doc.aggregated_boost_factor,
|
||||
)
|
||||
|
||||
for retry_document_index in retry_document_indices:
|
||||
# TODO(andrei): Previously there was a comment here saying
|
||||
# it was ok if a doc did not exist in the document index. I
|
||||
# don't agree with that claim, so keep an eye on this task
|
||||
# to see if this raises.
|
||||
retry_document_index.update_single(
|
||||
document_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=doc.chunk_count,
|
||||
fields=fields,
|
||||
user_fields=None,
|
||||
)
|
||||
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
|
||||
retry_index.update_single(
|
||||
document_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=doc.chunk_count,
|
||||
fields=fields,
|
||||
user_fields=None,
|
||||
)
|
||||
|
||||
# update db last. Worst case = we crash right before this and
|
||||
# the sync might repeat again later
|
||||
|
||||
@@ -7,7 +7,6 @@ from typing import Any
|
||||
|
||||
from onyx.chat.citation_processor import CitationMapping
|
||||
from onyx.chat.emitter import Emitter
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.server.query_and_chat.placement import Placement
|
||||
from onyx.server.query_and_chat.streaming_models import OverallStop
|
||||
from onyx.server.query_and_chat.streaming_models import Packet
|
||||
@@ -16,11 +15,6 @@ from onyx.tools.models import ToolCallInfo
|
||||
from onyx.utils.threadpool_concurrency import run_in_background
|
||||
from onyx.utils.threadpool_concurrency import wait_on_background
|
||||
|
||||
# Type alias for search doc deduplication key
|
||||
# Simple key: just document_id (str)
|
||||
# Full key: (document_id, chunk_ind, match_highlights)
|
||||
SearchDocKey = str | tuple[str, int, tuple[str, ...]]
|
||||
|
||||
|
||||
class ChatStateContainer:
|
||||
"""Container for accumulating state during LLM loop execution.
|
||||
@@ -46,10 +40,6 @@ class ChatStateContainer:
|
||||
# True if this turn is a clarification question (deep research flow)
|
||||
self.is_clarification: bool = False
|
||||
# Note: LLM cost tracking is now handled in multi_llm.py
|
||||
# Search doc collection - maps dedup key to SearchDoc for all docs from tool calls
|
||||
self._all_search_docs: dict[SearchDocKey, SearchDoc] = {}
|
||||
# Track which citation numbers were actually emitted during streaming
|
||||
self._emitted_citations: set[int] = set()
|
||||
|
||||
def add_tool_call(self, tool_call: ToolCallInfo) -> None:
|
||||
"""Add a tool call to the accumulated state."""
|
||||
@@ -101,54 +91,6 @@ class ChatStateContainer:
|
||||
with self._lock:
|
||||
return self.is_clarification
|
||||
|
||||
@staticmethod
|
||||
def create_search_doc_key(
|
||||
search_doc: SearchDoc, use_simple_key: bool = True
|
||||
) -> SearchDocKey:
|
||||
"""Create a unique key for a SearchDoc for deduplication.
|
||||
|
||||
Args:
|
||||
search_doc: The SearchDoc to create a key for
|
||||
use_simple_key: If True (default), use only document_id for deduplication.
|
||||
If False, include chunk_ind and match_highlights so that the same
|
||||
document/chunk with different highlights are stored separately.
|
||||
"""
|
||||
if use_simple_key:
|
||||
return search_doc.document_id
|
||||
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
|
||||
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
|
||||
|
||||
def add_search_docs(
|
||||
self, search_docs: list[SearchDoc], use_simple_key: bool = True
|
||||
) -> None:
|
||||
"""Add search docs to the accumulated collection with deduplication.
|
||||
|
||||
Args:
|
||||
search_docs: List of SearchDoc objects to add
|
||||
use_simple_key: If True (default), deduplicate by document_id only.
|
||||
If False, deduplicate by document_id + chunk_ind + match_highlights.
|
||||
"""
|
||||
with self._lock:
|
||||
for doc in search_docs:
|
||||
key = self.create_search_doc_key(doc, use_simple_key)
|
||||
if key not in self._all_search_docs:
|
||||
self._all_search_docs[key] = doc
|
||||
|
||||
def get_all_search_docs(self) -> dict[SearchDocKey, SearchDoc]:
|
||||
"""Thread-safe getter for all accumulated search docs (returns a copy)."""
|
||||
with self._lock:
|
||||
return self._all_search_docs.copy()
|
||||
|
||||
def add_emitted_citation(self, citation_num: int) -> None:
|
||||
"""Add a citation number that was actually emitted during streaming."""
|
||||
with self._lock:
|
||||
self._emitted_citations.add(citation_num)
|
||||
|
||||
def get_emitted_citations(self) -> set[int]:
|
||||
"""Thread-safe getter for emitted citations (returns a copy)."""
|
||||
with self._lock:
|
||||
return self._emitted_citations.copy()
|
||||
|
||||
|
||||
def run_chat_loop_with_state_containers(
|
||||
func: Callable[..., None],
|
||||
|
||||
@@ -18,10 +18,12 @@ from onyx.background.celery.tasks.kg_processing.kg_indexing import (
|
||||
from onyx.chat.models import ChatLoadedFile
|
||||
from onyx.chat.models import ChatMessageSimple
|
||||
from onyx.chat.models import PersonaOverrideConfig
|
||||
from onyx.chat.models import ThreadMessage
|
||||
from onyx.configs.constants import DEFAULT_PERSONA_ID
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME
|
||||
from onyx.context.search.enums import RecencyBiasSetting
|
||||
from onyx.context.search.models import RerankingDetails
|
||||
from onyx.context.search.models import RetrievalDetails
|
||||
from onyx.db.chat import create_chat_session
|
||||
from onyx.db.chat import get_chat_messages_by_session
|
||||
from onyx.db.chat import get_or_create_root_message
|
||||
@@ -46,10 +48,14 @@ from onyx.kg.models import KGException
|
||||
from onyx.kg.setup.kg_default_entity_definitions import (
|
||||
populate_missing_default_entity_types__commit,
|
||||
)
|
||||
from onyx.llm.override_models import LLMOverride
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
from onyx.prompts.chat_prompts import ADDITIONAL_CONTEXT_PROMPT
|
||||
from onyx.prompts.chat_prompts import TOOL_CALL_RESPONSE_CROSS_MESSAGE
|
||||
from onyx.prompts.tool_prompts import TOOL_CALL_FAILURE_PROMPT
|
||||
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
|
||||
from onyx.server.query_and_chat.models import CreateChatMessageRequest
|
||||
from onyx.server.query_and_chat.models import MessageOrigin
|
||||
from onyx.server.query_and_chat.streaming_models import CitationInfo
|
||||
from onyx.tools.models import ToolCallKickoff
|
||||
from onyx.tools.tool_implementations.custom.custom_tool import (
|
||||
@@ -98,6 +104,91 @@ def create_chat_session_from_request(
|
||||
)
|
||||
|
||||
|
||||
def prepare_chat_message_request(
|
||||
message_text: str,
|
||||
user: User | None,
|
||||
persona_id: int | None,
|
||||
# Does the question need to have a persona override
|
||||
persona_override_config: PersonaOverrideConfig | None,
|
||||
message_ts_to_respond_to: str | None,
|
||||
retrieval_details: RetrievalDetails | None,
|
||||
rerank_settings: RerankingDetails | None,
|
||||
db_session: Session,
|
||||
skip_gen_ai_answer_generation: bool = False,
|
||||
llm_override: LLMOverride | None = None,
|
||||
allowed_tool_ids: list[int] | None = None,
|
||||
forced_tool_ids: list[int] | None = None,
|
||||
origin: MessageOrigin | None = None,
|
||||
) -> CreateChatMessageRequest:
|
||||
# Typically used for one shot flows like SlackBot or non-chat API endpoint use cases
|
||||
new_chat_session = create_chat_session(
|
||||
db_session=db_session,
|
||||
description=None,
|
||||
user_id=user.id if user else None,
|
||||
# If using an override, this id will be ignored later on
|
||||
persona_id=persona_id or DEFAULT_PERSONA_ID,
|
||||
onyxbot_flow=True,
|
||||
slack_thread_id=message_ts_to_respond_to,
|
||||
)
|
||||
|
||||
return CreateChatMessageRequest(
|
||||
chat_session_id=new_chat_session.id,
|
||||
parent_message_id=None, # It's a standalone chat session each time
|
||||
message=message_text,
|
||||
file_descriptors=[], # Currently SlackBot/answer api do not support files in the context
|
||||
# Can always override the persona for the single query, if it's a normal persona
|
||||
# then it will be treated the same
|
||||
persona_override_config=persona_override_config,
|
||||
search_doc_ids=None,
|
||||
retrieval_options=retrieval_details,
|
||||
rerank_settings=rerank_settings,
|
||||
skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
|
||||
llm_override=llm_override,
|
||||
allowed_tool_ids=allowed_tool_ids,
|
||||
forced_tool_ids=forced_tool_ids,
|
||||
origin=origin or MessageOrigin.UNKNOWN,
|
||||
)
|
||||
|
||||
|
||||
def combine_message_thread(
    messages: list[ThreadMessage],
    max_tokens: int | None,
    llm_tokenizer: BaseTokenizer,
) -> str:
    """Flatten a message thread into one combined context string.

    Walks the thread from newest to oldest so that, when a token budget is
    given, the most recent messages are the ones that survive truncation.
    """
    if not messages:
        return ""

    kept_newest_first: list[str] = []
    used_tokens = 0

    for msg in reversed(messages):
        role_label = msg.role.value.upper()
        if msg.role == MessageType.USER:
            # Other messages may carry user-identifying information, so tag
            # anonymous user messages as "Unknown" for symmetry.
            role_label += " " + (msg.sender if msg.sender else "Unknown")

        rendered = f"{role_label}:\n{msg.message}"
        cost = len(llm_tokenizer.encode(rendered))

        if max_tokens is not None and used_tokens + cost > max_tokens:
            break

        kept_newest_first.append(rendered)
        used_tokens += cost

    # Restore chronological order for the final combined string.
    return "\n\n".join(reversed(kept_newest_first))
|
||||
|
||||
|
||||
def create_chat_history_chain(
|
||||
chat_session_id: UUID,
|
||||
db_session: Session,
|
||||
@@ -159,6 +250,31 @@ def create_chat_history_chain(
|
||||
return mainline_messages
|
||||
|
||||
|
||||
def combine_message_chain(
    messages: list[ChatMessage],
    token_limit: int,
    msg_limit: int | None = None,
) -> str:
    """Render chat history as one string for secondary LLM flows.

    Messages are consumed newest-first until the token budget would be
    exceeded, then emitted in chronological order.
    """
    if msg_limit is not None:
        messages = messages[-msg_limit:]

    rendered_newest_first: list[str] = []
    spent_tokens = 0

    for msg in reversed(messages):
        cost = msg.token_count
        if spent_tokens + cost > token_limit:
            break

        label = msg.message_type.value.upper()
        rendered_newest_first.append(f"{label}:\n{msg.message}")
        spent_tokens += cost

    return "\n\n".join(reversed(rendered_newest_first))
|
||||
|
||||
|
||||
def reorganize_citations(
|
||||
answer: str, citations: list[CitationInfo]
|
||||
) -> tuple[str, list[CitationInfo]]:
|
||||
@@ -299,7 +415,7 @@ def create_temporary_persona(
|
||||
num_chunks=persona_config.num_chunks,
|
||||
llm_relevance_filter=persona_config.llm_relevance_filter,
|
||||
llm_filter_extraction=persona_config.llm_filter_extraction,
|
||||
recency_bias=RecencyBiasSetting.BASE_DECAY,
|
||||
recency_bias=persona_config.recency_bias,
|
||||
llm_model_provider_override=persona_config.llm_model_provider_override,
|
||||
llm_model_version_override=persona_config.llm_model_version_override,
|
||||
)
|
||||
@@ -469,71 +585,6 @@ def load_all_chat_files(
|
||||
return files
|
||||
|
||||
|
||||
def convert_chat_history_basic(
    chat_history: list[ChatMessage],
    token_counter: Callable[[str], int],
    max_individual_message_tokens: int | None = None,
    max_total_tokens: int | None = None,
) -> list[ChatMessageSimple]:
    """Convert ChatMessage history to ChatMessageSimple, omitting tools/files.

    Args:
        chat_history: List of ChatMessage objects to convert
        token_counter: Function to count tokens in a message string
        max_individual_message_tokens: If set, messages exceeding this number
            of tokens are dropped. If None, no per-message limit is applied.
        max_total_tokens: If set, maximum number of tokens allowed for the
            entire history. If None, the history is not trimmed overall.

    Returns:
        List of ChatMessageSimple objects
    """
    # Defensive: a non-positive total budget means no history fits at all.
    if max_total_tokens is not None and max_total_tokens <= 0:
        return []

    # Keep only the core USER/ASSISTANT turns; files and tool calls omitted.
    simplified: list[ChatMessageSimple] = []
    for original in chat_history:
        if original.message_type not in (MessageType.USER, MessageType.ASSISTANT):
            continue

        text = original.message or ""
        tokens = getattr(original, "token_count", None)
        if tokens is None:
            tokens = token_counter(text)

        # A single oversized message would dominate the context window.
        if (
            max_individual_message_tokens is not None
            and tokens > max_individual_message_tokens
        ):
            continue

        simplified.append(
            ChatMessageSimple(
                message=text,
                token_count=tokens,
                message_type=original.message_type,
                image_files=None,
            )
        )

    if max_total_tokens is None:
        return simplified

    # Enforce the total budget by keeping the largest contiguous suffix
    # of the conversation that fits.
    kept_newest_first: list[ChatMessageSimple] = []
    budget_used = 0
    for candidate in reversed(simplified):
        if budget_used + candidate.token_count > max_total_tokens:
            break
        kept_newest_first.append(candidate)
        budget_used += candidate.token_count

    return list(reversed(kept_newest_first))
|
||||
|
||||
|
||||
def convert_chat_history(
|
||||
chat_history: list[ChatMessage],
|
||||
files: list[ChatLoadedFile],
|
||||
|
||||
@@ -4,15 +4,14 @@ Dynamic Citation Processor for LLM Responses
|
||||
This module provides a citation processor that can:
|
||||
- Accept citation number to SearchDoc mappings dynamically
|
||||
- Process token streams from LLMs to extract citations
|
||||
- Handle citations in three modes: REMOVE, KEEP_MARKERS, or HYPERLINK
|
||||
- Emit CitationInfo objects for detected citations (in HYPERLINK mode)
|
||||
- Track all seen citations regardless of mode
|
||||
- Optionally replace citation markers with formatted markdown links
|
||||
- Emit CitationInfo objects for detected citations (when replacing)
|
||||
- Track all seen citations regardless of replacement mode
|
||||
- Maintain a list of cited documents in order of first citation
|
||||
"""
|
||||
|
||||
import re
|
||||
from collections.abc import Generator
|
||||
from enum import Enum
|
||||
from typing import TypeAlias
|
||||
|
||||
from onyx.configs.chat_configs import STOP_STREAM_PAT
|
||||
@@ -24,29 +23,6 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class CitationMode(Enum):
    """How detected citations are rendered in the processed output.

    REMOVE: Citation markers are stripped from the text entirely; no
        CitationInfo objects are emitted. Used when citations must not be
        shown to the audience (e.g. Discord bot, public Slack bot).

    KEEP_MARKERS: Markers such as [1], [2] pass through unchanged; no
        CitationInfo objects are emitted. Used when citations need later
        processing, e.g. renumbering via collapse_citations() in the
        research agent.

    HYPERLINK: Markers are rewritten as markdown links like [[1]](url) and
        CitationInfo objects are emitted for UI tracking. Used for final
        reports shown to users with clickable links.
    """

    REMOVE = "remove"
    KEEP_MARKERS = "keep_markers"
    HYPERLINK = "hyperlink"
|
||||
|
||||
|
||||
CitationMapping: TypeAlias = dict[int, SearchDoc]
|
||||
|
||||
|
||||
@@ -72,37 +48,29 @@ class DynamicCitationProcessor:
|
||||
|
||||
This processor is designed for multi-turn conversations where the citation
|
||||
number to document mapping is provided externally. It processes streaming
|
||||
tokens from an LLM, detects citations (e.g., [1], [2,3], [[4]]), and handles
|
||||
them according to the configured CitationMode:
|
||||
tokens from an LLM, detects citations (e.g., [1], [2,3], [[4]]), and based
|
||||
on the `replace_citation_tokens` setting:
|
||||
|
||||
CitationMode.HYPERLINK (default):
|
||||
When replace_citation_tokens=True (default):
|
||||
1. Replaces citation markers with formatted markdown links (e.g., [[1]](url))
|
||||
2. Emits CitationInfo objects for tracking
|
||||
3. Maintains the order in which documents were first cited
|
||||
Use case: Final reports shown to users with clickable links.
|
||||
|
||||
CitationMode.KEEP_MARKERS:
|
||||
1. Preserves original citation markers like [1], [2] unchanged
|
||||
When replace_citation_tokens=False:
|
||||
1. Preserves original citation markers in the output text
|
||||
2. Does NOT emit CitationInfo objects
|
||||
3. Still tracks all seen citations via get_seen_citations()
|
||||
Use case: When citations need later processing (e.g., renumbering).
|
||||
|
||||
CitationMode.REMOVE:
|
||||
1. Removes citation markers entirely from the output text
|
||||
2. Does NOT emit CitationInfo objects
|
||||
3. Still tracks all seen citations via get_seen_citations()
|
||||
Use case: Research agent intermediate reports.
|
||||
|
||||
Features:
|
||||
- Accepts citation number → SearchDoc mapping via update_citation_mapping()
|
||||
- Configurable citation mode at initialization
|
||||
- Always tracks seen citations regardless of mode
|
||||
- Configurable citation replacement behavior at initialization
|
||||
- Always tracks seen citations regardless of replacement mode
|
||||
- Holds back tokens that might be partial citations
|
||||
- Maintains list of cited SearchDocs in order of first citation
|
||||
- Handles unicode bracket variants (【】, [])
|
||||
- Skips citation processing inside code blocks
|
||||
|
||||
Example (HYPERLINK mode - default):
|
||||
Example (with citation replacement - default):
|
||||
processor = DynamicCitationProcessor()
|
||||
|
||||
# Set up citation mapping
|
||||
@@ -119,8 +87,8 @@ class DynamicCitationProcessor:
|
||||
# Get cited documents at the end
|
||||
cited_docs = processor.get_cited_documents()
|
||||
|
||||
Example (KEEP_MARKERS mode):
|
||||
processor = DynamicCitationProcessor(citation_mode=CitationMode.KEEP_MARKERS)
|
||||
Example (without citation replacement):
|
||||
processor = DynamicCitationProcessor(replace_citation_tokens=False)
|
||||
processor.update_citation_mapping({1: search_doc1, 2: search_doc2})
|
||||
|
||||
# Process tokens from LLM
|
||||
@@ -131,42 +99,26 @@ class DynamicCitationProcessor:
|
||||
|
||||
# Get all seen citations after processing
|
||||
seen_citations = processor.get_seen_citations() # {1: search_doc1, ...}
|
||||
|
||||
Example (REMOVE mode):
|
||||
processor = DynamicCitationProcessor(citation_mode=CitationMode.REMOVE)
|
||||
processor.update_citation_mapping({1: search_doc1, 2: search_doc2})
|
||||
|
||||
# Process tokens - citations are removed but tracked
|
||||
for token in llm_stream:
|
||||
for result in processor.process_token(token):
|
||||
print(result) # Text without any citation markers
|
||||
|
||||
# Citations are still tracked
|
||||
seen_citations = processor.get_seen_citations()
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
citation_mode: CitationMode = CitationMode.HYPERLINK,
|
||||
replace_citation_tokens: bool = True,
|
||||
stop_stream: str | None = STOP_STREAM_PAT,
|
||||
):
|
||||
"""
|
||||
Initialize the citation processor.
|
||||
|
||||
Args:
|
||||
citation_mode: How to handle citations in the output. One of:
|
||||
- CitationMode.HYPERLINK (default): Replace [1] with [[1]](url)
|
||||
and emit CitationInfo objects.
|
||||
- CitationMode.KEEP_MARKERS: Keep original [1] markers unchanged,
|
||||
no CitationInfo objects emitted.
|
||||
- CitationMode.REMOVE: Remove citations entirely from output,
|
||||
no CitationInfo objects emitted.
|
||||
All modes track seen citations via get_seen_citations().
|
||||
replace_citation_tokens: If True (default), citations like [1] are replaced
|
||||
with formatted markdown links like [[1]](url) and CitationInfo objects
|
||||
are emitted. If False, original citation text is preserved in output
|
||||
and no CitationInfo objects are emitted. Regardless of this setting,
|
||||
all seen citations are tracked and available via get_seen_citations().
|
||||
stop_stream: Optional stop token pattern to halt processing early.
|
||||
When this pattern is detected in the token stream, processing stops.
|
||||
Defaults to STOP_STREAM_PAT from chat configs.
|
||||
"""
|
||||
|
||||
# Citation mapping from citation number to SearchDoc
|
||||
self.citation_to_doc: CitationMapping = {}
|
||||
self.seen_citations: CitationMapping = {} # citation num -> SearchDoc
|
||||
@@ -176,7 +128,7 @@ class DynamicCitationProcessor:
|
||||
self.curr_segment = "" # tokens held for citation processing
|
||||
self.hold = "" # tokens held for stop token processing
|
||||
self.stop_stream = stop_stream
|
||||
self.citation_mode = citation_mode
|
||||
self.replace_citation_tokens = replace_citation_tokens
|
||||
|
||||
# Citation tracking
|
||||
self.cited_documents_in_order: list[SearchDoc] = (
|
||||
@@ -247,21 +199,19 @@ class DynamicCitationProcessor:
|
||||
5. Handles stop tokens
|
||||
6. Always tracks seen citations in self.seen_citations
|
||||
|
||||
Behavior depends on the `citation_mode` setting from __init__:
|
||||
- HYPERLINK: Citations are replaced with [[n]](url) format and CitationInfo
|
||||
Behavior depends on the `replace_citation_tokens` setting from __init__:
|
||||
- If True: Citations are replaced with [[n]](url) format and CitationInfo
|
||||
objects are yielded before each formatted citation
|
||||
- KEEP_MARKERS: Original citation markers like [1] are preserved unchanged,
|
||||
no CitationInfo objects are yielded
|
||||
- REMOVE: Citations are removed entirely from output,
|
||||
no CitationInfo objects are yielded
|
||||
- If False: Original citation text (e.g., [1]) is preserved in output
|
||||
and no CitationInfo objects are yielded
|
||||
|
||||
Args:
|
||||
token: The next token from the LLM stream, or None to signal end of stream.
|
||||
Pass None to flush any remaining buffered text at end of stream.
|
||||
|
||||
Yields:
|
||||
str: Text chunks to display. Citation format depends on citation_mode.
|
||||
CitationInfo: Citation metadata (only when citation_mode=HYPERLINK)
|
||||
str: Text chunks to display. Citation format depends on replace_citation_tokens.
|
||||
CitationInfo: Citation metadata (only when replace_citation_tokens=True)
|
||||
"""
|
||||
# None -> end of stream, flush remaining segment
|
||||
if token is None:
|
||||
@@ -349,17 +299,17 @@ class DynamicCitationProcessor:
|
||||
if self.non_citation_count > 5:
|
||||
self.recent_cited_documents.clear()
|
||||
|
||||
# Yield text before citation FIRST (preserve order)
|
||||
if intermatch_str:
|
||||
yield intermatch_str
|
||||
|
||||
# Process the citation (returns formatted citation text and CitationInfo objects)
|
||||
# Always tracks seen citations regardless of citation_mode
|
||||
# Always tracks seen citations regardless of strip_citations flag
|
||||
citation_text, citation_info_list = self._process_citation(
|
||||
match, has_leading_space
|
||||
match, has_leading_space, self.replace_citation_tokens
|
||||
)
|
||||
|
||||
if self.citation_mode == CitationMode.HYPERLINK:
|
||||
# HYPERLINK mode: Replace citations with markdown links [[n]](url)
|
||||
# Yield text before citation FIRST (preserve order)
|
||||
if intermatch_str:
|
||||
yield intermatch_str
|
||||
if self.replace_citation_tokens:
|
||||
# Yield CitationInfo objects BEFORE the citation text
|
||||
# This allows the frontend to receive citation metadata before the token
|
||||
# that contains [[n]](link), enabling immediate rendering
|
||||
@@ -368,34 +318,10 @@ class DynamicCitationProcessor:
|
||||
# Then yield the formatted citation text
|
||||
if citation_text:
|
||||
yield citation_text
|
||||
|
||||
elif self.citation_mode == CitationMode.KEEP_MARKERS:
|
||||
# KEEP_MARKERS mode: Preserve original citation markers unchanged
|
||||
# Yield text before citation
|
||||
if intermatch_str:
|
||||
yield intermatch_str
|
||||
# Yield the original citation marker as-is
|
||||
else:
|
||||
# When not stripping, yield the original citation text unchanged
|
||||
yield match.group()
|
||||
|
||||
else: # CitationMode.REMOVE
|
||||
# REMOVE mode: Remove citations entirely from output
|
||||
# This strips citation markers like [1], [2], 【1】 from the output text
|
||||
# When removing citations, we need to handle spacing to avoid issues like:
|
||||
# - "text [1] more" -> "text more" (double space)
|
||||
# - "text [1]." -> "text ." (space before punctuation)
|
||||
if intermatch_str:
|
||||
remaining_text = self.curr_segment[match_span[1] :]
|
||||
# Strip trailing space from intermatch if:
|
||||
# 1. Remaining text starts with space (avoids double space)
|
||||
# 2. Remaining text starts with punctuation (avoids space before punctuation)
|
||||
if intermatch_str[-1].isspace() and remaining_text:
|
||||
first_char = remaining_text[0]
|
||||
# Check if next char is space or common punctuation
|
||||
if first_char.isspace() or first_char in ".,;:!?)]}":
|
||||
intermatch_str = intermatch_str.rstrip()
|
||||
if intermatch_str:
|
||||
yield intermatch_str
|
||||
|
||||
self.non_citation_count = 0
|
||||
|
||||
# Leftover text could be part of next citation
|
||||
@@ -412,7 +338,7 @@ class DynamicCitationProcessor:
|
||||
yield result
|
||||
|
||||
def _process_citation(
|
||||
self, match: re.Match, has_leading_space: bool
|
||||
self, match: re.Match, has_leading_space: bool, replace_tokens: bool = True
|
||||
) -> tuple[str, list[CitationInfo]]:
|
||||
"""
|
||||
Process a single citation match and return formatted citation text and citation info objects.
|
||||
@@ -423,28 +349,31 @@ class DynamicCitationProcessor:
|
||||
This method always:
|
||||
1. Extracts citation numbers from the match
|
||||
2. Looks up the corresponding SearchDoc from the mapping
|
||||
3. Tracks seen citations in self.seen_citations (regardless of citation_mode)
|
||||
3. Tracks seen citations in self.seen_citations (regardless of replace_tokens)
|
||||
|
||||
When citation_mode is HYPERLINK:
|
||||
When replace_tokens=True (controlled by self.replace_citation_tokens):
|
||||
4. Creates formatted citation text as [[n]](url)
|
||||
5. Creates CitationInfo objects for new citations
|
||||
6. Handles deduplication of recently cited documents
|
||||
|
||||
When citation_mode is REMOVE or KEEP_MARKERS:
|
||||
4. Returns empty string and empty list (caller handles output based on mode)
|
||||
When replace_tokens=False:
|
||||
4. Returns empty string and empty list (caller yields original match text)
|
||||
|
||||
Args:
|
||||
match: Regex match object containing the citation pattern
|
||||
has_leading_space: Whether the text immediately before this citation
|
||||
ends with whitespace. Used to determine if a leading space should
|
||||
be added to the formatted output.
|
||||
replace_tokens: If True, return formatted text and CitationInfo objects.
|
||||
If False, only track seen citations and return empty results.
|
||||
This is passed from self.replace_citation_tokens by the caller.
|
||||
|
||||
Returns:
|
||||
Tuple of (formatted_citation_text, citation_info_list):
|
||||
- formatted_citation_text: Markdown-formatted citation text like
|
||||
"[[1]](https://example.com)" or empty string if not in HYPERLINK mode
|
||||
"[[1]](https://example.com)" or empty string if replace_tokens=False
|
||||
- citation_info_list: List of CitationInfo objects for newly cited
|
||||
documents, or empty list if not in HYPERLINK mode
|
||||
documents, or empty list if replace_tokens=False
|
||||
"""
|
||||
citation_str: str = match.group() # e.g., '[1]', '[1, 2, 3]', '[[1]]', '【1】'
|
||||
formatted = (
|
||||
@@ -482,11 +411,11 @@ class DynamicCitationProcessor:
|
||||
doc_id = search_doc.document_id
|
||||
link = search_doc.link or ""
|
||||
|
||||
# Always track seen citations regardless of citation_mode setting
|
||||
# Always track seen citations regardless of replace_tokens setting
|
||||
self.seen_citations[num] = search_doc
|
||||
|
||||
# Only generate formatted citations and CitationInfo in HYPERLINK mode
|
||||
if self.citation_mode != CitationMode.HYPERLINK:
|
||||
# When not replacing citation tokens, skip the rest of the processing
|
||||
if not replace_tokens:
|
||||
continue
|
||||
|
||||
# Format the citation text as [[n]](link)
|
||||
@@ -521,14 +450,14 @@ class DynamicCitationProcessor:
|
||||
"""
|
||||
Get the list of cited SearchDoc objects in the order they were first cited.
|
||||
|
||||
Note: This list is only populated when `citation_mode=HYPERLINK`.
|
||||
When using REMOVE or KEEP_MARKERS mode, this will return an empty list.
|
||||
Note: This list is only populated when `replace_citation_tokens=True`.
|
||||
When `replace_citation_tokens=False`, this will return an empty list.
|
||||
Use get_seen_citations() instead if you need to track citations without
|
||||
emitting CitationInfo objects.
|
||||
replacing them.
|
||||
|
||||
Returns:
|
||||
List of SearchDoc objects in the order they were first cited.
|
||||
Empty list if citation_mode is not HYPERLINK.
|
||||
Empty list if replace_citation_tokens=False.
|
||||
"""
|
||||
return self.cited_documents_in_order
|
||||
|
||||
@@ -536,14 +465,14 @@ class DynamicCitationProcessor:
|
||||
"""
|
||||
Get the list of cited document IDs in the order they were first cited.
|
||||
|
||||
Note: This list is only populated when `citation_mode=HYPERLINK`.
|
||||
When using REMOVE or KEEP_MARKERS mode, this will return an empty list.
|
||||
Note: This list is only populated when `replace_citation_tokens=True`.
|
||||
When `replace_citation_tokens=False`, this will return an empty list.
|
||||
Use get_seen_citations() instead if you need to track citations without
|
||||
emitting CitationInfo objects.
|
||||
replacing them.
|
||||
|
||||
Returns:
|
||||
List of document IDs (strings) in the order they were first cited.
|
||||
Empty list if citation_mode is not HYPERLINK.
|
||||
Empty list if replace_citation_tokens=False.
|
||||
"""
|
||||
return [doc.document_id for doc in self.cited_documents_in_order]
|
||||
|
||||
@@ -552,12 +481,12 @@ class DynamicCitationProcessor:
|
||||
Get all seen citations as a mapping from citation number to SearchDoc.
|
||||
|
||||
This returns all citations that have been encountered during processing,
|
||||
regardless of the `citation_mode` setting. Citations are tracked
|
||||
regardless of the `replace_citation_tokens` setting. Citations are tracked
|
||||
whenever they are parsed, making this useful for cases where you need to
|
||||
know which citations appeared in the text without emitting CitationInfo objects.
|
||||
know which citations appeared in the text without replacing them.
|
||||
|
||||
This is particularly useful when using REMOVE or KEEP_MARKERS mode, as
|
||||
get_cited_documents() will be empty in those cases, but get_seen_citations()
|
||||
This is particularly useful when `replace_citation_tokens=False`, as
|
||||
get_cited_documents() will be empty in that case, but get_seen_citations()
|
||||
will still contain all the citations that were found.
|
||||
|
||||
Returns:
|
||||
@@ -572,13 +501,13 @@ class DynamicCitationProcessor:
|
||||
"""
|
||||
Get the number of unique documents that have been cited.
|
||||
|
||||
Note: This count is only updated when `citation_mode=HYPERLINK`.
|
||||
When using REMOVE or KEEP_MARKERS mode, this will always return 0.
|
||||
Note: This count is only updated when `replace_citation_tokens=True`.
|
||||
When `replace_citation_tokens=False`, this will always return 0.
|
||||
Use len(get_seen_citations()) instead if you need to count citations
|
||||
without emitting CitationInfo objects.
|
||||
without replacing them.
|
||||
|
||||
Returns:
|
||||
Number of unique documents cited. 0 if citation_mode is not HYPERLINK.
|
||||
Number of unique documents cited. 0 if replace_citation_tokens=False.
|
||||
"""
|
||||
return len(self.cited_document_ids)
|
||||
|
||||
@@ -590,9 +519,9 @@ class DynamicCitationProcessor:
|
||||
CitationInfo objects for the same document when it's cited multiple times
|
||||
in close succession. This method clears that tracker.
|
||||
|
||||
This is primarily useful when `citation_mode=HYPERLINK` to allow
|
||||
This is primarily useful when `replace_citation_tokens=True` to allow
|
||||
previously cited documents to emit CitationInfo objects again. Has no
|
||||
effect when using REMOVE or KEEP_MARKERS mode.
|
||||
effect when `replace_citation_tokens=False`.
|
||||
|
||||
The recent citation tracker is also automatically cleared when more than
|
||||
5 non-citation characters are processed between citations.
|
||||
|
||||
@@ -53,50 +53,6 @@ def update_citation_processor_from_tool_response(
|
||||
citation_processor.update_citation_mapping(citation_to_doc)
|
||||
|
||||
|
||||
def extract_citation_order_from_text(text: str) -> list[int]:
    """Extract citation numbers from text in order of first appearance.

    Parses citation patterns like [1], [1, 2], [[1]], 【1】 etc. and returns
    the citation numbers in the order they first appear in the text.

    Args:
        text: The text containing citations

    Returns:
        List of citation numbers in order of first appearance (no duplicates)
    """
    # Same pattern used in collapse_citations and DynamicCitationProcessor.
    # Group 2 captures the number in double bracket format: [[1]], 【【1】】
    # Group 4 captures the numbers in single bracket format: [1], [1, 2]
    citation_pattern = re.compile(
        r"([\[【[]{2}(\d+)[\]】]]{2})|([\[【[]([\d]+(?: *, *\d+)*)[\]】]])"
    )

    # Dict keys double as an insertion-ordered de-duplicated set.
    ordered_unique: dict[int, None] = {}

    for match in citation_pattern.finditer(text):
        # Group 2 is the double-bracket single number; group 4 is the
        # single-bracket (possibly comma-separated) number list.
        nums_str = match.group(2) or match.group(4)
        if not nums_str:
            continue

        for piece in nums_str.split(","):
            piece = piece.strip()
            if not piece:
                continue
            try:
                ordered_unique.setdefault(int(piece), None)
            except ValueError:
                continue

    return list(ordered_unique)
|
||||
|
||||
|
||||
def collapse_citations(
|
||||
answer_text: str,
|
||||
existing_citation_mapping: CitationMapping,
|
||||
|
||||
@@ -5,11 +5,9 @@ from sqlalchemy.orm import Session
|
||||
from onyx.chat.chat_state import ChatStateContainer
|
||||
from onyx.chat.chat_utils import create_tool_call_failure_messages
|
||||
from onyx.chat.citation_processor import CitationMapping
|
||||
from onyx.chat.citation_processor import CitationMode
|
||||
from onyx.chat.citation_processor import DynamicCitationProcessor
|
||||
from onyx.chat.citation_utils import update_citation_processor_from_tool_response
|
||||
from onyx.chat.emitter import Emitter
|
||||
from onyx.chat.llm_step import extract_tool_calls_from_response_text
|
||||
from onyx.chat.llm_step import run_llm_step
|
||||
from onyx.chat.models import ChatMessageSimple
|
||||
from onyx.chat.models import ExtractedProjectFiles
|
||||
@@ -39,13 +37,11 @@ from onyx.tools.built_in_tools import CITEABLE_TOOLS_NAMES
|
||||
from onyx.tools.built_in_tools import STOPPING_TOOLS_NAMES
|
||||
from onyx.tools.interface import Tool
|
||||
from onyx.tools.models import ToolCallInfo
|
||||
from onyx.tools.models import ToolCallKickoff
|
||||
from onyx.tools.models import ToolResponse
|
||||
from onyx.tools.tool_implementations.images.models import (
|
||||
FinalImageGenerationResponse,
|
||||
)
|
||||
from onyx.tools.tool_implementations.search.search_tool import SearchTool
|
||||
from onyx.tools.tool_implementations.web_search.utils import extract_url_snippet_map
|
||||
from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
|
||||
from onyx.tools.tool_runner import run_tool_calls
|
||||
from onyx.tracing.framework.create import trace
|
||||
@@ -54,78 +50,6 @@ from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _try_fallback_tool_extraction(
    llm_step_result: LlmStepResult,
    tool_choice: ToolChoiceOptions,
    fallback_extraction_attempted: bool,
    tool_defs: list[dict],
    turn_index: int,
) -> tuple[LlmStepResult, bool]:
    """Attempt to extract tool calls from response text as a fallback.

    This is a last-resort path for low quality LLMs, or ones whose serving
    layer lacks native tool calling. It also triggers when the model produced
    reasoning but neither an answer nor any tool calls.

    Args:
        llm_step_result: The result from the LLM step
        tool_choice: The tool choice option used for this step
        fallback_extraction_attempted: Whether fallback extraction already ran
        tool_defs: List of tool definitions
        turn_index: The current turn index for placement

    Returns:
        Tuple of (possibly updated LlmStepResult, whether fallback was
        attempted this call)
    """
    # Only ever attempt the fallback once per loop.
    if fallback_extraction_attempted:
        return llm_step_result, False

    has_no_tool_calls = not llm_step_result.tool_calls
    reasoning_without_output = (
        llm_step_result.reasoning and not llm_step_result.answer and has_no_tool_calls
    )
    required_but_missing = (
        tool_choice == ToolChoiceOptions.REQUIRED and has_no_tool_calls
    )

    if not (required_but_missing or reasoning_without_output):
        return llm_step_result, False

    # Prefer extracting from the answer text; fall back to the reasoning text.
    recovered: list[ToolCallKickoff] = []
    if llm_step_result.answer:
        recovered = extract_tool_calls_from_response_text(
            response_text=llm_step_result.answer,
            tool_definitions=tool_defs,
            placement=Placement(turn_index=turn_index),
        )
    if not recovered and llm_step_result.reasoning:
        recovered = extract_tool_calls_from_response_text(
            response_text=llm_step_result.reasoning,
            tool_definitions=tool_defs,
            placement=Placement(turn_index=turn_index),
        )

    if not recovered:
        # Fallback was attempted but found nothing usable.
        return llm_step_result, True

    logger.info(
        f"Extracted {len(recovered)} tool call(s) from response text "
        f"as fallback (tool_choice was REQUIRED but no tool calls returned)"
    )
    return (
        LlmStepResult(
            reasoning=llm_step_result.reasoning,
            answer=llm_step_result.answer,
            tool_calls=recovered,
        ),
        True,
    )
|
||||
|
||||
|
||||
# Hardcoded opinionated value; might break down to something like:
|
||||
# Cycle 1: Calls web_search for something
|
||||
# Cycle 2: Calls open_url for some results
|
||||
@@ -373,7 +297,6 @@ def run_llm_loop(
|
||||
forced_tool_id: int | None = None,
|
||||
user_identity: LLMUserIdentity | None = None,
|
||||
chat_session_id: str | None = None,
|
||||
include_citations: bool = True,
|
||||
) -> None:
|
||||
with trace(
|
||||
"run_llm_loop",
|
||||
@@ -391,13 +314,7 @@ def run_llm_loop(
|
||||
initialize_litellm()
|
||||
|
||||
# Initialize citation processor for handling citations dynamically
|
||||
# When include_citations is True, use HYPERLINK mode to format citations as [[1]](url)
|
||||
# When include_citations is False, use REMOVE mode to strip citations from output
|
||||
citation_processor = DynamicCitationProcessor(
|
||||
citation_mode=(
|
||||
CitationMode.HYPERLINK if include_citations else CitationMode.REMOVE
|
||||
)
|
||||
)
|
||||
citation_processor = DynamicCitationProcessor()
|
||||
|
||||
# Add project file citation mappings if project files are present
|
||||
project_citation_mapping: CitationMapping = {}
|
||||
@@ -427,7 +344,6 @@ def run_llm_loop(
|
||||
ran_image_gen: bool = False
|
||||
just_ran_web_search: bool = False
|
||||
has_called_search_tool: bool = False
|
||||
fallback_extraction_attempted: bool = False
|
||||
citation_mapping: dict[int, str] = {} # Maps citation_num -> document_id/URL
|
||||
|
||||
default_base_system_prompt: str = get_default_base_system_prompt(db_session)
|
||||
@@ -454,16 +370,12 @@ def run_llm_loop(
|
||||
|
||||
# The section below calculates the available tokens for history a bit more accurately
|
||||
# now that project files are loaded in.
|
||||
if persona and persona.replace_base_system_prompt:
|
||||
if persona and persona.replace_base_system_prompt and persona.system_prompt:
|
||||
# Handles the case where user has checked off the "Replace base system prompt" checkbox
|
||||
system_prompt = (
|
||||
ChatMessageSimple(
|
||||
message=persona.system_prompt,
|
||||
token_count=token_counter(persona.system_prompt),
|
||||
message_type=MessageType.SYSTEM,
|
||||
)
|
||||
if persona.system_prompt
|
||||
else None
|
||||
system_prompt = ChatMessageSimple(
|
||||
message=persona.system_prompt,
|
||||
token_count=token_counter(persona.system_prompt),
|
||||
message_type=MessageType.SYSTEM,
|
||||
)
|
||||
custom_agent_prompt_msg = None
|
||||
else:
|
||||
@@ -550,11 +462,10 @@ def run_llm_loop(
|
||||
|
||||
# This calls the LLM, yields packets (reasoning, answers, etc.) and returns the result
|
||||
# It also pre-processes the tool calls in preparation for running them
|
||||
tool_defs = [tool.tool_definition() for tool in final_tools]
|
||||
llm_step_result, has_reasoned = run_llm_step(
|
||||
emitter=emitter,
|
||||
history=truncated_message_history,
|
||||
tool_definitions=tool_defs,
|
||||
tool_definitions=[tool.tool_definition() for tool in final_tools],
|
||||
tool_choice=tool_choice,
|
||||
llm=llm,
|
||||
placement=Placement(turn_index=llm_cycle_count + reasoning_cycles),
|
||||
@@ -569,19 +480,6 @@ def run_llm_loop(
|
||||
if has_reasoned:
|
||||
reasoning_cycles += 1
|
||||
|
||||
# Fallback extraction for LLMs that don't support tool calling natively or are lower quality
|
||||
# and might incorrectly output tool calls in other channels
|
||||
llm_step_result, attempted = _try_fallback_tool_extraction(
|
||||
llm_step_result=llm_step_result,
|
||||
tool_choice=tool_choice,
|
||||
fallback_extraction_attempted=fallback_extraction_attempted,
|
||||
tool_defs=tool_defs,
|
||||
turn_index=llm_cycle_count + reasoning_cycles,
|
||||
)
|
||||
if attempted:
|
||||
# To prevent the case of excessive looping with bad models, we only allow one fallback attempt
|
||||
fallback_extraction_attempted = True
|
||||
|
||||
# Save citation mapping after each LLM step for incremental state updates
|
||||
state_container.set_citation_mapping(citation_processor.citation_to_doc)
|
||||
|
||||
@@ -617,7 +515,6 @@ def run_llm_loop(
|
||||
next_citation_num=citation_processor.get_next_citation_number(),
|
||||
max_concurrent_tools=None,
|
||||
skip_search_query_expansion=has_called_search_tool,
|
||||
url_snippet_map=extract_url_snippet_map(gathered_documents or []),
|
||||
)
|
||||
tool_responses = parallel_tool_call_results.tool_responses
|
||||
citation_mapping = parallel_tool_call_results.updated_citation_mapping
|
||||
@@ -656,15 +553,8 @@ def run_llm_loop(
|
||||
|
||||
# Extract search_docs if this is a search tool response
|
||||
search_docs = None
|
||||
displayed_docs = None
|
||||
if isinstance(tool_response.rich_response, SearchDocsResponse):
|
||||
search_docs = tool_response.rich_response.search_docs
|
||||
displayed_docs = tool_response.rich_response.displayed_docs
|
||||
|
||||
# Add ALL search docs to state container for DB persistence
|
||||
if search_docs:
|
||||
state_container.add_search_docs(search_docs)
|
||||
|
||||
if gathered_documents:
|
||||
gathered_documents.extend(search_docs)
|
||||
else:
|
||||
@@ -682,12 +572,6 @@ def run_llm_loop(
|
||||
):
|
||||
generated_images = tool_response.rich_response.generated_images
|
||||
|
||||
saved_response = (
|
||||
tool_response.rich_response
|
||||
if isinstance(tool_response.rich_response, str)
|
||||
else tool_response.llm_facing_response
|
||||
)
|
||||
|
||||
tool_call_info = ToolCallInfo(
|
||||
parent_tool_call_id=None, # Top-level tool calls are attached to the chat message
|
||||
turn_index=llm_cycle_count + reasoning_cycles,
|
||||
@@ -697,8 +581,8 @@ def run_llm_loop(
|
||||
tool_id=tool.id,
|
||||
reasoning_tokens=llm_step_result.reasoning, # All tool calls from this loop share the same reasoning
|
||||
tool_call_arguments=tool_call.tool_args,
|
||||
tool_call_response=saved_response,
|
||||
search_docs=displayed_docs or search_docs,
|
||||
tool_call_response=tool_response.llm_facing_response,
|
||||
search_docs=search_docs,
|
||||
generated_images=generated_images,
|
||||
)
|
||||
# Add to state container for partial save support
|
||||
@@ -753,12 +637,7 @@ def run_llm_loop(
|
||||
should_cite_documents = True
|
||||
|
||||
if not llm_step_result or not llm_step_result.answer:
|
||||
raise RuntimeError(
|
||||
"The LLM did not return an answer. "
|
||||
"Typically this is an issue with LLMs that do not support tool calling natively, "
|
||||
"or the model serving API is not configured correctly. "
|
||||
"This may also happen with models that are lower quality outputting invalid tool calls."
|
||||
)
|
||||
raise RuntimeError("LLM did not return an answer.")
|
||||
|
||||
emitter.emit(
|
||||
Packet(
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Mapping
|
||||
@@ -14,7 +13,6 @@ from onyx.chat.emitter import Emitter
|
||||
from onyx.chat.models import ChatMessageSimple
|
||||
from onyx.chat.models import LlmStepResult
|
||||
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
|
||||
from onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.file_store.models import ChatFileType
|
||||
@@ -50,7 +48,6 @@ from onyx.tools.models import ToolCallKickoff
|
||||
from onyx.tracing.framework.create import generation_span
|
||||
from onyx.utils.b64 import get_image_type_from_bytes
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.text_processing import find_all_json_objects
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -139,11 +136,12 @@ def _format_message_history_for_logging(
|
||||
|
||||
separator = "================================================"
|
||||
|
||||
# Handle single ChatCompletionMessage - wrap in list for uniform processing
|
||||
if isinstance(
|
||||
message_history, (SystemMessage, UserMessage, AssistantMessage, ToolMessage)
|
||||
):
|
||||
message_history = [message_history]
|
||||
# Handle string input
|
||||
if isinstance(message_history, str):
|
||||
formatted_lines.append("Message [string]:")
|
||||
formatted_lines.append(separator)
|
||||
formatted_lines.append(f"{message_history}")
|
||||
return "\n".join(formatted_lines)
|
||||
|
||||
# Handle sequence of messages
|
||||
for i, msg in enumerate(message_history):
|
||||
@@ -213,8 +211,7 @@ def _update_tool_call_with_delta(
|
||||
|
||||
if index not in tool_calls_in_progress:
|
||||
tool_calls_in_progress[index] = {
|
||||
# Fallback ID in case the provider never sends one via deltas.
|
||||
"id": f"fallback_{uuid.uuid4().hex}",
|
||||
"id": None,
|
||||
"name": None,
|
||||
"arguments": "",
|
||||
}
|
||||
@@ -280,144 +277,6 @@ def _extract_tool_call_kickoffs(
|
||||
return tool_calls
|
||||
|
||||
|
||||
def extract_tool_calls_from_response_text(
|
||||
response_text: str | None,
|
||||
tool_definitions: list[dict],
|
||||
placement: Placement,
|
||||
) -> list[ToolCallKickoff]:
|
||||
"""Extract tool calls from LLM response text by matching JSON against tool definitions.
|
||||
|
||||
This is a fallback mechanism for when the LLM was expected to return tool calls
|
||||
but didn't use the proper tool call format. It searches for JSON objects in the
|
||||
response text that match the structure of available tools.
|
||||
|
||||
Args:
|
||||
response_text: The LLM's text response to search for tool calls
|
||||
tool_definitions: List of tool definitions to match against
|
||||
placement: Placement information for the tool calls
|
||||
|
||||
Returns:
|
||||
List of ToolCallKickoff objects for any matched tool calls
|
||||
"""
|
||||
if not response_text or not tool_definitions:
|
||||
return []
|
||||
|
||||
# Build a map of tool names to their definitions
|
||||
tool_name_to_def: dict[str, dict] = {}
|
||||
for tool_def in tool_definitions:
|
||||
if tool_def.get("type") == "function" and "function" in tool_def:
|
||||
func_def = tool_def["function"]
|
||||
tool_name = func_def.get("name")
|
||||
if tool_name:
|
||||
tool_name_to_def[tool_name] = func_def
|
||||
|
||||
if not tool_name_to_def:
|
||||
return []
|
||||
|
||||
# Find all JSON objects in the response text
|
||||
json_objects = find_all_json_objects(response_text)
|
||||
|
||||
tool_calls: list[ToolCallKickoff] = []
|
||||
tab_index = 0
|
||||
|
||||
for json_obj in json_objects:
|
||||
matched_tool_call = _try_match_json_to_tool(json_obj, tool_name_to_def)
|
||||
if matched_tool_call:
|
||||
tool_name, tool_args = matched_tool_call
|
||||
tool_calls.append(
|
||||
ToolCallKickoff(
|
||||
tool_call_id=f"extracted_{uuid.uuid4().hex[:8]}",
|
||||
tool_name=tool_name,
|
||||
tool_args=tool_args,
|
||||
placement=Placement(
|
||||
turn_index=placement.turn_index,
|
||||
tab_index=tab_index,
|
||||
sub_turn_index=placement.sub_turn_index,
|
||||
),
|
||||
)
|
||||
)
|
||||
tab_index += 1
|
||||
|
||||
logger.info(
|
||||
f"Extracted {len(tool_calls)} tool call(s) from response text as fallback"
|
||||
)
|
||||
|
||||
return tool_calls
|
||||
|
||||
|
||||
def _try_match_json_to_tool(
|
||||
json_obj: dict[str, Any],
|
||||
tool_name_to_def: dict[str, dict],
|
||||
) -> tuple[str, dict[str, Any]] | None:
|
||||
"""Try to match a JSON object to a tool definition.
|
||||
|
||||
Supports several formats:
|
||||
1. Direct tool call format: {"name": "tool_name", "arguments": {...}}
|
||||
2. Function call format: {"function": {"name": "tool_name", "arguments": {...}}}
|
||||
3. Tool name as key: {"tool_name": {...arguments...}}
|
||||
4. Arguments matching a tool's parameter schema
|
||||
|
||||
Args:
|
||||
json_obj: The JSON object to match
|
||||
tool_name_to_def: Map of tool names to their function definitions
|
||||
|
||||
Returns:
|
||||
Tuple of (tool_name, tool_args) if matched, None otherwise
|
||||
"""
|
||||
# Format 1: Direct tool call format {"name": "...", "arguments": {...}}
|
||||
if "name" in json_obj and json_obj["name"] in tool_name_to_def:
|
||||
tool_name = json_obj["name"]
|
||||
arguments = json_obj.get("arguments", json_obj.get("parameters", {}))
|
||||
if isinstance(arguments, str):
|
||||
try:
|
||||
arguments = json.loads(arguments)
|
||||
except json.JSONDecodeError:
|
||||
arguments = {}
|
||||
if isinstance(arguments, dict):
|
||||
return (tool_name, arguments)
|
||||
|
||||
# Format 2: Function call format {"function": {"name": "...", "arguments": {...}}}
|
||||
if "function" in json_obj and isinstance(json_obj["function"], dict):
|
||||
func_obj = json_obj["function"]
|
||||
if "name" in func_obj and func_obj["name"] in tool_name_to_def:
|
||||
tool_name = func_obj["name"]
|
||||
arguments = func_obj.get("arguments", func_obj.get("parameters", {}))
|
||||
if isinstance(arguments, str):
|
||||
try:
|
||||
arguments = json.loads(arguments)
|
||||
except json.JSONDecodeError:
|
||||
arguments = {}
|
||||
if isinstance(arguments, dict):
|
||||
return (tool_name, arguments)
|
||||
|
||||
# Format 3: Tool name as key {"tool_name": {...arguments...}}
|
||||
for tool_name in tool_name_to_def:
|
||||
if tool_name in json_obj:
|
||||
arguments = json_obj[tool_name]
|
||||
if isinstance(arguments, dict):
|
||||
return (tool_name, arguments)
|
||||
|
||||
# Format 4: Check if the JSON object matches a tool's parameter schema
|
||||
for tool_name, func_def in tool_name_to_def.items():
|
||||
params = func_def.get("parameters", {})
|
||||
properties = params.get("properties", {})
|
||||
required = params.get("required", [])
|
||||
|
||||
if not properties:
|
||||
continue
|
||||
|
||||
# Check if all required parameters are present (empty required = all optional)
|
||||
if all(req in json_obj for req in required):
|
||||
# Check if any of the tool's properties are in the JSON object
|
||||
matching_props = [prop for prop in properties if prop in json_obj]
|
||||
if matching_props:
|
||||
# Filter to only include known properties
|
||||
filtered_args = {k: v for k, v in json_obj.items() if k in properties}
|
||||
return (tool_name, filtered_args)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def translate_history_to_llm_format(
|
||||
history: list[ChatMessageSimple],
|
||||
llm_config: LLMConfig,
|
||||
@@ -433,7 +292,7 @@ def translate_history_to_llm_format(
|
||||
|
||||
for idx, msg in enumerate(history):
|
||||
# if the message is being added to the history
|
||||
if PROMPT_CACHE_CHAT_HISTORY and msg.message_type in [
|
||||
if msg.message_type in [
|
||||
MessageType.SYSTEM,
|
||||
MessageType.USER,
|
||||
MessageType.ASSISTANT,
|
||||
@@ -722,18 +581,6 @@ def run_llm_step_pkt_generator(
|
||||
}
|
||||
# Note: LLM cost tracking is now handled in multi_llm.py
|
||||
delta = packet.choice.delta
|
||||
|
||||
# Weird behavior from some model providers, just log and ignore for now
|
||||
if (
|
||||
delta.content is None
|
||||
and delta.reasoning_content is None
|
||||
and delta.tool_calls is None
|
||||
):
|
||||
logger.warning(
|
||||
f"LLM packet is empty (no contents, reasoning or tool calls). Skipping: {packet}"
|
||||
)
|
||||
continue
|
||||
|
||||
if not first_action_recorded and _delta_has_action(delta):
|
||||
span_generation.span_data.time_to_first_action_seconds = (
|
||||
time.monotonic() - stream_start_time
|
||||
@@ -860,11 +707,6 @@ def run_llm_step_pkt_generator(
|
||||
),
|
||||
obj=result,
|
||||
)
|
||||
# Track emitted citation for saving
|
||||
if state_container:
|
||||
state_container.add_emitted_citation(
|
||||
result.citation_number
|
||||
)
|
||||
else:
|
||||
# When citation_processor is None, use delta.content directly without modification
|
||||
accumulated_answer += delta.content
|
||||
@@ -991,9 +833,6 @@ def run_llm_step_pkt_generator(
|
||||
),
|
||||
obj=result,
|
||||
)
|
||||
# Track emitted citation for saving
|
||||
if state_container:
|
||||
state_container.add_emitted_citation(result.citation_number)
|
||||
|
||||
# Note: Content (AgentResponseDelta) doesn't need an explicit end packet - OverallStop handles it
|
||||
# Tool calls are handled by tool execution code and emit their own packets (e.g., SectionEnd)
|
||||
@@ -1001,14 +840,14 @@ def run_llm_step_pkt_generator(
|
||||
logger.debug(f"Accumulated reasoning: {accumulated_reasoning}")
|
||||
logger.debug(f"Accumulated answer: {accumulated_answer}")
|
||||
|
||||
if tool_calls:
|
||||
tool_calls_str = "\n".join(
|
||||
f" - {tc.tool_name}: {json.dumps(tc.tool_args, indent=4)}"
|
||||
for tc in tool_calls
|
||||
)
|
||||
logger.debug(f"Tool calls:\n{tool_calls_str}")
|
||||
else:
|
||||
logger.debug("Tool calls: []")
|
||||
if tool_calls:
|
||||
tool_calls_str = "\n".join(
|
||||
f" - {tc.tool_name}: {json.dumps(tc.tool_args, indent=4)}"
|
||||
for tc in tool_calls
|
||||
)
|
||||
logger.debug(f"Tool calls:\n{tool_calls_str}")
|
||||
else:
|
||||
logger.debug("Tool calls: []")
|
||||
|
||||
return (
|
||||
LlmStepResult(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
@@ -7,7 +8,10 @@ from uuid import UUID
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.context.search.enums import QueryFlow
|
||||
from onyx.context.search.enums import RecencyBiasSetting
|
||||
from onyx.context.search.enums import SearchType
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.file_store.models import FileDescriptor
|
||||
@@ -20,6 +24,25 @@ from onyx.tools.models import ToolCallKickoff
|
||||
from onyx.tools.tool_implementations.custom.base_tool_types import ToolResultType
|
||||
|
||||
|
||||
# First chunk of info for streaming QA
|
||||
class QADocsResponse(BaseModel):
|
||||
top_documents: list[SearchDoc]
|
||||
rephrased_query: str | None = None
|
||||
predicted_flow: QueryFlow | None
|
||||
predicted_search: SearchType | None
|
||||
applied_source_filters: list[DocumentSource] | None
|
||||
applied_time_cutoff: datetime | None
|
||||
recency_bias_multiplier: float
|
||||
|
||||
def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore
|
||||
initial_dict = super().model_dump(mode="json", *args, **kwargs) # type: ignore
|
||||
initial_dict["applied_time_cutoff"] = (
|
||||
self.applied_time_cutoff.isoformat() if self.applied_time_cutoff else None
|
||||
)
|
||||
|
||||
return initial_dict
|
||||
|
||||
|
||||
class StreamStopReason(Enum):
|
||||
CONTEXT_LENGTH = "context_length"
|
||||
CANCELLED = "cancelled"
|
||||
@@ -47,11 +70,22 @@ class UserKnowledgeFilePacket(BaseModel):
|
||||
user_files: list[FileDescriptor]
|
||||
|
||||
|
||||
class LLMRelevanceFilterResponse(BaseModel):
|
||||
llm_selected_doc_indices: list[int]
|
||||
|
||||
|
||||
class RelevanceAnalysis(BaseModel):
|
||||
relevant: bool
|
||||
content: str | None = None
|
||||
|
||||
|
||||
class SectionRelevancePiece(RelevanceAnalysis):
|
||||
"""LLM analysis mapped to an Inference Section"""
|
||||
|
||||
document_id: str
|
||||
chunk_id: int # ID of the center chunk for a given inference section
|
||||
|
||||
|
||||
class DocumentRelevance(BaseModel):
|
||||
"""Contains all relevance information for a given search"""
|
||||
|
||||
@@ -82,6 +116,12 @@ class OnyxAnswer(BaseModel):
|
||||
answer: str | None
|
||||
|
||||
|
||||
class ThreadMessage(BaseModel):
|
||||
message: str
|
||||
sender: str | None = None
|
||||
role: MessageType = MessageType.USER
|
||||
|
||||
|
||||
class FileChatDisplay(BaseModel):
|
||||
file_ids: list[str]
|
||||
|
||||
@@ -118,6 +158,7 @@ class PersonaOverrideConfig(BaseModel):
|
||||
num_chunks: float | None = None
|
||||
llm_relevance_filter: bool = False
|
||||
llm_filter_extraction: bool = False
|
||||
recency_bias: RecencyBiasSetting = RecencyBiasSetting.AUTO
|
||||
llm_model_provider_override: str | None = None
|
||||
llm_model_version_override: str | None = None
|
||||
|
||||
|
||||
@@ -38,10 +38,10 @@ from onyx.chat.save_chat import save_chat_turn
|
||||
from onyx.chat.stop_signal_checker import is_connected as check_stop_signal
|
||||
from onyx.chat.stop_signal_checker import reset_cancel_status
|
||||
from onyx.configs.constants import DEFAULT_PERSONA_ID
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.configs.constants import MilestoneRecordType
|
||||
from onyx.context.search.models import BaseFilters
|
||||
from onyx.context.search.enums import OptionalSearchSetting
|
||||
from onyx.context.search.models import CitationDocInfo
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.db.chat import create_new_chat_message
|
||||
from onyx.db.chat import get_chat_session_by_id
|
||||
@@ -50,7 +50,6 @@ from onyx.db.chat import reserve_message_id
|
||||
from onyx.db.memory import get_memories
|
||||
from onyx.db.models import ChatMessage
|
||||
from onyx.db.models import ChatSession
|
||||
from onyx.db.models import Persona
|
||||
from onyx.db.models import User
|
||||
from onyx.db.projects import get_project_token_count
|
||||
from onyx.db.projects import get_user_files_from_project
|
||||
@@ -68,7 +67,6 @@ from onyx.onyxbot.slack.models import SlackContext
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
|
||||
from onyx.server.query_and_chat.models import CreateChatMessageRequest
|
||||
from onyx.server.query_and_chat.models import OptionalSearchSetting
|
||||
from onyx.server.query_and_chat.models import SendMessageRequest
|
||||
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
|
||||
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
|
||||
@@ -95,22 +93,6 @@ logger = setup_logger()
|
||||
ERROR_TYPE_CANCELLED = "cancelled"
|
||||
|
||||
|
||||
def _should_enable_slack_search(
|
||||
persona: Persona,
|
||||
filters: BaseFilters | None,
|
||||
) -> bool:
|
||||
"""Determine if Slack search should be enabled.
|
||||
|
||||
Returns True if:
|
||||
- Source type filter exists and includes Slack, OR
|
||||
- Default persona with no source type filter
|
||||
"""
|
||||
source_types = filters.source_type if filters else None
|
||||
return (source_types is not None and DocumentSource.SLACK in source_types) or (
|
||||
persona.id == DEFAULT_PERSONA_ID and source_types is None
|
||||
)
|
||||
|
||||
|
||||
def _extract_project_file_texts_and_images(
|
||||
project_id: int | None,
|
||||
user_id: UUID | None,
|
||||
@@ -299,7 +281,6 @@ def handle_stream_message_objects(
|
||||
# on the `new_msg_req.message`. Currently, requires a state where the last message is a
|
||||
litellm_additional_headers: dict[str, str] | None = None,
|
||||
custom_tool_additional_headers: dict[str, str] | None = None,
|
||||
mcp_headers: dict[str, str] | None = None,
|
||||
bypass_acl: bool = False,
|
||||
# Additional context that should be included in the chat history, for example:
|
||||
# Slack threads where the conversation cannot be represented by a chain of User/Assistant
|
||||
@@ -523,15 +504,11 @@ def handle_stream_message_objects(
|
||||
),
|
||||
bypass_acl=bypass_acl,
|
||||
slack_context=slack_context,
|
||||
enable_slack_search=_should_enable_slack_search(
|
||||
persona, new_msg_req.internal_search_filters
|
||||
),
|
||||
),
|
||||
custom_tool_config=CustomToolConfig(
|
||||
chat_session_id=chat_session.id,
|
||||
message_id=user_message.id if user_message else None,
|
||||
additional_headers=custom_tool_additional_headers,
|
||||
mcp_headers=mcp_headers,
|
||||
),
|
||||
allowed_tool_ids=new_msg_req.allowed_tool_ids,
|
||||
search_usage_forcing_setting=project_search_config.search_usage,
|
||||
@@ -652,7 +629,6 @@ def handle_stream_message_objects(
|
||||
forced_tool_id=forced_tool_id,
|
||||
user_identity=user_identity,
|
||||
chat_session_id=str(chat_session.id),
|
||||
include_citations=new_msg_req.include_citations,
|
||||
)
|
||||
|
||||
except ValueError as e:
|
||||
@@ -743,16 +719,27 @@ def llm_loop_completion_handle(
|
||||
else:
|
||||
final_answer = "The generation was stopped by the user."
|
||||
|
||||
# Build citation_docs_info from accumulated citations in state container
|
||||
citation_docs_info: list[CitationDocInfo] = []
|
||||
seen_citation_nums: set[int] = set()
|
||||
for citation_num, search_doc in state_container.citation_to_doc.items():
|
||||
if citation_num not in seen_citation_nums:
|
||||
seen_citation_nums.add(citation_num)
|
||||
citation_docs_info.append(
|
||||
CitationDocInfo(
|
||||
search_doc=search_doc,
|
||||
citation_number=citation_num,
|
||||
)
|
||||
)
|
||||
|
||||
save_chat_turn(
|
||||
message_text=final_answer,
|
||||
reasoning_tokens=state_container.reasoning_tokens,
|
||||
citation_to_doc=state_container.citation_to_doc,
|
||||
citation_docs_info=citation_docs_info,
|
||||
tool_calls=state_container.tool_calls,
|
||||
all_search_docs=state_container.get_all_search_docs(),
|
||||
db_session=db_session,
|
||||
assistant_message=assistant_message,
|
||||
is_clarification=state_container.is_clarification,
|
||||
emitted_citations=state_container.get_emitted_citations(),
|
||||
)
|
||||
|
||||
|
||||
@@ -803,7 +790,6 @@ def stream_chat_message_objects(
|
||||
parent_message_id=new_msg_req.parent_message_id,
|
||||
chat_session_id=new_msg_req.chat_session_id,
|
||||
origin=new_msg_req.origin,
|
||||
include_citations=new_msg_req.include_citations,
|
||||
)
|
||||
return handle_stream_message_objects(
|
||||
new_msg_req=translated_new_msg_req,
|
||||
|
||||
@@ -18,7 +18,6 @@ from onyx.prompts.prompt_utils import handle_onyx_date_awareness
|
||||
from onyx.prompts.prompt_utils import replace_citation_guidance_tag
|
||||
from onyx.prompts.tool_prompts import GENERATE_IMAGE_GUIDANCE
|
||||
from onyx.prompts.tool_prompts import INTERNAL_SEARCH_GUIDANCE
|
||||
from onyx.prompts.tool_prompts import MEMORY_GUIDANCE
|
||||
from onyx.prompts.tool_prompts import OPEN_URLS_GUIDANCE
|
||||
from onyx.prompts.tool_prompts import PYTHON_TOOL_GUIDANCE
|
||||
from onyx.prompts.tool_prompts import TOOL_DESCRIPTION_SEARCH_GUIDANCE
|
||||
@@ -29,7 +28,6 @@ from onyx.tools.interface import Tool
|
||||
from onyx.tools.tool_implementations.images.image_generation_tool import (
|
||||
ImageGenerationTool,
|
||||
)
|
||||
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
|
||||
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
|
||||
from onyx.tools.tool_implementations.python.python_tool import PythonTool
|
||||
from onyx.tools.tool_implementations.search.search_tool import SearchTool
|
||||
@@ -180,9 +178,8 @@ def build_system_prompt(
|
||||
site_colon_disabled=WEB_SEARCH_SITE_DISABLED_GUIDANCE
|
||||
)
|
||||
+ OPEN_URLS_GUIDANCE
|
||||
+ PYTHON_TOOL_GUIDANCE
|
||||
+ GENERATE_IMAGE_GUIDANCE
|
||||
+ MEMORY_GUIDANCE
|
||||
+ PYTHON_TOOL_GUIDANCE
|
||||
)
|
||||
return system_prompt
|
||||
|
||||
@@ -196,7 +193,6 @@ def build_system_prompt(
|
||||
has_generate_image = any(
|
||||
isinstance(tool, ImageGenerationTool) for tool in tools
|
||||
)
|
||||
has_memory = any(isinstance(tool, MemoryTool) for tool in tools)
|
||||
|
||||
if has_web_search or has_internal_search or include_all_guidance:
|
||||
system_prompt += TOOL_DESCRIPTION_SEARCH_GUIDANCE
|
||||
@@ -226,7 +222,4 @@ def build_system_prompt(
|
||||
if has_generate_image or include_all_guidance:
|
||||
system_prompt += GENERATE_IMAGE_GUIDANCE
|
||||
|
||||
if has_memory or include_all_guidance:
|
||||
system_prompt += MEMORY_GUIDANCE
|
||||
|
||||
return system_prompt
|
||||
|
||||
@@ -2,9 +2,8 @@ import json
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.chat.chat_state import ChatStateContainer
|
||||
from onyx.chat.chat_state import SearchDocKey
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.context.search.models import CitationDocInfo
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.db.chat import add_search_docs_to_chat_message
|
||||
from onyx.db.chat import add_search_docs_to_tool_call
|
||||
@@ -20,6 +19,22 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _create_search_doc_key(search_doc: SearchDoc) -> tuple[str, int, tuple[str, ...]]:
|
||||
"""
|
||||
Create a unique key for a SearchDoc that accounts for different versions of the same
|
||||
document/chunk with different match_highlights.
|
||||
|
||||
Args:
|
||||
search_doc: The SearchDoc pydantic model to create a key for
|
||||
|
||||
Returns:
|
||||
A tuple of (document_id, chunk_ind, sorted match_highlights) that uniquely identifies
|
||||
this specific version of the document
|
||||
"""
|
||||
match_highlights_tuple = tuple(sorted(search_doc.match_highlights or []))
|
||||
return (search_doc.document_id, search_doc.chunk_ind, match_highlights_tuple)
|
||||
|
||||
|
||||
def _create_and_link_tool_calls(
|
||||
tool_calls: list[ToolCallInfo],
|
||||
assistant_message: ChatMessage,
|
||||
@@ -139,36 +154,38 @@ def save_chat_turn(
|
||||
message_text: str,
|
||||
reasoning_tokens: str | None,
|
||||
tool_calls: list[ToolCallInfo],
|
||||
citation_to_doc: dict[int, SearchDoc],
|
||||
all_search_docs: dict[SearchDocKey, SearchDoc],
|
||||
citation_docs_info: list[CitationDocInfo],
|
||||
db_session: Session,
|
||||
assistant_message: ChatMessage,
|
||||
is_clarification: bool = False,
|
||||
emitted_citations: set[int] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Save a chat turn by populating the assistant_message and creating related entities.
|
||||
|
||||
This function:
|
||||
1. Updates the ChatMessage with text, reasoning tokens, and token count
|
||||
2. Creates DB SearchDoc entries from pre-deduplicated all_search_docs
|
||||
3. Builds tool_call -> search_doc mapping for displayed docs
|
||||
4. Builds citation mapping from citation_to_doc
|
||||
5. Links all unique SearchDocs to the ChatMessage
|
||||
2. Creates SearchDoc entries from ToolCall search_docs (for tool calls that returned documents)
|
||||
3. Collects all unique SearchDocs from all tool calls and links them to ChatMessage
|
||||
4. Builds citation mapping from citation_docs_info
|
||||
5. Links all unique SearchDocs from tool calls to the ChatMessage
|
||||
6. Creates ToolCall entries and links SearchDocs to them
|
||||
7. Builds the citations mapping for the ChatMessage
|
||||
|
||||
Deduplication Logic:
|
||||
- SearchDocs are deduplicated using (document_id, chunk_ind, match_highlights) as the key
|
||||
- This ensures that the same document/chunk with different match_highlights (from different
|
||||
queries) are stored as separate SearchDoc entries
|
||||
- Each ToolCall and ChatMessage will map to the correct version of the SearchDoc that
|
||||
matches its specific query highlights
|
||||
|
||||
Args:
|
||||
message_text: The message content to save
|
||||
reasoning_tokens: Optional reasoning tokens for the message
|
||||
tool_calls: List of tool call information to create ToolCall entries (may include search_docs)
|
||||
citation_to_doc: Mapping from citation number to SearchDoc for building citations
|
||||
all_search_docs: Pre-deduplicated search docs from ChatStateContainer
|
||||
citation_docs_info: List of citation document information for building citations mapping
|
||||
db_session: Database session for persistence
|
||||
assistant_message: The ChatMessage object to populate (should already exist in DB)
|
||||
is_clarification: Whether this assistant message is a clarification question (deep research flow)
|
||||
emitted_citations: Set of citation numbers that were actually emitted during streaming.
|
||||
If provided, only citations in this set will be saved; others are filtered out.
|
||||
"""
|
||||
# 1. Update ChatMessage with message content, reasoning tokens, and token count
|
||||
assistant_message.message = message_text
|
||||
@@ -183,53 +200,53 @@ def save_chat_turn(
|
||||
else:
|
||||
assistant_message.token_count = 0
|
||||
|
||||
# 2. Create DB SearchDoc entries from pre-deduplicated all_search_docs
|
||||
search_doc_key_to_id: dict[SearchDocKey, int] = {}
|
||||
for key, search_doc_py in all_search_docs.items():
|
||||
db_search_doc = create_db_search_doc(
|
||||
server_search_doc=search_doc_py,
|
||||
db_session=db_session,
|
||||
commit=False,
|
||||
)
|
||||
search_doc_key_to_id[key] = db_search_doc.id
|
||||
|
||||
# 3. Build tool_call -> search_doc mapping (for displayed docs in each tool call)
|
||||
# 2. Create SearchDoc entries from tool_calls
|
||||
# Build mapping from SearchDoc to DB SearchDoc ID
|
||||
# Use (document_id, chunk_ind, match_highlights) as key to avoid duplicates
|
||||
# while ensuring different versions with different highlights are stored separately
|
||||
search_doc_key_to_id: dict[tuple[str, int, tuple[str, ...]], int] = {}
|
||||
tool_call_to_search_doc_ids: dict[str, list[int]] = {}
|
||||
|
||||
# Process tool calls and their search docs
|
||||
for tool_call_info in tool_calls:
|
||||
if tool_call_info.search_docs:
|
||||
search_doc_ids_for_tool: list[int] = []
|
||||
for search_doc_py in tool_call_info.search_docs:
|
||||
key = ChatStateContainer.create_search_doc_key(search_doc_py)
|
||||
if key in search_doc_key_to_id:
|
||||
search_doc_ids_for_tool.append(search_doc_key_to_id[key])
|
||||
# Create a unique key for this SearchDoc version
|
||||
search_doc_key = _create_search_doc_key(search_doc_py)
|
||||
|
||||
# Check if we've already created this exact SearchDoc version
|
||||
if search_doc_key in search_doc_key_to_id:
|
||||
search_doc_ids_for_tool.append(search_doc_key_to_id[search_doc_key])
|
||||
else:
|
||||
# Displayed doc not in all_search_docs - create it
|
||||
# This can happen if displayed_docs contains docs not in search_docs
|
||||
# Create new DB SearchDoc entry
|
||||
db_search_doc = create_db_search_doc(
|
||||
server_search_doc=search_doc_py,
|
||||
db_session=db_session,
|
||||
commit=False,
|
||||
)
|
||||
search_doc_key_to_id[key] = db_search_doc.id
|
||||
search_doc_key_to_id[search_doc_key] = db_search_doc.id
|
||||
search_doc_ids_for_tool.append(db_search_doc.id)
|
||||
|
||||
tool_call_to_search_doc_ids[tool_call_info.tool_call_id] = list(
|
||||
set(search_doc_ids_for_tool)
|
||||
)
|
||||
|
||||
# Collect all search doc IDs for ChatMessage linking
|
||||
all_search_doc_ids_set: set[int] = set(search_doc_key_to_id.values())
|
||||
# 3. Collect all unique SearchDoc IDs from all tool calls to link to ChatMessage
|
||||
# Use a set to deduplicate by ID (since we've already deduplicated by key above)
|
||||
all_search_doc_ids_set: set[int] = set()
|
||||
for search_doc_ids in tool_call_to_search_doc_ids.values():
|
||||
all_search_doc_ids_set.update(search_doc_ids)
|
||||
|
||||
# 4. Build a citation mapping from the citation number to the saved DB SearchDoc ID
|
||||
# Only include citations that were actually emitted during streaming
|
||||
# 4. Build citation mapping from citation_docs_info
|
||||
citation_number_to_search_doc_id: dict[int, int] = {}
|
||||
|
||||
for citation_num, search_doc_py in citation_to_doc.items():
|
||||
# Skip citations that weren't actually emitted (if emitted_citations is provided)
|
||||
if emitted_citations is not None and citation_num not in emitted_citations:
|
||||
continue
|
||||
for citation_doc_info in citation_docs_info:
|
||||
# Extract SearchDoc pydantic model
|
||||
search_doc_py = citation_doc_info.search_doc
|
||||
|
||||
# Create the unique key for this SearchDoc version
|
||||
search_doc_key = ChatStateContainer.create_search_doc_key(search_doc_py)
|
||||
search_doc_key = _create_search_doc_key(search_doc_py)
|
||||
|
||||
# Get the search doc ID (should already exist from processing tool_calls)
|
||||
if search_doc_key in search_doc_key_to_id:
|
||||
@@ -266,7 +283,10 @@ def save_chat_turn(
|
||||
all_search_doc_ids_set.add(db_search_doc_id)
|
||||
|
||||
# Build mapping from citation number to search doc ID
|
||||
citation_number_to_search_doc_id[citation_num] = db_search_doc_id
|
||||
if citation_doc_info.citation_number is not None:
|
||||
citation_number_to_search_doc_id[citation_doc_info.citation_number] = (
|
||||
db_search_doc_id
|
||||
)
|
||||
|
||||
# 5. Link all unique SearchDocs (from both tool calls and citations) to ChatMessage
|
||||
final_search_doc_ids: list[int] = list(all_search_doc_ids_set)
|
||||
@@ -286,10 +306,23 @@ def save_chat_turn(
|
||||
tool_call_to_search_doc_ids=tool_call_to_search_doc_ids,
|
||||
)
|
||||
|
||||
# 7. Build citations mapping - use the mapping we already built in step 4
|
||||
assistant_message.citations = (
|
||||
citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
|
||||
)
|
||||
# 7. Build citations mapping from citation_docs_info
|
||||
# Any citation_doc_info with a citation_number appeared in the text and should be mapped
|
||||
citations: dict[int, int] = {}
|
||||
for citation_doc_info in citation_docs_info:
|
||||
if citation_doc_info.citation_number is not None:
|
||||
search_doc_id = citation_number_to_search_doc_id.get(
|
||||
citation_doc_info.citation_number
|
||||
)
|
||||
if search_doc_id is not None:
|
||||
citations[citation_doc_info.citation_number] = search_doc_id
|
||||
else:
|
||||
logger.warning(
|
||||
f"Citation number {citation_doc_info.citation_number} found in citation_docs_info "
|
||||
f"but no matching search doc ID in mapping"
|
||||
)
|
||||
|
||||
assistant_message.citations = citations if citations else None
|
||||
|
||||
# Finally save the messages, tool calls, and docs
|
||||
db_session.commit()
|
||||
|
||||
@@ -22,14 +22,6 @@ APP_PORT = 8080
|
||||
# prefix from requests directed towards the API server. In these cases, set this to `/api`
|
||||
APP_API_PREFIX = os.environ.get("API_PREFIX", "")
|
||||
|
||||
# Certain services need to make HTTP requests to the API server, such as the MCP server and Discord bot
|
||||
API_SERVER_PROTOCOL = os.environ.get("API_SERVER_PROTOCOL", "http")
|
||||
API_SERVER_HOST = os.environ.get("API_SERVER_HOST", "127.0.0.1")
|
||||
# This override allows self-hosting the MCP server with Onyx Cloud backend.
|
||||
API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS = os.environ.get(
|
||||
"API_SERVER_URL_OVERRIDE_FOR_HTTP_REQUESTS"
|
||||
)
|
||||
|
||||
# Whether to send user metadata (user_id/email and session_id) to the LLM provider.
|
||||
# Disabled by default.
|
||||
SEND_USER_METADATA_TO_LLM_PROVIDER = (
|
||||
@@ -208,19 +200,8 @@ OPENSEARCH_REST_API_PORT = int(os.environ.get("OPENSEARCH_REST_API_PORT") or 920
|
||||
OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
|
||||
OPENSEARCH_ADMIN_PASSWORD = os.environ.get("OPENSEARCH_ADMIN_PASSWORD", "")
|
||||
|
||||
# This is the "base" config for now, the idea is that at least for our dev
|
||||
# environments we always want to be dual indexing into both OpenSearch and Vespa
|
||||
# to stress test the new codepaths. Only enable this if there is some instance
|
||||
# of OpenSearch running for the relevant Onyx instance.
|
||||
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX = (
|
||||
os.environ.get("ENABLE_OPENSEARCH_INDEXING_FOR_ONYX", "").lower() == "true"
|
||||
)
|
||||
# Given that the "base" config above is true, this enables whether we want to
|
||||
# retrieve from OpenSearch or Vespa. We want to be able to quickly toggle this
|
||||
# in the event we see issues with OpenSearch retrieval in our dev environments.
|
||||
ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
|
||||
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
|
||||
ENABLE_OPENSEARCH_FOR_ONYX = (
|
||||
os.environ.get("ENABLE_OPENSEARCH_FOR_ONYX", "").lower() == "true"
|
||||
)
|
||||
|
||||
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
|
||||
@@ -749,10 +730,6 @@ JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default
|
||||
LOG_ONYX_MODEL_INTERACTIONS = (
|
||||
os.environ.get("LOG_ONYX_MODEL_INTERACTIONS", "").lower() == "true"
|
||||
)
|
||||
|
||||
PROMPT_CACHE_CHAT_HISTORY = (
|
||||
os.environ.get("PROMPT_CACHE_CHAT_HISTORY", "").lower() == "true"
|
||||
)
|
||||
# If set to `true` will enable additional logs about Vespa query performance
|
||||
# (time spent on finding the right docs + time spent fetching summaries from disk)
|
||||
LOG_VESPA_TIMING_INFORMATION = (
|
||||
@@ -873,7 +850,6 @@ AZURE_IMAGE_DEPLOYMENT_NAME = os.environ.get(
|
||||
|
||||
# configurable image model
|
||||
IMAGE_MODEL_NAME = os.environ.get("IMAGE_MODEL_NAME", "gpt-image-1")
|
||||
IMAGE_MODEL_PROVIDER = os.environ.get("IMAGE_MODEL_PROVIDER", "openai")
|
||||
|
||||
# Use managed Vespa (Vespa Cloud). If set, must also set VESPA_CLOUD_URL, VESPA_CLOUD_CERT_PATH and VESPA_CLOUD_KEY_PATH
|
||||
MANAGED_VESPA = os.environ.get("MANAGED_VESPA", "").lower() == "true"
|
||||
@@ -1026,19 +1002,3 @@ INSTANCE_TYPE = (
|
||||
if os.environ.get("IS_MANAGED_INSTANCE", "").lower() == "true"
|
||||
else "cloud" if AUTH_TYPE == AuthType.CLOUD else "self_hosted"
|
||||
)
|
||||
|
||||
|
||||
## Discord Bot Configuration
|
||||
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN")
|
||||
DISCORD_BOT_INVOKE_CHAR = os.environ.get("DISCORD_BOT_INVOKE_CHAR", "!")
|
||||
|
||||
|
||||
## Stripe Configuration
|
||||
# URL to fetch the Stripe publishable key from a public S3 bucket.
|
||||
# Publishable keys are safe to expose publicly - they can only initialize
|
||||
# Stripe.js and tokenize payment info, not make charges or access data.
|
||||
STRIPE_PUBLISHABLE_KEY_URL = (
|
||||
"https://onyx-stripe-public.s3.amazonaws.com/publishable-key.txt"
|
||||
)
|
||||
# Override for local testing with Stripe test keys (pk_test_*)
|
||||
STRIPE_PUBLISHABLE_KEY_OVERRIDE = os.environ.get("STRIPE_PUBLISHABLE_KEY")
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
|
||||
INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml"
|
||||
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
|
||||
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
|
||||
NUM_RETURNED_HITS = 50
|
||||
@@ -11,6 +12,9 @@ NUM_POSTPROCESSED_RESULTS = 20
|
||||
# May be less depending on model
|
||||
MAX_CHUNKS_FED_TO_CHAT = int(os.environ.get("MAX_CHUNKS_FED_TO_CHAT") or 25)
|
||||
|
||||
# Maximum percentage of the context window to fill with selected sections
|
||||
SELECTED_SECTIONS_MAX_WINDOW_PERCENTAGE = 0.8
|
||||
|
||||
# 1 / (1 + DOC_TIME_DECAY * doc-age-in-years), set to 0 to have no decay
|
||||
# Capped in Vespa at 0.5
|
||||
DOC_TIME_DECAY = float(
|
||||
@@ -23,6 +27,11 @@ FAVOR_RECENT_DECAY_MULTIPLIER = 2.0
|
||||
# Currently only applies to search flow not chat
|
||||
CONTEXT_CHUNKS_ABOVE = int(os.environ.get("CONTEXT_CHUNKS_ABOVE") or 1)
|
||||
CONTEXT_CHUNKS_BELOW = int(os.environ.get("CONTEXT_CHUNKS_BELOW") or 1)
|
||||
DISABLE_LLM_QUERY_REPHRASE = (
|
||||
os.environ.get("DISABLE_LLM_QUERY_REPHRASE", "").lower() == "true"
|
||||
)
|
||||
# 1 edit per 20 characters, currently unused due to fuzzy match being too slow
|
||||
QUOTE_ALLOWED_ERROR_PERCENT = 0.05
|
||||
QA_TIMEOUT = int(os.environ.get("QA_TIMEOUT") or "60") # 60 seconds
|
||||
# Weighting factor between Vector and Keyword Search, 1 for completely vector search
|
||||
HYBRID_ALPHA = max(0, min(1, float(os.environ.get("HYBRID_ALPHA") or 0.5)))
|
||||
@@ -37,6 +46,34 @@ TITLE_CONTENT_RATIO = max(
|
||||
0, min(1, float(os.environ.get("TITLE_CONTENT_RATIO") or 0.10))
|
||||
)
|
||||
|
||||
# A list of languages passed to the LLM to rephase the query
|
||||
# For example "English,French,Spanish", be sure to use the "," separator
|
||||
# TODO these are not used, should probably reintroduce these
|
||||
MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
|
||||
LANGUAGE_HINT = "\n" + (
|
||||
os.environ.get("LANGUAGE_HINT")
|
||||
or "IMPORTANT: Respond in the same language as my query!"
|
||||
)
|
||||
LANGUAGE_CHAT_NAMING_HINT = (
|
||||
os.environ.get("LANGUAGE_CHAT_NAMING_HINT")
|
||||
or "The name of the conversation must be in the same language as the user query."
|
||||
)
|
||||
|
||||
# Number of prompts each persona should have
|
||||
NUM_PERSONA_PROMPTS = 4
|
||||
NUM_PERSONA_PROMPT_GENERATION_CHUNKS = 5
|
||||
|
||||
# Agentic search takes significantly more tokens and therefore has much higher cost.
|
||||
# This configuration allows users to get a search-only experience with instant results
|
||||
# and no involvement from the LLM.
|
||||
# Additionally, some LLM providers have strict rate limits which may prohibit
|
||||
# sending many API requests at once (as is done in agentic search).
|
||||
# Whether the LLM should evaluate all of the document chunks passed in for usefulness
|
||||
# in relation to the user query
|
||||
DISABLE_LLM_DOC_RELEVANCE = (
|
||||
os.environ.get("DISABLE_LLM_DOC_RELEVANCE", "").lower() == "true"
|
||||
)
|
||||
|
||||
# Stops streaming answers back to the UI if this pattern is seen:
|
||||
STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
|
||||
|
||||
@@ -49,6 +86,9 @@ HARD_DELETE_CHATS = os.environ.get("HARD_DELETE_CHATS", "").lower() == "true"
|
||||
NUM_INTERNET_SEARCH_RESULTS = int(os.environ.get("NUM_INTERNET_SEARCH_RESULTS") or 10)
|
||||
NUM_INTERNET_SEARCH_CHUNKS = int(os.environ.get("NUM_INTERNET_SEARCH_CHUNKS") or 50)
|
||||
|
||||
# Enable in-house model for detecting connector-based filtering in queries
|
||||
ENABLE_CONNECTOR_CLASSIFIER = os.environ.get("ENABLE_CONNECTOR_CLASSIFIER", False)
|
||||
|
||||
VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2)
|
||||
|
||||
# Whether or not to use the semantic & keyword search expansions for Basic Search
|
||||
@@ -56,3 +96,5 @@ USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH = (
|
||||
os.environ.get("USE_SEMANTIC_KEYWORD_EXPANSIONS_BASIC_SEARCH", "false").lower()
|
||||
== "true"
|
||||
)
|
||||
|
||||
USE_DIV_CON_AGENT = os.environ.get("USE_DIV_CON_AGENT", "false").lower() == "true"
|
||||
|
||||
@@ -23,9 +23,6 @@ PUBLIC_DOC_PAT = "PUBLIC"
|
||||
ID_SEPARATOR = ":;:"
|
||||
DEFAULT_BOOST = 0
|
||||
|
||||
# Tag for endpoints that should be included in the public API documentation
|
||||
PUBLIC_API_TAGS: list[str | Enum] = ["public"]
|
||||
|
||||
# Cookies
|
||||
FASTAPI_USERS_AUTH_COOKIE_NAME = (
|
||||
"fastapiusersauth" # Currently a constant, but logic allows for configuration
|
||||
@@ -93,7 +90,6 @@ SSL_CERT_FILE = "bundle.pem"
|
||||
DANSWER_API_KEY_PREFIX = "API_KEY__"
|
||||
DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = "onyxapikey.ai"
|
||||
UNNAMED_KEY_PLACEHOLDER = "Unnamed"
|
||||
DISCORD_SERVICE_API_KEY_NAME = "discord-bot-service"
|
||||
|
||||
# Key-Value store keys
|
||||
KV_REINDEX_KEY = "needs_reindexing"
|
||||
@@ -153,6 +149,17 @@ CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT = 300 # 5 min
|
||||
|
||||
CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT = 30 * 60 # 30 minutes (in seconds)
|
||||
|
||||
# How long a queued user-file task is valid before workers discard it.
|
||||
# Should be longer than the beat interval (20 s) but short enough to prevent
|
||||
# indefinite queue growth. Workers drop tasks older than this without touching
|
||||
# the DB, so a shorter value = faster drain of stale duplicates.
|
||||
CELERY_USER_FILE_PROCESSING_TASK_EXPIRES = 60 # 1 minute (in seconds)
|
||||
|
||||
# Maximum number of tasks allowed in the user-file-processing queue before the
|
||||
# beat generator stops adding more. Prevents unbounded queue growth when workers
|
||||
# fall behind.
|
||||
USER_FILE_PROCESSING_MAX_QUEUE_DEPTH = 500
|
||||
|
||||
CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
|
||||
|
||||
DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:"
|
||||
@@ -423,6 +430,9 @@ class OnyxRedisLocks:
|
||||
# User file processing
|
||||
USER_FILE_PROCESSING_BEAT_LOCK = "da_lock:check_user_file_processing_beat"
|
||||
USER_FILE_PROCESSING_LOCK_PREFIX = "da_lock:user_file_processing"
|
||||
# Short-lived key set when a task is enqueued; cleared when the worker picks it up.
|
||||
# Prevents the beat from re-enqueuing the same file while a task is already queued.
|
||||
USER_FILE_QUEUED_PREFIX = "da_lock:user_file_queued"
|
||||
USER_FILE_PROJECT_SYNC_BEAT_LOCK = "da_lock:check_user_file_project_sync_beat"
|
||||
USER_FILE_PROJECT_SYNC_LOCK_PREFIX = "da_lock:user_file_project_sync"
|
||||
USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
|
||||
|
||||
@@ -4,6 +4,8 @@ import os
|
||||
# Onyx Slack Bot Configs
|
||||
#####
|
||||
ONYX_BOT_NUM_RETRIES = int(os.environ.get("ONYX_BOT_NUM_RETRIES", "5"))
|
||||
# How much of the available input context can be used for thread context
|
||||
MAX_THREAD_CONTEXT_PERCENTAGE = 512 * 2 / 3072
|
||||
# Number of docs to display in "Reference Documents"
|
||||
ONYX_BOT_NUM_DOCS_TO_DISPLAY = int(os.environ.get("ONYX_BOT_NUM_DOCS_TO_DISPLAY", "5"))
|
||||
# If the LLM fails to answer, Onyx can still show the "Reference Documents"
|
||||
@@ -45,6 +47,10 @@ ONYX_BOT_MAX_WAIT_TIME = int(os.environ.get("ONYX_BOT_MAX_WAIT_TIME") or 180)
|
||||
# Time (in minutes) after which a Slack message is sent to the user to remind him to give feedback.
|
||||
# Set to 0 to disable it (default)
|
||||
ONYX_BOT_FEEDBACK_REMINDER = int(os.environ.get("ONYX_BOT_FEEDBACK_REMINDER") or 0)
|
||||
# Set to True to rephrase the Slack users messages
|
||||
ONYX_BOT_REPHRASE_MESSAGE = (
|
||||
os.environ.get("ONYX_BOT_REPHRASE_MESSAGE", "").lower() == "true"
|
||||
)
|
||||
|
||||
# ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD is the number of
|
||||
# responses OnyxBot can send in a given time period.
|
||||
|
||||
@@ -161,8 +161,6 @@ class DocumentBase(BaseModel):
|
||||
sections: list[TextSection | ImageSection]
|
||||
source: DocumentSource | None = None
|
||||
semantic_identifier: str # displayed in the UI as the main identifier for the doc
|
||||
# TODO(andrei): Ideally we could improve this to where each value is just a
|
||||
# list of strings.
|
||||
metadata: dict[str, str | list[str]]
|
||||
|
||||
# UTC time
|
||||
@@ -204,7 +202,13 @@ class DocumentBase(BaseModel):
|
||||
if not self.metadata:
|
||||
return None
|
||||
# Combined string for the key/value for easy filtering
|
||||
return convert_metadata_dict_to_list_of_strings(self.metadata)
|
||||
attributes: list[str] = []
|
||||
for k, v in self.metadata.items():
|
||||
if isinstance(v, list):
|
||||
attributes.extend([k + INDEX_SEPARATOR + vi for vi in v])
|
||||
else:
|
||||
attributes.append(k + INDEX_SEPARATOR + v)
|
||||
return attributes
|
||||
|
||||
def __sizeof__(self) -> int:
|
||||
size = sys.getsizeof(self.id)
|
||||
@@ -236,66 +240,6 @@ class DocumentBase(BaseModel):
|
||||
return " ".join([section.text for section in self.sections if section.text])
|
||||
|
||||
|
||||
def convert_metadata_dict_to_list_of_strings(
|
||||
metadata: dict[str, str | list[str]],
|
||||
) -> list[str]:
|
||||
"""Converts a metadata dict to a list of strings.
|
||||
|
||||
Each string is a key-value pair separated by the INDEX_SEPARATOR. If a key
|
||||
points to a list of values, each value generates a unique pair.
|
||||
|
||||
Args:
|
||||
metadata: The metadata dict to convert where values can be either a
|
||||
string or a list of strings.
|
||||
|
||||
Returns:
|
||||
A list of strings where each string is a key-value pair separated by the
|
||||
INDEX_SEPARATOR.
|
||||
"""
|
||||
attributes: list[str] = []
|
||||
for k, v in metadata.items():
|
||||
if isinstance(v, list):
|
||||
attributes.extend([k + INDEX_SEPARATOR + vi for vi in v])
|
||||
else:
|
||||
attributes.append(k + INDEX_SEPARATOR + v)
|
||||
return attributes
|
||||
|
||||
|
||||
def convert_metadata_list_of_strings_to_dict(
|
||||
metadata_list: list[str],
|
||||
) -> dict[str, str | list[str]]:
|
||||
"""
|
||||
Converts a list of strings to a metadata dict. The inverse of
|
||||
convert_metadata_dict_to_list_of_strings.
|
||||
|
||||
Assumes the input strings are formatted as in the output of
|
||||
convert_metadata_dict_to_list_of_strings.
|
||||
|
||||
The schema of the output metadata dict is suboptimal yet bound to legacy
|
||||
code. Ideally each key would just point to a list of strings, where each
|
||||
list might contain just one element.
|
||||
|
||||
Args:
|
||||
metadata_list: The list of strings to convert to a metadata dict.
|
||||
|
||||
Returns:
|
||||
A metadata dict where values can be either a string or a list of
|
||||
strings.
|
||||
"""
|
||||
metadata: dict[str, str | list[str]] = {}
|
||||
for item in metadata_list:
|
||||
key, value = item.split(INDEX_SEPARATOR, 1)
|
||||
if key in metadata:
|
||||
# We have already seen this key therefore it must point to a list.
|
||||
if isinstance(metadata[key], list):
|
||||
cast(list[str], metadata[key]).append(value)
|
||||
else:
|
||||
metadata[key] = [cast(str, metadata[key]), value]
|
||||
else:
|
||||
metadata[key] = value
|
||||
return metadata
|
||||
|
||||
|
||||
class Document(DocumentBase):
|
||||
"""Used for Onyx ingestion api, the ID is required"""
|
||||
|
||||
|
||||
@@ -13,6 +13,13 @@ class RecencyBiasSetting(str, Enum):
|
||||
AUTO = "auto"
|
||||
|
||||
|
||||
class OptionalSearchSetting(str, Enum):
|
||||
ALWAYS = "always"
|
||||
NEVER = "never"
|
||||
# Determine whether to run search based on history and latest query
|
||||
AUTO = "auto"
|
||||
|
||||
|
||||
class QueryType(str, Enum):
|
||||
"""
|
||||
The type of first-pass query to use for hybrid search.
|
||||
@@ -29,3 +36,15 @@ class SearchType(str, Enum):
|
||||
KEYWORD = "keyword"
|
||||
SEMANTIC = "semantic"
|
||||
INTERNET = "internet"
|
||||
|
||||
|
||||
class LLMEvaluationType(str, Enum):
|
||||
AGENTIC = "agentic" # applies agentic evaluation
|
||||
BASIC = "basic" # applies boolean evaluation
|
||||
SKIP = "skip" # skips evaluation
|
||||
UNSPECIFIED = "unspecified" # reverts to default
|
||||
|
||||
|
||||
class QueryFlow(str, Enum):
|
||||
SEARCH = "search"
|
||||
QUESTION_ANSWER = "question-answer"
|
||||
|
||||
@@ -31,6 +31,7 @@ from onyx.context.search.federated.slack_search_utils import is_recency_query
|
||||
from onyx.context.search.federated.slack_search_utils import should_include_message
|
||||
from onyx.context.search.models import ChunkIndexRequest
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
from onyx.context.search.models import SearchQuery
|
||||
from onyx.db.document import DocumentSource
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.document_index_utils import (
|
||||
@@ -424,6 +425,7 @@ class SlackQueryResult(BaseModel):
|
||||
|
||||
def query_slack(
|
||||
query_string: str,
|
||||
original_query: SearchQuery,
|
||||
access_token: str,
|
||||
limit: int | None = None,
|
||||
allowed_private_channel: str | None = None,
|
||||
@@ -454,7 +456,7 @@ def query_slack(
|
||||
logger.info(f"Final query to slack: {final_query}")
|
||||
|
||||
# Detect if query asks for most recent results
|
||||
sort_by_time = is_recency_query(query_string)
|
||||
sort_by_time = is_recency_query(original_query.query)
|
||||
|
||||
slack_client = WebClient(token=access_token)
|
||||
try:
|
||||
@@ -534,7 +536,8 @@ def query_slack(
|
||||
)
|
||||
document_id = f"{channel_id}_{message_id}"
|
||||
|
||||
decay_factor = DOC_TIME_DECAY
|
||||
# compute recency bias (parallels vespa calculation) and metadata
|
||||
decay_factor = DOC_TIME_DECAY * original_query.recency_bias_multiplier
|
||||
doc_time = datetime.fromtimestamp(float(message_id))
|
||||
doc_age_years = (datetime.now() - doc_time).total_seconds() / (
|
||||
365 * 24 * 60 * 60
|
||||
@@ -999,6 +1002,7 @@ def slack_retrieval(
|
||||
query_slack,
|
||||
(
|
||||
query_string,
|
||||
query,
|
||||
access_token,
|
||||
query_limit,
|
||||
allowed_private_channel,
|
||||
@@ -1041,6 +1045,7 @@ def slack_retrieval(
|
||||
query_slack,
|
||||
(
|
||||
query_string,
|
||||
query,
|
||||
access_token,
|
||||
query_limit,
|
||||
allowed_private_channel,
|
||||
@@ -1220,6 +1225,7 @@ def slack_retrieval(
|
||||
source_type=DocumentSource.SLACK,
|
||||
title=chunk.title_prefix,
|
||||
boost=0,
|
||||
recency_bias=docid_to_message[document_id].recency_bias,
|
||||
score=convert_slack_score(docid_to_message[document_id].slack_score),
|
||||
hidden=False,
|
||||
is_relevant=None,
|
||||
|
||||
@@ -13,9 +13,7 @@ from onyx.context.search.federated.models import ChannelMetadata
|
||||
from onyx.context.search.models import ChunkIndexRequest
|
||||
from onyx.federated_connectors.slack.models import SlackEntities
|
||||
from onyx.llm.interfaces import LLM
|
||||
from onyx.llm.models import UserMessage
|
||||
from onyx.llm.utils import llm_response_to_string
|
||||
from onyx.natural_language_processing.english_stopwords import ENGLISH_STOPWORDS_SET
|
||||
from onyx.onyxbot.slack.models import ChannelType
|
||||
from onyx.prompts.federated_search import SLACK_DATE_EXTRACTION_PROMPT
|
||||
from onyx.prompts.federated_search import SLACK_QUERY_EXPANSION_PROMPT
|
||||
@@ -114,7 +112,7 @@ def is_recency_query(query: str) -> bool:
|
||||
if not has_recency_keyword:
|
||||
return False
|
||||
|
||||
# Get combined stop words (English + Slack-specific)
|
||||
# Get combined stop words (NLTK + Slack-specific)
|
||||
all_stop_words = _get_combined_stop_words()
|
||||
|
||||
# Extract content words (excluding stop words)
|
||||
@@ -192,7 +190,7 @@ def extract_date_range_from_query(
|
||||
|
||||
try:
|
||||
prompt = SLACK_DATE_EXTRACTION_PROMPT.format(query=query)
|
||||
response = llm_response_to_string(llm.invoke(UserMessage(content=prompt)))
|
||||
response = llm_response_to_string(llm.invoke(prompt))
|
||||
|
||||
response_clean = _parse_llm_code_block_response(response)
|
||||
|
||||
@@ -489,7 +487,7 @@ def build_channel_override_query(channel_references: set[str], time_filter: str)
|
||||
return f"__CHANNEL_OVERRIDE__ {channel_filter}{time_filter}"
|
||||
|
||||
|
||||
# Slack-specific stop words (in addition to standard English stop words)
|
||||
# Slack-specific stop words (in addition to standard NLTK stop words)
|
||||
# These include Slack-specific terms and temporal/recency keywords
|
||||
SLACK_SPECIFIC_STOP_WORDS = frozenset(
|
||||
RECENCY_KEYWORDS
|
||||
@@ -509,16 +507,27 @@ SLACK_SPECIFIC_STOP_WORDS = frozenset(
|
||||
)
|
||||
|
||||
|
||||
def _get_combined_stop_words() -> frozenset[str]:
|
||||
"""Get combined English + Slack-specific stop words.
|
||||
def _get_combined_stop_words() -> set[str]:
|
||||
"""Get combined NLTK + Slack-specific stop words.
|
||||
|
||||
Returns a frozenset of stop words for filtering content words.
|
||||
Returns a set of stop words for filtering content words.
|
||||
Falls back to just Slack-specific stop words if NLTK is unavailable.
|
||||
|
||||
Note: Currently only supports English stop words. Non-English queries
|
||||
may have suboptimal content word extraction. Future enhancement could
|
||||
detect query language and load appropriate stop words.
|
||||
"""
|
||||
return ENGLISH_STOPWORDS_SET | SLACK_SPECIFIC_STOP_WORDS
|
||||
try:
|
||||
from nltk.corpus import stopwords # type: ignore
|
||||
|
||||
# TODO: Support multiple languages - currently hardcoded to English
|
||||
# Could detect language or allow configuration
|
||||
nltk_stop_words = set(stopwords.words("english"))
|
||||
except Exception:
|
||||
# Fallback if NLTK not available
|
||||
nltk_stop_words = set()
|
||||
|
||||
return nltk_stop_words | SLACK_SPECIFIC_STOP_WORDS
|
||||
|
||||
|
||||
def extract_content_words_from_recency_query(
|
||||
@@ -526,7 +535,7 @@ def extract_content_words_from_recency_query(
|
||||
) -> list[str]:
|
||||
"""Extract meaningful content words from a recency query.
|
||||
|
||||
Filters out English stop words, Slack-specific terms, channel references, and proper nouns.
|
||||
Filters out NLTK stop words, Slack-specific terms, channel references, and proper nouns.
|
||||
|
||||
Args:
|
||||
query_text: The user's query text
|
||||
@@ -535,7 +544,7 @@ def extract_content_words_from_recency_query(
|
||||
Returns:
|
||||
List of content words (up to MAX_CONTENT_WORDS)
|
||||
"""
|
||||
# Get combined stop words (English + Slack-specific)
|
||||
# Get combined stop words (NLTK + Slack-specific)
|
||||
all_stop_words = _get_combined_stop_words()
|
||||
|
||||
words = query_text.split()
|
||||
@@ -584,10 +593,8 @@ def expand_query_with_llm(query_text: str, llm: LLM) -> list[str]:
|
||||
Returns:
|
||||
List of rephrased query strings (up to MAX_SLACK_QUERY_EXPANSIONS)
|
||||
"""
|
||||
prompt = UserMessage(
|
||||
content=SLACK_QUERY_EXPANSION_PROMPT.format(
|
||||
query=query_text, max_queries=MAX_SLACK_QUERY_EXPANSIONS
|
||||
)
|
||||
prompt = SLACK_QUERY_EXPANSION_PROMPT.format(
|
||||
query=query_text, max_queries=MAX_SLACK_QUERY_EXPANSIONS
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
@@ -5,15 +5,27 @@ from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import ConfigDict
|
||||
from pydantic import Field
|
||||
from pydantic import field_validator
|
||||
|
||||
from onyx.configs.chat_configs import NUM_RETURNED_HITS
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.context.search.enums import LLMEvaluationType
|
||||
from onyx.context.search.enums import OptionalSearchSetting
|
||||
from onyx.context.search.enums import SearchType
|
||||
from onyx.db.models import Persona
|
||||
from onyx.db.models import SearchSettings
|
||||
from onyx.indexing.models import BaseChunk
|
||||
from onyx.indexing.models import IndexingSetting
|
||||
from onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX
|
||||
from shared_configs.enums import RerankerProvider
|
||||
from shared_configs.model_server_models import Embedding
|
||||
|
||||
|
||||
MAX_METRICS_CONTENT = (
|
||||
200 # Just need enough characters to identify where in the doc the chunk is
|
||||
)
|
||||
|
||||
|
||||
class QueryExpansions(BaseModel):
|
||||
@@ -26,7 +38,6 @@ class QueryExpansionType(Enum):
|
||||
SEMANTIC = "semantic"
|
||||
|
||||
|
||||
# TODO clean up this stuff, reranking is no longer used
|
||||
class RerankingDetails(BaseModel):
|
||||
# If model is None (or num_rerank is 0), then reranking is turned off
|
||||
rerank_model_name: str | None
|
||||
@@ -120,6 +131,13 @@ class IndexFilters(BaseFilters, UserFileFilters):
|
||||
tenant_id: str | None = None
|
||||
|
||||
|
||||
class ChunkMetric(BaseModel):
|
||||
document_id: str
|
||||
chunk_content_start: str
|
||||
first_link: str | None
|
||||
score: float
|
||||
|
||||
|
||||
class ChunkContext(BaseModel):
|
||||
# If not specified (None), picked up from Persona settings if there is space
|
||||
# if specified (even if 0), it always uses the specified number of chunks above and below
|
||||
@@ -144,6 +162,10 @@ class BasicChunkRequest(BaseModel):
|
||||
# In case some queries favor recency more than other queries.
|
||||
recency_bias_multiplier: float = 1.0
|
||||
|
||||
# Sometimes we may want to extract specific keywords from a more semantic query for
|
||||
# a better keyword search.
|
||||
query_keywords: list[str] | None = None # Not used currently
|
||||
|
||||
limit: int | None = None
|
||||
offset: int | None = None # This one is not set currently
|
||||
|
||||
@@ -162,8 +184,6 @@ class ChunkIndexRequest(BasicChunkRequest):
|
||||
# Calculated final filters
|
||||
filters: IndexFilters
|
||||
|
||||
query_keywords: list[str] | None = None
|
||||
|
||||
|
||||
class ContextExpansionType(str, Enum):
|
||||
NOT_RELEVANT = "not_relevant"
|
||||
@@ -172,18 +192,94 @@ class ContextExpansionType(str, Enum):
|
||||
FULL_DOCUMENT = "full_document"
|
||||
|
||||
|
||||
class SearchRequest(ChunkContext):
    """Caller-facing search parameters.

    Fields left unset are resolved during preprocessing, generally falling back
    to Persona settings and then to global defaults.
    """

    query: str

    expanded_queries: QueryExpansions | None = None
    original_query: str | None = None

    search_type: SearchType = SearchType.SEMANTIC

    human_selected_filters: BaseFilters | None = None
    user_file_filters: UserFileFilters | None = None
    enable_auto_detect_filters: bool | None = None
    persona: Persona | None = None

    # if None, no offset / limit
    offset: int | None = None
    limit: int | None = None

    multilingual_expansion: list[str] | None = None
    recency_bias_multiplier: float = 1.0
    hybrid_alpha: float | None = None
    rerank_settings: RerankingDetails | None = None
    evaluation_type: LLMEvaluationType = LLMEvaluationType.UNSPECIFIED
    # Persona comes from onyx.db.models (not a pydantic type), hence
    # arbitrary_types_allowed
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Optionally precomputed elsewhere (e.g. in parallel with other heavy work)
    # so the embedding / query-analysis models don't have to run again here.
    precomputed_query_embedding: Embedding | None = None
    precomputed_is_keyword: bool | None = None
    precomputed_keywords: list[str] | None = None
|
||||
|
||||
|
||||
class SearchQuery(ChunkContext):
    """Fully-resolved search parameters produced by preprocessing.

    Unlike SearchRequest, settings with defaults have all been finalized here.
    The model is frozen (immutable after construction).
    """

    query: str
    processed_keywords: list[str]
    search_type: SearchType
    evaluation_type: LLMEvaluationType
    filters: IndexFilters

    # by this point, the chunks_above and chunks_below must be set
    chunks_above: int
    chunks_below: int

    rerank_settings: RerankingDetails | None
    hybrid_alpha: float
    recency_bias_multiplier: float

    # Only used if LLM evaluation type is not skip, None to use default settings
    max_llm_filter_sections: int

    num_hits: int = NUM_RETURNED_HITS
    offset: int = 0
    model_config = ConfigDict(frozen=True)

    # Avoids re-embedding the query when the embedding was computed upstream
    precomputed_query_embedding: Embedding | None = None

    expanded_queries: QueryExpansions | None = None
    original_query: str | None
|
||||
|
||||
|
||||
class RetrievalDetails(ChunkContext):
    """Per-request retrieval options layered on top of Persona defaults."""

    # Use LLM to determine whether to do a retrieval or only rely on existing history
    # If the Persona is configured to not run search (0 chunks), this is bypassed
    # If no Prompt is configured, the only search results are shown, this is bypassed
    run_search: OptionalSearchSetting = OptionalSearchSetting.AUTO
    # Is this a real-time/streaming call or a question where Onyx can take more time?
    # Used to determine reranking flow
    real_time: bool = True
    # The following have defaults in the Persona settings which can be overridden via
    # the query, if None, then use Persona settings
    filters: BaseFilters | None = None
    enable_auto_detect_filters: bool | None = None
    # if None, no offset / limit
    offset: int | None = None
    limit: int | None = None

    # If this is set, only the highest matching chunk (or merged chunks) is returned
    dedupe_docs: bool = False
|
||||
|
||||
|
||||
class InferenceChunk(BaseChunk):
|
||||
document_id: str
|
||||
source_type: DocumentSource
|
||||
semantic_identifier: str
|
||||
title: str | None # Separate from Semantic Identifier though often same
|
||||
boost: int
|
||||
recency_bias: float
|
||||
score: float | None
|
||||
hidden: bool
|
||||
is_relevant: bool | None = None
|
||||
relevance_explanation: str | None = None
|
||||
# TODO(andrei): Ideally we could improve this to where each value is just a
|
||||
# list of strings.
|
||||
metadata: dict[str, str | list[str]]
|
||||
# Matched sections in the chunk. Uses Vespa syntax e.g. <hi>TEXT</hi>
|
||||
# to specify that a set of words should be highlighted. For example:
|
||||
@@ -370,10 +466,6 @@ class SearchDocsResponse(BaseModel):
|
||||
# document id is the most staightforward way.
|
||||
citation_mapping: dict[int, str]
|
||||
|
||||
# For cases where the frontend only needs to display a subset of the search docs
|
||||
# The whole list is typically still needed for later steps but this set should be saved separately
|
||||
displayed_docs: list[SearchDoc] | None = None
|
||||
|
||||
|
||||
class SavedSearchDoc(SearchDoc):
|
||||
db_doc_id: int
|
||||
@@ -432,8 +524,25 @@ class SavedSearchDoc(SearchDoc):
|
||||
return self_score < other_score
|
||||
|
||||
|
||||
class CitationDocInfo(BaseModel):
    """Associates a search doc with its citation number."""

    search_doc: SearchDoc
    # NOTE(review): None presumably means no citation number was assigned to
    # this doc — confirm against the code that builds these
    citation_number: int | None
|
||||
|
||||
|
||||
class SavedSearchDocWithContent(SavedSearchDoc):
    """Used for endpoints that need to return the actual contents of the retrieved
    section in addition to the match_highlights."""

    # Full text of the retrieved section
    content: str
|
||||
|
||||
|
||||
class RetrievalMetricsContainer(BaseModel):
    """Bundle of per-chunk retrieval metrics for a single search."""

    search_type: SearchType
    metrics: list[ChunkMetric]  # This contains the scores for retrieval as well
|
||||
|
||||
|
||||
class RerankMetricsContainer(BaseModel):
    """The score held by this is the un-boosted, averaged score of the ensemble cross-encoders"""

    metrics: list[ChunkMetric]
    # NOTE(review): presumably parallel to `metrics` (one raw cross-encoder
    # score per chunk) — confirm against the reranking code that fills this in
    raw_similarity_scores: list[float]
|
||||
|
||||
@@ -19,7 +19,6 @@ from onyx.db.models import Persona
|
||||
from onyx.db.models import User
|
||||
from onyx.document_index.interfaces import DocumentIndex
|
||||
from onyx.llm.interfaces import LLM
|
||||
from onyx.natural_language_processing.english_stopwords import strip_stopwords
|
||||
from onyx.secondary_llm_flows.source_filter import extract_source_filter
|
||||
from onyx.secondary_llm_flows.time_filter import extract_time_filter
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -279,16 +278,12 @@ def search_pipeline(
|
||||
bypass_acl=chunk_search_request.bypass_acl,
|
||||
)
|
||||
|
||||
query_keywords = strip_stopwords(chunk_search_request.query)
|
||||
|
||||
query_request = ChunkIndexRequest(
|
||||
query=chunk_search_request.query,
|
||||
hybrid_alpha=chunk_search_request.hybrid_alpha,
|
||||
recency_bias_multiplier=chunk_search_request.recency_bias_multiplier,
|
||||
query_keywords=query_keywords,
|
||||
query_keywords=chunk_search_request.query_keywords,
|
||||
filters=filters,
|
||||
limit=chunk_search_request.limit,
|
||||
offset=chunk_search_request.offset,
|
||||
)
|
||||
|
||||
retrieved_chunks = search_chunks(
|
||||
|
||||
272
backend/onyx/context/search/preprocessing/preprocessing.py
Normal file
272
backend/onyx/context/search/preprocessing/preprocessing.py
Normal file
@@ -0,0 +1,272 @@
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.chat_configs import BASE_RECENCY_DECAY
|
||||
from onyx.configs.chat_configs import CONTEXT_CHUNKS_ABOVE
|
||||
from onyx.configs.chat_configs import CONTEXT_CHUNKS_BELOW
|
||||
from onyx.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE
|
||||
from onyx.configs.chat_configs import FAVOR_RECENT_DECAY_MULTIPLIER
|
||||
from onyx.configs.chat_configs import HYBRID_ALPHA
|
||||
from onyx.configs.chat_configs import HYBRID_ALPHA_KEYWORD
|
||||
from onyx.configs.chat_configs import NUM_POSTPROCESSED_RESULTS
|
||||
from onyx.configs.chat_configs import NUM_RETURNED_HITS
|
||||
from onyx.context.search.enums import LLMEvaluationType
|
||||
from onyx.context.search.enums import RecencyBiasSetting
|
||||
from onyx.context.search.enums import SearchType
|
||||
from onyx.context.search.models import BaseFilters
|
||||
from onyx.context.search.models import IndexFilters
|
||||
from onyx.context.search.models import RerankingDetails
|
||||
from onyx.context.search.models import SearchQuery
|
||||
from onyx.context.search.models import SearchRequest
|
||||
from onyx.context.search.preprocessing.access_filters import (
|
||||
build_access_filters_for_user,
|
||||
)
|
||||
from onyx.context.search.utils import (
|
||||
remove_stop_words_and_punctuation,
|
||||
)
|
||||
from onyx.db.models import User
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.llm.interfaces import LLM
|
||||
from onyx.natural_language_processing.search_nlp_models import QueryAnalysisModel
|
||||
from onyx.secondary_llm_flows.source_filter import extract_source_filter
|
||||
from onyx.secondary_llm_flows.time_filter import extract_time_filter
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.threadpool_concurrency import FunctionCall
|
||||
from onyx.utils.threadpool_concurrency import run_functions_in_parallel
|
||||
from onyx.utils.timing import log_function_time
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def query_analysis(query: str) -> tuple[bool, list[str]]:
    """Run the query-analysis model on *query*.

    Returns the model's prediction: a flag indicating whether the query is
    keyword-style, plus the list of extracted keywords.
    """
    return QueryAnalysisModel().predict(query)
|
||||
|
||||
|
||||
# TODO: This is unused code.
@log_function_time(print_only=True)
def retrieval_preprocessing(
    search_request: SearchRequest,
    user: User | None,
    llm: LLM,
    skip_query_analysis: bool,
    db_session: Session,
    favor_recent_decay_multiplier: float = FAVOR_RECENT_DECAY_MULTIPLIER,
    base_recency_decay: float = BASE_RECENCY_DECAY,
    bypass_acl: bool = False,
) -> SearchQuery:
    """Resolve a SearchRequest into a fully-specified, frozen SearchQuery.

    Logic is as follows:
    Any global disables apply first
    Then any filters or settings as part of the query are used
    Then defaults to Persona settings if not specified by the query
    """
    query = search_request.query
    limit = search_request.limit
    offset = search_request.offset
    persona = search_request.persona

    # Explicit user-selected filters take precedence; persona document sets
    # only apply when the user did not pick any document sets themselves
    preset_filters = search_request.human_selected_filters or BaseFilters()
    if persona and persona.document_sets and preset_filters.document_set is None:
        preset_filters.document_set = [
            document_set.name for document_set in persona.document_sets
        ]

    time_filter = preset_filters.time_cutoff
    if time_filter is None and persona:
        time_filter = persona.search_start_date

    source_filter = preset_filters.source_type

    # Decide whether to spend LLM calls auto-extracting time/source filters
    auto_detect_time_filter = True
    auto_detect_source_filter = True
    if not search_request.enable_auto_detect_filters:
        logger.debug("Retrieval details disables auto detect filters")
        auto_detect_time_filter = False
        auto_detect_source_filter = False
    elif persona and persona.llm_filter_extraction is False:
        logger.debug("Persona disables auto detect filters")
        auto_detect_time_filter = False
        auto_detect_source_filter = False
    else:
        logger.debug("Auto detect filters enabled")

    # An explicitly provided filter beats auto-detection
    if (
        time_filter is not None
        and persona
        and persona.recency_bias != RecencyBiasSetting.AUTO
    ):
        auto_detect_time_filter = False
        logger.debug("Not extract time filter - already provided")
    if source_filter is not None:
        logger.debug("Not extract source filter - already provided")
        auto_detect_source_filter = False

    # Based on the query figure out if we should apply any hard time filters /
    # if we should bias more recent docs even more strongly
    run_time_filters = (
        FunctionCall(extract_time_filter, (query, llm), {})
        if auto_detect_time_filter
        else None
    )

    # Based on the query, figure out if we should apply any source filters
    run_source_filters = (
        FunctionCall(extract_source_filter, (query, llm, db_session), {})
        if auto_detect_source_filter
        else None
    )

    # Sometimes this is pre-computed in parallel with other heavy tasks to improve
    # latency, and in that case we don't need to run the model again
    run_query_analysis = (
        None
        if (skip_query_analysis or search_request.precomputed_is_keyword is not None)
        else FunctionCall(query_analysis, (query,), {})
    )

    # Fan the LLM / model calls out in parallel to cut latency
    functions_to_run = [
        filter_fn
        for filter_fn in [
            run_time_filters,
            run_source_filters,
            run_query_analysis,
        ]
        if filter_fn
    ]
    parallel_results = run_functions_in_parallel(functions_to_run)

    predicted_time_cutoff, predicted_favor_recent = (
        parallel_results[run_time_filters.result_id]
        if run_time_filters
        else (None, None)
    )
    predicted_source_filters = (
        parallel_results[run_source_filters.result_id] if run_source_filters else None
    )

    # The extracted keywords right now are not very reliable, not using for now
    # Can maybe use for highlighting
    is_keyword, _extracted_keywords = False, None
    if search_request.precomputed_is_keyword is not None:
        is_keyword = search_request.precomputed_is_keyword
        _extracted_keywords = search_request.precomputed_keywords
    elif run_query_analysis:
        is_keyword, _extracted_keywords = parallel_results[run_query_analysis.result_id]

    all_query_terms = query.split()
    processed_keywords = (
        remove_stop_words_and_punctuation(all_query_terms)
        # If the user is using a different language, don't edit the query or remove english stopwords
        if not search_request.multilingual_expansion
        else all_query_terms
    )

    user_acl_filters = (
        None if bypass_acl else build_access_filters_for_user(user, db_session)
    )
    # Merge user-file filters from the request with files attached to the persona
    user_file_filters = search_request.user_file_filters
    user_file_ids = (user_file_filters.user_file_ids or []) if user_file_filters else []
    if persona and persona.user_files:
        user_file_ids = list(
            set(user_file_ids) | set([file.id for file in persona.user_files])
        )

    # Explicit (preset) filters win; auto-detected values only fill gaps
    final_filters = IndexFilters(
        user_file_ids=user_file_ids,
        project_id=user_file_filters.project_id if user_file_filters else None,
        source_type=preset_filters.source_type or predicted_source_filters,
        document_set=preset_filters.document_set,
        time_cutoff=time_filter or predicted_time_cutoff,
        tags=preset_filters.tags,  # Tags are never auto-extracted
        access_control_list=user_acl_filters,
        tenant_id=get_current_tenant_id() if MULTI_TENANT else None,
        # kg_entities=preset_filters.kg_entities,
        # kg_relationships=preset_filters.kg_relationships,
        # kg_terms=preset_filters.kg_terms,
        # kg_sources=preset_filters.kg_sources,
        # kg_chunk_id_zero_only=preset_filters.kg_chunk_id_zero_only,
    )

    # Request-level evaluation type beats persona; global disable beats both
    llm_evaluation_type = LLMEvaluationType.BASIC
    if search_request.evaluation_type is not LLMEvaluationType.UNSPECIFIED:
        llm_evaluation_type = search_request.evaluation_type

    elif persona:
        llm_evaluation_type = (
            LLMEvaluationType.BASIC
            if persona.llm_relevance_filter
            else LLMEvaluationType.SKIP
        )

    if DISABLE_LLM_DOC_RELEVANCE:
        # NOTE(review): enum members are normally always truthy, so this log
        # presumably fires even when the type is already SKIP — confirm whether
        # `llm_evaluation_type != LLMEvaluationType.SKIP` was intended
        if llm_evaluation_type:
            logger.info(
                "LLM chunk filtering would have run but has been globally disabled"
            )
        llm_evaluation_type = LLMEvaluationType.SKIP

    rerank_settings = search_request.rerank_settings
    # If not explicitly specified by the query, use the current settings
    if rerank_settings is None:
        search_settings = get_current_search_settings(db_session)

        # For non-streaming flows, the rerank settings are applied at the search_request level
        if not search_settings.disable_rerank_for_streaming:
            rerank_settings = RerankingDetails.from_db_model(search_settings)

    # Decays at 1 / (1 + (multiplier * num years))
    if persona and persona.recency_bias == RecencyBiasSetting.NO_DECAY:
        recency_bias_multiplier = 0.0
    elif persona and persona.recency_bias == RecencyBiasSetting.BASE_DECAY:
        recency_bias_multiplier = base_recency_decay
    elif persona and persona.recency_bias == RecencyBiasSetting.FAVOR_RECENT:
        recency_bias_multiplier = base_recency_decay * favor_recent_decay_multiplier
    else:
        # RecencyBiasSetting.AUTO (or no persona): follow the LLM's prediction
        if predicted_favor_recent:
            recency_bias_multiplier = base_recency_decay * favor_recent_decay_multiplier
        else:
            recency_bias_multiplier = base_recency_decay

    # Keyword-style queries lean more heavily on the keyword score component
    hybrid_alpha = HYBRID_ALPHA_KEYWORD if is_keyword else HYBRID_ALPHA
    if search_request.hybrid_alpha:
        hybrid_alpha = search_request.hybrid_alpha

    # Search request overrides anything else as it's explicitly set by the request
    # If not explicitly specified, use the persona settings if they exist
    # Otherwise, use the global defaults
    chunks_above = (
        search_request.chunks_above
        if search_request.chunks_above is not None
        else (persona.chunks_above if persona else CONTEXT_CHUNKS_ABOVE)
    )
    chunks_below = (
        search_request.chunks_below
        if search_request.chunks_below is not None
        else (persona.chunks_below if persona else CONTEXT_CHUNKS_BELOW)
    )

    return SearchQuery(
        query=query,
        original_query=search_request.original_query,
        processed_keywords=processed_keywords,
        search_type=SearchType.KEYWORD if is_keyword else SearchType.SEMANTIC,
        evaluation_type=llm_evaluation_type,
        filters=final_filters,
        hybrid_alpha=hybrid_alpha,
        recency_bias_multiplier=recency_bias_multiplier,
        num_hits=limit if limit is not None else NUM_RETURNED_HITS,
        offset=offset or 0,
        rerank_settings=rerank_settings,
        # Should match the LLM filtering to the same as the reranked, it's understood as this is the number of results
        # the user wants to do heavier processing on, so do the same for the LLM if reranking is on
        # if no reranking settings are set, then use the global default
        max_llm_filter_sections=(
            rerank_settings.num_rerank if rerank_settings else NUM_POSTPROCESSED_RESULTS
        ),
        chunks_above=chunks_above,
        chunks_below=chunks_below,
        full_doc=search_request.full_doc,
        precomputed_query_embedding=search_request.precomputed_query_embedding,
        expanded_queries=search_request.expanded_queries,
    )
|
||||
@@ -1,28 +1,98 @@
|
||||
import string
|
||||
from collections.abc import Callable
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.chat_configs import HYBRID_ALPHA
|
||||
from onyx.configs.chat_configs import NUM_RETURNED_HITS
|
||||
from onyx.context.search.enums import SearchType
|
||||
from onyx.context.search.models import ChunkIndexRequest
|
||||
from onyx.context.search.models import ChunkMetric
|
||||
from onyx.context.search.models import IndexFilters
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
from onyx.context.search.models import InferenceSection
|
||||
from onyx.context.search.models import MAX_METRICS_CONTENT
|
||||
from onyx.context.search.models import QueryExpansionType
|
||||
from onyx.context.search.models import RetrievalMetricsContainer
|
||||
from onyx.context.search.models import SearchQuery
|
||||
from onyx.context.search.preprocessing.preprocessing import HYBRID_ALPHA
|
||||
from onyx.context.search.preprocessing.preprocessing import HYBRID_ALPHA_KEYWORD
|
||||
from onyx.context.search.utils import get_query_embedding
|
||||
from onyx.context.search.utils import get_query_embeddings
|
||||
from onyx.context.search.utils import inference_section_from_chunks
|
||||
from onyx.db.search_settings import get_multilingual_expansion
|
||||
from onyx.document_index.interfaces import DocumentIndex
|
||||
from onyx.document_index.interfaces import VespaChunkRequest
|
||||
from onyx.document_index.vespa.shared_utils.utils import (
|
||||
replace_invalid_doc_id_characters,
|
||||
)
|
||||
from onyx.federated_connectors.federated_retrieval import (
|
||||
get_federated_retrieval_functions,
|
||||
)
|
||||
from onyx.secondary_llm_flows.query_expansion import multilingual_query_expansion
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
|
||||
from onyx.utils.threadpool_concurrency import run_in_background
|
||||
from onyx.utils.threadpool_concurrency import TimeoutThread
|
||||
from onyx.utils.threadpool_concurrency import wait_on_background
|
||||
from onyx.utils.timing import log_function_time
|
||||
from shared_configs.model_server_models import Embedding
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _dedupe_chunks(
|
||||
chunks: list[InferenceChunk],
|
||||
) -> list[InferenceChunk]:
|
||||
used_chunks: dict[tuple[str, int], InferenceChunk] = {}
|
||||
for chunk in chunks:
|
||||
key = (chunk.document_id, chunk.chunk_id)
|
||||
if key not in used_chunks:
|
||||
used_chunks[key] = chunk
|
||||
else:
|
||||
stored_chunk_score = used_chunks[key].score or 0
|
||||
this_chunk_score = chunk.score or 0
|
||||
if stored_chunk_score < this_chunk_score:
|
||||
used_chunks[key] = chunk
|
||||
|
||||
return list(used_chunks.values())
|
||||
|
||||
|
||||
def download_nltk_data() -> None:
    """Ensure the NLTK resources this module relies on are available locally.

    Checks each required resource and downloads any that are missing.
    Download failures are logged rather than raised.
    """
    import nltk  # type: ignore[import-untyped]

    required = {
        "stopwords": "corpora/stopwords",
        # "wordnet": "corpora/wordnet", # Not in use
        "punkt_tab": "tokenizers/punkt_tab",
    }

    for name, lookup_path in required.items():
        try:
            nltk.data.find(lookup_path)
        except LookupError:
            # Missing locally — attempt a quiet download, best-effort
            try:
                logger.info(f"Downloading {name}...")
                nltk.download(name, quiet=True)
                logger.info(f"{name} downloaded successfully.")
            except Exception as e:
                logger.error(f"Failed to download {name}. Error: {e}")
        else:
            logger.info(f"{name} is already downloaded.")
|
||||
|
||||
|
||||
def lemmatize_text(keywords: list[str]) -> list[str]:
    """Deliberately disabled: lemmatization is not part of the current flow.

    The previous WordNet-based implementation (lemmatize each token of the
    joined query and union the results with the original keywords) was
    removed; any caller reaching this is a bug.
    """
    raise NotImplementedError("Lemmatization should not be used currently")
|
||||
|
||||
|
||||
def combine_retrieval_results(
|
||||
chunk_sets: list[list[InferenceChunk]],
|
||||
) -> list[InferenceChunk]:
|
||||
@@ -47,6 +117,313 @@ def combine_retrieval_results(
|
||||
return sorted_chunks
|
||||
|
||||
|
||||
# TODO: This is unused code.
@log_function_time(print_only=True)
def doc_index_retrieval(
    query: SearchQuery,
    document_index: DocumentIndex,
    db_session: Session,
) -> list[InferenceChunk]:
    """
    This function performs the search to retrieve the chunks,
    extracts chunks from the large chunks, persists the scores
    from the large chunks to the referenced chunks,
    dedupes the chunks, and cleans the chunks.
    """
    query_embedding = query.precomputed_query_embedding or get_query_embedding(
        query.query, db_session
    )

    # Background-thread handles; populated only on the paths that need them
    keyword_embeddings_thread: TimeoutThread[list[Embedding]] | None = None
    semantic_embeddings_thread: TimeoutThread[list[Embedding]] | None = None
    top_base_chunks_standard_ranking_thread: (
        TimeoutThread[list[InferenceChunk]] | None
    ) = None

    top_semantic_chunks_thread: TimeoutThread[list[InferenceChunk]] | None = None

    keyword_embeddings: list[Embedding] | None = None
    semantic_embeddings: list[Embedding] | None = None

    top_semantic_chunks: list[InferenceChunk] | None = None

    # original retrieval method — always runs, in the background
    top_base_chunks_standard_ranking_thread = run_in_background(
        document_index.hybrid_retrieval,
        query.query,
        query_embedding,
        query.processed_keywords,
        query.filters,
        query.hybrid_alpha,
        query.recency_bias_multiplier,
        query.num_hits,
        QueryExpansionType.SEMANTIC,
        query.offset,
    )

    # Expanded-query path: additionally retrieve with the first keyword and
    # (for semantic searches) first semantic expansion, then merge
    if (
        query.expanded_queries
        and query.expanded_queries.keywords_expansions
        and query.expanded_queries.semantic_expansions
    ):

        keyword_embeddings_thread = run_in_background(
            get_query_embeddings,
            query.expanded_queries.keywords_expansions,
            db_session,
        )

        if query.search_type == SearchType.SEMANTIC:
            semantic_embeddings_thread = run_in_background(
                get_query_embeddings,
                query.expanded_queries.semantic_expansions,
                db_session,
            )

        keyword_embeddings = wait_on_background(keyword_embeddings_thread)
        if query.search_type == SearchType.SEMANTIC:
            assert semantic_embeddings_thread is not None
            semantic_embeddings = wait_on_background(semantic_embeddings_thread)

        # Use original query embedding for keyword retrieval embedding
        # NOTE(review): this overwrites the keyword-expansion embeddings that
        # were just awaited above, so that background call's result is
        # discarded — confirm whether the embedding thread is still needed
        keyword_embeddings = [query_embedding]

        # Note: we generally prepped earlier for multiple expansions, but for now we only use one.
        top_keyword_chunks_thread = run_in_background(
            document_index.hybrid_retrieval,
            query.expanded_queries.keywords_expansions[0],
            keyword_embeddings[0],
            query.processed_keywords,
            query.filters,
            HYBRID_ALPHA_KEYWORD,
            query.recency_bias_multiplier,
            query.num_hits,
            QueryExpansionType.KEYWORD,
            query.offset,
        )

        if query.search_type == SearchType.SEMANTIC:
            assert semantic_embeddings is not None

            top_semantic_chunks_thread = run_in_background(
                document_index.hybrid_retrieval,
                query.expanded_queries.semantic_expansions[0],
                semantic_embeddings[0],
                query.processed_keywords,
                query.filters,
                HYBRID_ALPHA,
                query.recency_bias_multiplier,
                query.num_hits,
                QueryExpansionType.SEMANTIC,
                query.offset,
            )

        top_base_chunks_standard_ranking = wait_on_background(
            top_base_chunks_standard_ranking_thread
        )

        top_keyword_chunks = wait_on_background(top_keyword_chunks_thread)

        if query.search_type == SearchType.SEMANTIC:
            assert top_semantic_chunks_thread is not None
            top_semantic_chunks = wait_on_background(top_semantic_chunks_thread)

        all_top_chunks = top_base_chunks_standard_ranking + top_keyword_chunks

        # use all three retrieval methods to retrieve top chunks

        if query.search_type == SearchType.SEMANTIC and top_semantic_chunks is not None:

            all_top_chunks += top_semantic_chunks

        top_chunks = _dedupe_chunks(all_top_chunks)

    else:

        # No expansions: just wait for the standard retrieval and dedupe
        top_base_chunks_standard_ranking = wait_on_background(
            top_base_chunks_standard_ranking_thread
        )

        top_chunks = _dedupe_chunks(top_base_chunks_standard_ranking)

    logger.info(f"Overall number of top initial retrieval chunks: {len(top_chunks)}")

    # Split results into normal chunks and "large chunks" that reference a
    # range of underlying chunks which must be fetched separately
    retrieval_requests: list[VespaChunkRequest] = []
    normal_chunks: list[InferenceChunk] = []
    referenced_chunk_scores: dict[tuple[str, int], float] = {}
    for chunk in top_chunks:
        if chunk.large_chunk_reference_ids:
            retrieval_requests.append(
                VespaChunkRequest(
                    document_id=replace_invalid_doc_id_characters(chunk.document_id),
                    min_chunk_ind=chunk.large_chunk_reference_ids[0],
                    max_chunk_ind=chunk.large_chunk_reference_ids[-1],
                )
            )
            # for each referenced chunk, persist the
            # highest score to the referenced chunk
            for chunk_id in chunk.large_chunk_reference_ids:
                key = (chunk.document_id, chunk_id)
                referenced_chunk_scores[key] = max(
                    referenced_chunk_scores.get(key, 0), chunk.score or 0
                )
        else:
            normal_chunks.append(chunk)

    # If there are no large chunks, just return the normal chunks
    if not retrieval_requests:
        return normal_chunks

    # Retrieve and return the referenced normal chunks from the large chunks
    retrieved_inference_chunks = document_index.id_based_retrieval(
        chunk_requests=retrieval_requests,
        filters=query.filters,
        batch_retrieval=True,
    )

    # Apply the scores from the large chunks to the chunks referenced
    # by each large chunk
    for chunk in retrieved_inference_chunks:
        if (chunk.document_id, chunk.chunk_id) in referenced_chunk_scores:
            chunk.score = referenced_chunk_scores[(chunk.document_id, chunk.chunk_id)]
            referenced_chunk_scores.pop((chunk.document_id, chunk.chunk_id))
        else:
            logger.error(
                f"Chunk {chunk.document_id} {chunk.chunk_id} not found in referenced chunk scores"
            )

    # Log any chunks that were not found in the retrieved chunks
    for reference in referenced_chunk_scores.keys():
        logger.error(f"Chunk {reference} not found in retrieved chunks")

    unique_chunks: dict[tuple[str, int], InferenceChunk] = {
        (chunk.document_id, chunk.chunk_id): chunk for chunk in normal_chunks
    }

    # persist the highest score of each deduped chunk
    for chunk in retrieved_inference_chunks:
        key = (chunk.document_id, chunk.chunk_id)
        # For duplicates, keep the highest score
        if key not in unique_chunks or (chunk.score or 0) > (
            unique_chunks[key].score or 0
        ):
            unique_chunks[key] = chunk

    # Deduplicate the chunks
    deduped_chunks = list(unique_chunks.values())
    deduped_chunks.sort(key=lambda chunk: chunk.score or 0, reverse=True)
    return deduped_chunks
|
||||
|
||||
|
||||
def _simplify_text(text: str) -> str:
|
||||
return "".join(
|
||||
char for char in text if char not in string.punctuation and not char.isspace()
|
||||
).lower()
|
||||
|
||||
|
||||
# TODO delete this
|
||||
def retrieve_chunks(
    query: SearchQuery,
    user_id: UUID | None,
    document_index: DocumentIndex,
    db_session: Session,
    retrieval_metrics_callback: (
        Callable[[RetrievalMetricsContainer], None] | None
    ) = None,
) -> list[InferenceChunk]:
    """Returns a list of the best chunks from an initial keyword/semantic/ hybrid search.

    Builds a list of retrieval callables (federated per-source retrievers
    plus, when applicable, the normal document-index retrieval — possibly
    once per multilingual rephrase), runs them in parallel, and merges the
    per-query result lists into a single ranked chunk list.

    Args:
        query: The search request including filters and search type.
        user_id: Requesting user's id, used to scope federated retrieval.
        document_index: Index used for the normal (non-federated) retrieval.
        db_session: Active database session.
        retrieval_metrics_callback: Optional hook invoked with per-chunk
            retrieval metrics after the search completes.

    Returns:
        The combined ranked chunks; empty list if nothing matched.
    """

    multilingual_expansion = get_multilingual_expansion(db_session)
    # Each entry is a (retrieval_function, args) pair executed in parallel below
    run_queries: list[tuple[Callable, tuple]] = []

    source_filters = (
        set(query.filters.source_type) if query.filters.source_type else None
    )

    # Federated retrieval
    federated_retrieval_infos = get_federated_retrieval_functions(
        db_session,
        user_id,
        list(query.filters.source_type) if query.filters.source_type else None,
        query.filters.document_set,
        user_file_ids=query.filters.user_file_ids,
    )
    federated_sources = set(
        federated_retrieval_info.source.to_non_federated_source()
        for federated_retrieval_info in federated_retrieval_infos
    )
    for federated_retrieval_info in federated_retrieval_infos:
        run_queries.append((federated_retrieval_info.retrieval_function, (query,)))

    # Normal retrieval
    # Skip the document-index search only when every requested source is
    # already covered by a federated retriever.
    normal_search_enabled = (source_filters is None) or (
        len(set(source_filters) - federated_sources) > 0
    )
    if normal_search_enabled and (
        not multilingual_expansion or "\n" in query.query or "\r" in query.query
    ):
        # Don't do query expansion on complex queries, rephrasings likely would not work well
        run_queries.append((doc_index_retrieval, (query, document_index, db_session)))
    elif normal_search_enabled:
        simplified_queries = set()

        # Currently only uses query expansion on multilingual use cases
        query_rephrases = multilingual_query_expansion(
            query.query, multilingual_expansion
        )
        # Just to be extra sure, add the original query.
        query_rephrases.append(query.query)
        for rephrase in set(query_rephrases):
            # Sometimes the model rephrases the query in the same language with minor changes
            # Avoid doing an extra search with the minor changes as this biases the results
            simplified_rephrase = _simplify_text(rephrase)
            if simplified_rephrase in simplified_queries:
                continue
            simplified_queries.add(simplified_rephrase)

            q_copy = query.model_copy(
                update={
                    "query": rephrase,
                    # need to recompute for each rephrase
                    # note that `SearchQuery` is a frozen model, so we can't update
                    # it below
                    "precomputed_query_embedding": None,
                },
                deep=True,
            )
            run_queries.append(
                (doc_index_retrieval, (q_copy, document_index, db_session))
            )

    parallel_search_results = run_functions_tuples_in_parallel(run_queries)
    top_chunks = combine_retrieval_results(parallel_search_results)

    if not top_chunks:
        logger.warning(
            f"Hybrid ({query.search_type.value.capitalize()}) search returned no results "
            f"with filters: {query.filters}"
        )
        return []

    # Report per-chunk metrics to the caller-supplied hook, if any
    if retrieval_metrics_callback is not None:
        chunk_metrics = [
            ChunkMetric(
                document_id=chunk.document_id,
                chunk_content_start=chunk.content[:MAX_METRICS_CONTENT],
                first_link=chunk.source_links[0] if chunk.source_links else None,
                score=chunk.score if chunk.score is not None else 0,
            )
            for chunk in top_chunks
        ]
        retrieval_metrics_callback(
            RetrievalMetricsContainer(
                search_type=query.search_type, metrics=chunk_metrics
            )
        )

    return top_chunks
|
||||
|
||||
|
||||
def _embed_and_search(
|
||||
query_request: ChunkIndexRequest,
|
||||
document_index: DocumentIndex,
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
import string
|
||||
from collections.abc import Sequence
|
||||
from typing import TypeVar
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.chat.models import SectionRelevancePiece
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
from onyx.context.search.models import InferenceSection
|
||||
from onyx.context.search.models import SavedSearchDoc
|
||||
from onyx.context.search.models import SavedSearchDocWithContent
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.db.models import SearchDoc as DBSearchDoc
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -37,6 +41,66 @@ TSection = TypeVar(
|
||||
)
|
||||
|
||||
|
||||
def dedupe_documents(items: list[T]) -> tuple[list[T], list[int]]:
    """Drop items whose document id has already been seen.

    Keeps the first occurrence per document id (InferenceSections are keyed
    by their center chunk's document id) and records the positions of the
    dropped duplicates so callers can remap dependent index lists.

    Returns:
        (deduped items in original order, indices of the dropped items)
    """
    kept: list[T] = []
    dropped_positions: list[int] = []
    seen_document_ids: set = set()

    for position, item in enumerate(items):
        doc_id = (
            item.center_chunk.document_id
            if isinstance(item, InferenceSection)
            else item.document_id
        )
        if doc_id in seen_document_ids:
            dropped_positions.append(position)
        else:
            seen_document_ids.add(doc_id)
            kept.append(item)

    return kept, dropped_positions
|
||||
|
||||
|
||||
def relevant_sections_to_indices(
    relevance_sections: list[SectionRelevancePiece] | None, items: list[TSection]
) -> list[int]:
    """Map relevance judgements back to positions in *items*.

    A relevance piece identifies a section by (document_id, chunk_id). An
    item matches when its own identifying pair is in the relevant set:
    InferenceSections are identified via their center chunk, other item
    types via (document_id, chunk_ind).
    """
    if not relevance_sections:
        return []

    relevant_pairs = {
        (piece.document_id, piece.chunk_id)
        for piece in relevance_sections
        if piece.relevant
    }

    def _is_relevant(item: TSection) -> bool:
        if isinstance(item, InferenceSection):
            key = (item.center_chunk.document_id, item.center_chunk.chunk_id)
        else:
            key = (item.document_id, item.chunk_ind)
        return key in relevant_pairs

    return [position for position, item in enumerate(items) if _is_relevant(item)]
|
||||
|
||||
|
||||
def drop_llm_indices(
    llm_indices: list[int],
    search_docs: Sequence[DBSearchDoc | SavedSearchDoc],
    dropped_indices: list[int],
) -> list[int]:
    """Remap LLM-selected doc indices after some docs were dropped.

    Builds one relevance flag per original doc position, removes the flags
    at `dropped_indices`, and returns the positions that are still flagged
    in the compacted list.
    """
    flagged = [position in llm_indices for position in range(len(search_docs))]
    if dropped_indices:
        dropped = set(dropped_indices)
        flagged = [
            flag for position, flag in enumerate(flagged) if position not in dropped
        ]
    return [position for position, flag in enumerate(flagged) if flag]
|
||||
|
||||
|
||||
def inference_section_from_chunks(
|
||||
center_chunk: InferenceChunk,
|
||||
chunks: list[InferenceChunk],
|
||||
@@ -64,6 +128,26 @@ def inference_section_from_single_chunk(
|
||||
)
|
||||
|
||||
|
||||
def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]:
    """Strip English stop words and punctuation tokens from *keywords*.

    The keywords are joined and re-tokenized with NLTK so filtering runs on
    proper word tokens. If filtering would remove everything, the full token
    list is returned instead; on any NLTK failure the original keywords are
    returned unchanged.
    """
    from nltk.corpus import stopwords  # type:ignore
    from nltk.tokenize import word_tokenize  # type:ignore

    try:
        # Re-tokenize using the NLTK tokenizer for better matching
        joined_query = " ".join(keywords)
        english_stop_words = set(stopwords.words("english"))
        tokens = word_tokenize(joined_query)
        filtered_tokens = [
            token
            for token in tokens
            if token.casefold() not in english_stop_words
            and token not in string.punctuation
        ]
        return filtered_tokens or tokens
    except Exception as e:
        logger.warning(f"Error removing stop words and punctuation: {e}")
        return keywords
|
||||
|
||||
|
||||
def get_query_embeddings(queries: list[str], db_session: Session) -> list[Embedding]:
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
|
||||
|
||||
@@ -91,6 +91,59 @@ def get_chat_sessions_by_slack_thread_id(
|
||||
return db_session.scalars(stmt).all()
|
||||
|
||||
|
||||
def get_valid_messages_from_query_sessions(
    chat_session_ids: list[UUID],
    db_session: Session,
) -> dict[UUID, str]:
    """Return the first user message for each session that produced documents.

    A session qualifies when it has a first (minimum-id) user message and a
    first assistant message that is linked to at least one search doc;
    sessions whose first assistant reply has no retrieved documents are
    omitted from the result.

    Returns:
        Mapping of chat_session_id -> text of that session's first user
        message.
    """
    # Earliest (minimum id) user message per session
    user_message_subquery = (
        select(
            ChatMessage.chat_session_id, func.min(ChatMessage.id).label("user_msg_id")
        )
        .where(
            ChatMessage.chat_session_id.in_(chat_session_ids),
            ChatMessage.message_type == MessageType.USER,
        )
        .group_by(ChatMessage.chat_session_id)
        .subquery()
    )

    # Earliest (minimum id) assistant message per session
    assistant_message_subquery = (
        select(
            ChatMessage.chat_session_id,
            func.min(ChatMessage.id).label("assistant_msg_id"),
        )
        .where(
            ChatMessage.chat_session_id.in_(chat_session_ids),
            ChatMessage.message_type == MessageType.ASSISTANT,
        )
        .group_by(ChatMessage.chat_session_id)
        .subquery()
    )

    # Select the first user message, but only for sessions whose first
    # assistant message has at least one associated search doc. The join to
    # ChatMessage__SearchDoc can produce one row per linked doc; the dict
    # comprehension below collapses those duplicates.
    query = (
        select(ChatMessage.chat_session_id, ChatMessage.message)
        .join(
            user_message_subquery,
            ChatMessage.chat_session_id == user_message_subquery.c.chat_session_id,
        )
        .join(
            assistant_message_subquery,
            ChatMessage.chat_session_id == assistant_message_subquery.c.chat_session_id,
        )
        .join(
            ChatMessage__SearchDoc,
            ChatMessage__SearchDoc.chat_message_id
            == assistant_message_subquery.c.assistant_msg_id,
        )
        .where(ChatMessage.id == user_message_subquery.c.user_msg_id)
    )

    first_messages = db_session.execute(query).all()
    logger.info(f"Retrieved {len(first_messages)} first messages with documents")

    return {row.chat_session_id: row.message for row in first_messages}
|
||||
|
||||
|
||||
# Retrieves chat sessions by user
|
||||
# Chat sessions do not include onyxbot flows
|
||||
def get_chat_sessions_by_user(
|
||||
@@ -457,6 +510,21 @@ def add_chats_to_session_from_slack_thread(
|
||||
)
|
||||
|
||||
|
||||
def get_search_docs_for_chat_message(
    chat_message_id: int, db_session: Session
) -> list[DBSearchDoc]:
    """Fetch every SearchDoc linked to the given chat message."""
    query = (
        select(DBSearchDoc)
        .join(
            ChatMessage__SearchDoc,
            ChatMessage__SearchDoc.search_doc_id == DBSearchDoc.id,
        )
        .where(ChatMessage__SearchDoc.chat_message_id == chat_message_id)
    )
    docs = db_session.scalars(query).all()
    return list(docs)
|
||||
|
||||
|
||||
def add_search_docs_to_chat_message(
|
||||
chat_message_id: int, search_doc_ids: list[int], db_session: Session
|
||||
) -> None:
|
||||
|
||||
@@ -1,451 +0,0 @@
|
||||
"""CRUD operations for Discord bot models."""
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
|
||||
from sqlalchemy import delete
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import joinedload
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.api_key import build_displayable_api_key
|
||||
from onyx.auth.api_key import generate_api_key
|
||||
from onyx.auth.api_key import hash_api_key
|
||||
from onyx.auth.schemas import UserRole
|
||||
from onyx.configs.constants import DISCORD_SERVICE_API_KEY_NAME
|
||||
from onyx.db.api_key import insert_api_key
|
||||
from onyx.db.models import ApiKey
|
||||
from onyx.db.models import DiscordBotConfig
|
||||
from onyx.db.models import DiscordChannelConfig
|
||||
from onyx.db.models import DiscordGuildConfig
|
||||
from onyx.db.models import User
|
||||
from onyx.db.utils import DiscordChannelView
|
||||
from onyx.server.api_key.models import APIKeyArgs
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
# === DiscordBotConfig ===
|
||||
|
||||
|
||||
def get_discord_bot_config(db_session: Session) -> DiscordBotConfig | None:
    """Fetch this tenant's Discord bot config (at most one row exists)."""
    stmt = select(DiscordBotConfig).limit(1)
    return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def create_discord_bot_config(
    db_session: Session,
    bot_token: str,
) -> DiscordBotConfig:
    """Create the Discord bot config. Raises ValueError if already exists.

    The check constraint on id='SINGLETON' ensures only one config per tenant.

    Args:
        db_session: Database session for the tenant.
        bot_token: Discord bot token to persist.

    Raises:
        ValueError: If a config already exists, including when another
            request creates it concurrently.
    """
    existing = get_discord_bot_config(db_session)
    if existing:
        raise ValueError("Discord bot config already exists")

    config = DiscordBotConfig(bot_token=bot_token)
    db_session.add(config)
    try:
        db_session.flush()
    except IntegrityError as e:
        # Race condition: another request created the config concurrently.
        # Chain the original DB error so the root cause stays visible.
        db_session.rollback()
        raise ValueError("Discord bot config already exists") from e
    return config
|
||||
|
||||
|
||||
def delete_discord_bot_config(db_session: Session) -> bool:
    """Delete the Discord bot config. Returns True if a row was removed."""
    deletion = db_session.execute(delete(DiscordBotConfig))
    db_session.flush()
    deleted_rows: int = deletion.rowcount  # type: ignore[attr-defined]
    return deleted_rows > 0
|
||||
|
||||
|
||||
# === Discord Service API Key ===
|
||||
|
||||
|
||||
def get_discord_service_api_key(db_session: Session) -> ApiKey | None:
    """Look up the Discord service API key by its well-known name."""
    stmt = select(ApiKey).where(ApiKey.name == DISCORD_SERVICE_API_KEY_NAME)
    return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def get_or_create_discord_service_api_key(
    db_session: Session,
    tenant_id: str,
) -> str:
    """Get existing Discord service API key or create one.

    The API key is used by the Discord bot to authenticate with the
    Onyx API pods when sending chat requests.

    Note: when a key already exists, it is ROTATED (a new raw key is
    generated and the stored hash/display are overwritten) because the
    database only keeps the hash, never the raw key.

    Args:
        db_session: Database session for the tenant.
        tenant_id: The tenant ID (used for logging/context).

    Returns:
        The raw API key string (not hashed).

    Raises:
        RuntimeError: If API key creation fails.
    """
    # Check for existing key
    existing = get_discord_service_api_key(db_session)
    if existing:
        # Database only stores the hash, so we must regenerate to get the raw key.
        # This is safe since the Discord bot is the only consumer of this key.
        logger.debug(
            f"Found existing Discord service API key for tenant {tenant_id} that isn't in cache, "
            "regenerating to update cache"
        )
        new_api_key = generate_api_key(tenant_id)
        # Rotate stored hash and display value to match the fresh raw key
        existing.hashed_api_key = hash_api_key(new_api_key)
        existing.api_key_display = build_displayable_api_key(new_api_key)
        db_session.flush()
        return new_api_key

    # Create new API key
    logger.info(f"Creating Discord service API key for tenant {tenant_id}")
    api_key_args = APIKeyArgs(
        name=DISCORD_SERVICE_API_KEY_NAME,
        role=UserRole.LIMITED,  # Limited role is sufficient for chat requests
    )
    api_key_descriptor = insert_api_key(
        db_session=db_session,
        api_key_args=api_key_args,
        user_id=None,  # Service account, no owner
    )

    if not api_key_descriptor.api_key:
        raise RuntimeError(
            f"Failed to create Discord service API key for tenant {tenant_id}"
        )

    return api_key_descriptor.api_key
|
||||
|
||||
|
||||
def delete_discord_service_api_key(db_session: Session) -> bool:
    """Delete the Discord service API key for a tenant.

    Called when:
    - Bot config is deleted (self-hosted)
    - All guild configs are deleted (Cloud)

    Args:
        db_session: Database session for the tenant.

    Returns:
        True if the key was deleted, False if it didn't exist.
    """
    api_key = get_discord_service_api_key(db_session)
    if api_key is None:
        return False

    # The key has an associated service-account user; remove it as well
    owner = db_session.scalar(
        select(User).where(User.id == api_key.user_id)  # type: ignore[arg-type]
    )

    db_session.delete(api_key)
    if owner is not None:
        db_session.delete(owner)

    db_session.flush()
    logger.info("Deleted Discord service API key")
    return True
|
||||
|
||||
|
||||
# === DiscordGuildConfig ===
|
||||
|
||||
|
||||
def get_guild_configs(
    db_session: Session,
    include_channels: bool = False,
) -> list[DiscordGuildConfig]:
    """Return every guild config for this tenant.

    When include_channels is True, channel configs are eagerly loaded.
    """
    query = select(DiscordGuildConfig)
    if include_channels:
        query = query.options(joinedload(DiscordGuildConfig.channels))
    # unique() is required after joined-loading a collection relationship
    return list(db_session.scalars(query).unique().all())
|
||||
|
||||
|
||||
def get_guild_config_by_internal_id(
    db_session: Session,
    internal_id: int,
) -> DiscordGuildConfig | None:
    """Look up a guild config by its internal primary key."""
    stmt = select(DiscordGuildConfig).where(DiscordGuildConfig.id == internal_id)
    return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def get_guild_config_by_discord_id(
    db_session: Session,
    guild_id: int,
) -> DiscordGuildConfig | None:
    """Look up a guild config by the Discord-side guild id."""
    stmt = select(DiscordGuildConfig).where(DiscordGuildConfig.guild_id == guild_id)
    return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def get_guild_config_by_registration_key(
    db_session: Session,
    registration_key: str,
) -> DiscordGuildConfig | None:
    """Look up a guild config by its registration key."""
    stmt = select(DiscordGuildConfig).where(
        DiscordGuildConfig.registration_key == registration_key
    )
    return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def create_guild_config(
    db_session: Session,
    registration_key: str,
) -> DiscordGuildConfig:
    """Create a new guild config with a registration key (guild_id=NULL)."""
    new_config = DiscordGuildConfig(registration_key=registration_key)
    db_session.add(new_config)
    db_session.flush()
    return new_config
|
||||
|
||||
|
||||
def register_guild(
    db_session: Session,
    config: DiscordGuildConfig,
    guild_id: int,
    guild_name: str,
) -> DiscordGuildConfig:
    """Complete registration by recording the guild's identity and timestamp."""
    config.guild_id = guild_id
    config.guild_name = guild_name
    # Record when registration completed, in UTC
    config.registered_at = datetime.now(timezone.utc)
    db_session.flush()
    return config
|
||||
|
||||
|
||||
def update_guild_config(
    db_session: Session,
    config: DiscordGuildConfig,
    enabled: bool,
    default_persona_id: int | None = None,
) -> DiscordGuildConfig:
    """Overwrite the mutable guild config fields and flush."""
    config.enabled = enabled
    config.default_persona_id = default_persona_id
    db_session.flush()
    return config
|
||||
|
||||
|
||||
def delete_guild_config(
    db_session: Session,
    internal_id: int,
) -> bool:
    """Delete guild config (cascades to channel configs). Returns True if deleted."""
    deletion = db_session.execute(
        delete(DiscordGuildConfig).where(DiscordGuildConfig.id == internal_id)
    )
    db_session.flush()
    deleted_rows: int = deletion.rowcount  # type: ignore[attr-defined]
    return deleted_rows > 0
|
||||
|
||||
|
||||
# === DiscordChannelConfig ===
|
||||
|
||||
|
||||
def get_channel_configs(
    db_session: Session,
    guild_config_id: int,
) -> list[DiscordChannelConfig]:
    """Return all channel configs belonging to the given guild config."""
    stmt = select(DiscordChannelConfig).where(
        DiscordChannelConfig.guild_config_id == guild_config_id
    )
    return list(db_session.scalars(stmt).all())
|
||||
|
||||
|
||||
def get_channel_config_by_discord_ids(
    db_session: Session,
    guild_id: int,
    channel_id: int,
) -> DiscordChannelConfig | None:
    """Find a channel config by the Discord-side guild and channel ids."""
    stmt = (
        select(DiscordChannelConfig)
        .join(DiscordGuildConfig)
        .where(
            DiscordGuildConfig.guild_id == guild_id,
            DiscordChannelConfig.channel_id == channel_id,
        )
    )
    return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def get_channel_config_by_internal_ids(
    db_session: Session,
    guild_config_id: int,
    channel_config_id: int,
) -> DiscordChannelConfig | None:
    """Find a channel config by internal guild/channel config primary keys."""
    stmt = select(DiscordChannelConfig).where(
        DiscordChannelConfig.guild_config_id == guild_config_id,
        DiscordChannelConfig.id == channel_config_id,
    )
    return db_session.scalar(stmt)
|
||||
|
||||
|
||||
def update_discord_channel_config(
    db_session: Session,
    config: DiscordChannelConfig,
    channel_name: str,
    thread_only_mode: bool,
    require_bot_invocation: bool,
    enabled: bool,
    persona_override_id: int | None = None,
) -> DiscordChannelConfig:
    """Overwrite the editable channel config fields and flush."""
    config.channel_name = channel_name
    config.thread_only_mode = thread_only_mode
    config.require_bot_invocation = require_bot_invocation
    config.enabled = enabled
    config.persona_override_id = persona_override_id
    db_session.flush()
    return config
|
||||
|
||||
|
||||
def delete_discord_channel_config(
    db_session: Session,
    guild_config_id: int,
    channel_config_id: int,
) -> bool:
    """Delete a channel config. Returns True if a row was removed."""
    deletion = db_session.execute(
        delete(DiscordChannelConfig).where(
            DiscordChannelConfig.guild_config_id == guild_config_id,
            DiscordChannelConfig.id == channel_config_id,
        )
    )
    db_session.flush()
    deleted_rows: int = deletion.rowcount  # type: ignore[attr-defined]
    return deleted_rows > 0
|
||||
|
||||
|
||||
def create_channel_config(
    db_session: Session,
    guild_config_id: int,
    channel_view: DiscordChannelView,
) -> DiscordChannelConfig:
    """Create a new channel config with default settings (disabled by default, admin enables via UI)."""
    new_config = DiscordChannelConfig(
        guild_config_id=guild_config_id,
        channel_id=channel_view.channel_id,
        channel_name=channel_view.channel_name,
        channel_type=channel_view.channel_type,
        is_private=channel_view.is_private,
    )
    db_session.add(new_config)
    db_session.flush()
    return new_config
|
||||
|
||||
|
||||
def bulk_create_channel_configs(
    db_session: Session,
    guild_config_id: int,
    channels: list[DiscordChannelView],
) -> list[DiscordChannelConfig]:
    """Create multiple channel configs at once. Skips existing channels."""
    # Channel ids that already have a config for this guild
    existing_ids = set(
        db_session.scalars(
            select(DiscordChannelConfig.channel_id).where(
                DiscordChannelConfig.guild_config_id == guild_config_id
            )
        ).all()
    )

    # Only channels not seen before get a (default, disabled) config
    created: list[DiscordChannelConfig] = []
    for view in channels:
        if view.channel_id in existing_ids:
            continue
        new_config = DiscordChannelConfig(
            guild_config_id=guild_config_id,
            channel_id=view.channel_id,
            channel_name=view.channel_name,
            channel_type=view.channel_type,
            is_private=view.is_private,
        )
        db_session.add(new_config)
        created.append(new_config)

    db_session.flush()
    return created
|
||||
|
||||
|
||||
def sync_channel_configs(
    db_session: Session,
    guild_config_id: int,
    current_channels: list[DiscordChannelView],
) -> tuple[int, int, int]:
    """Sync channel configs with current Discord channels.

    - Creates configs for new channels (disabled by default)
    - Removes configs for deleted channels
    - Updates names and types for existing channels if changed

    Returns: (added_count, removed_count, updated_count)
    """
    # Index live Discord channels by channel id for O(1) lookups
    current_channel_map = {
        channel_view.channel_id: channel_view for channel_view in current_channels
    }
    current_channel_ids = set(current_channel_map.keys())

    # Get existing configs
    existing_configs = get_channel_configs(db_session, guild_config_id)
    existing_channel_ids = {c.channel_id for c in existing_configs}

    # Find channels to add, remove, and potentially update
    to_add = current_channel_ids - existing_channel_ids
    to_remove = existing_channel_ids - current_channel_ids

    # Add new channels
    added_count = 0
    for channel_id in to_add:
        channel_view = current_channel_map[channel_id]
        create_channel_config(db_session, guild_config_id, channel_view)
        added_count += 1

    # Remove deleted channels
    removed_count = 0
    for config in existing_configs:
        if config.channel_id in to_remove:
            db_session.delete(config)
            removed_count += 1

    # Update names, types, and privacy for existing channels if changed
    # (configs marked for removal are skipped: their ids are not in
    # current_channel_ids)
    updated_count = 0
    for config in existing_configs:
        if config.channel_id in current_channel_ids:
            channel_view = current_channel_map[config.channel_id]
            changed = False
            if config.channel_name != channel_view.channel_name:
                config.channel_name = channel_view.channel_name
                changed = True
            if config.channel_type != channel_view.channel_type:
                config.channel_type = channel_view.channel_type
                changed = True
            if config.is_private != channel_view.is_private:
                config.is_private = channel_view.is_private
                changed = True
            if changed:
                updated_count += 1

    # Single flush persists removals and updates together
    db_session.flush()
    return added_count, removed_count, updated_count
|
||||
@@ -3,8 +3,6 @@ from uuid import UUID
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import aliased
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -20,6 +18,45 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def insert_input_prompt_if_not_exists(
    user: User | None,
    input_prompt_id: int | None,
    prompt: str,
    content: str,
    active: bool,
    is_public: bool,
    db_session: Session,
    commit: bool = True,
) -> InputPrompt:
    """Fetch an existing input prompt or create it if none matches.

    Lookup is by id when input_prompt_id is given; otherwise by prompt text
    scoped to the user (or to the NULL-user/public space when user is None).
    Prompts created without an owner are forced public.
    """
    if input_prompt_id is not None:
        existing = (
            db_session.query(InputPrompt).filter_by(id=input_prompt_id).first()
        )
    else:
        lookup = db_session.query(InputPrompt).filter(InputPrompt.prompt == prompt)
        if user:
            lookup = lookup.filter(InputPrompt.user_id == user.id)
        else:
            lookup = lookup.filter(InputPrompt.user_id.is_(None))
        existing = lookup.first()

    if existing is None:
        existing = InputPrompt(
            id=input_prompt_id,
            prompt=prompt,
            content=content,
            active=active,
            is_public=is_public or user is None,
            user_id=user.id if user else None,
        )
        db_session.add(existing)

    if commit:
        db_session.commit()

    return existing
|
||||
|
||||
|
||||
def insert_input_prompt(
|
||||
prompt: str,
|
||||
content: str,
|
||||
@@ -27,41 +64,16 @@ def insert_input_prompt(
|
||||
user: User | None,
|
||||
db_session: Session,
|
||||
) -> InputPrompt:
|
||||
user_id = user.id if user else None
|
||||
|
||||
# Use atomic INSERT ... ON CONFLICT DO NOTHING with RETURNING
|
||||
# to avoid race conditions with the uniqueness check
|
||||
stmt = pg_insert(InputPrompt).values(
|
||||
input_prompt = InputPrompt(
|
||||
prompt=prompt,
|
||||
content=content,
|
||||
active=True,
|
||||
is_public=is_public,
|
||||
user_id=user_id,
|
||||
user_id=user.id if user is not None else None,
|
||||
)
|
||||
|
||||
# Use the appropriate constraint based on whether this is a user-owned or public prompt
|
||||
if user_id is not None:
|
||||
stmt = stmt.on_conflict_do_nothing(constraint="uq_inputprompt_prompt_user_id")
|
||||
else:
|
||||
# Partial unique indexes cannot be targeted by constraint name;
|
||||
# must use index_elements + index_where
|
||||
stmt = stmt.on_conflict_do_nothing(
|
||||
index_elements=[InputPrompt.prompt],
|
||||
index_where=InputPrompt.user_id.is_(None),
|
||||
)
|
||||
|
||||
stmt = stmt.returning(InputPrompt)
|
||||
|
||||
result = db_session.execute(stmt)
|
||||
input_prompt = result.scalar_one_or_none()
|
||||
|
||||
if input_prompt is None:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"A prompt shortcut with the name '{prompt}' already exists",
|
||||
)
|
||||
|
||||
db_session.add(input_prompt)
|
||||
db_session.commit()
|
||||
|
||||
return input_prompt
|
||||
|
||||
|
||||
@@ -86,40 +98,23 @@ def update_input_prompt(
|
||||
input_prompt.content = content
|
||||
input_prompt.active = active
|
||||
|
||||
try:
|
||||
db_session.commit()
|
||||
except IntegrityError:
|
||||
db_session.rollback()
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"A prompt shortcut with the name '{prompt}' already exists",
|
||||
)
|
||||
|
||||
db_session.commit()
|
||||
return input_prompt
|
||||
|
||||
|
||||
def validate_user_prompt_authorization(
|
||||
user: User | None, input_prompt: InputPrompt
|
||||
) -> bool:
|
||||
"""
|
||||
Check if the user is authorized to modify the given input prompt.
|
||||
Returns True only if the user owns the prompt.
|
||||
Returns False for public prompts (only admins can modify those),
|
||||
unless auth is disabled (then anyone can manage public prompts).
|
||||
"""
|
||||
prompt = InputPromptSnapshot.from_model(input_prompt=input_prompt)
|
||||
|
||||
# Public prompts cannot be modified via the user API (unless auth is disabled)
|
||||
if prompt.is_public or prompt.user_id is None:
|
||||
return AUTH_TYPE == AuthType.DISABLED
|
||||
if prompt.user_id is not None:
|
||||
if user is None:
|
||||
return False
|
||||
|
||||
# User must be logged in
|
||||
if user is None:
|
||||
return False
|
||||
|
||||
# User must own the prompt
|
||||
user_details = UserInfo.from_model(user)
|
||||
return str(user_details.id) == str(prompt.user_id)
|
||||
user_details = UserInfo.from_model(user)
|
||||
if str(user_details.id) != str(prompt.user_id):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def remove_public_input_prompt(input_prompt_id: int, db_session: Session) -> None:
|
||||
|
||||
@@ -9,9 +9,6 @@ def get_memories(user: User | None, db_session: Session) -> list[str]:
|
||||
if user is None:
|
||||
return []
|
||||
|
||||
if not user.use_memories:
|
||||
return []
|
||||
|
||||
user_info = [
|
||||
f"User's name: {user.personal_name}" if user.personal_name else "",
|
||||
f"User's role: {user.personal_role}" if user.personal_role else "",
|
||||
|
||||
@@ -26,7 +26,6 @@ from sqlalchemy import ForeignKey
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy import Index
|
||||
from sqlalchemy import Integer
|
||||
from sqlalchemy import BigInteger
|
||||
|
||||
from sqlalchemy import Sequence
|
||||
from sqlalchemy import String
|
||||
@@ -84,6 +83,7 @@ from onyx.utils.special_types import JSON_ro
|
||||
from onyx.file_store.models import FileDescriptor
|
||||
from onyx.llm.override_models import LLMOverride
|
||||
from onyx.llm.override_models import PromptOverride
|
||||
from onyx.context.search.enums import RecencyBiasSetting
|
||||
from onyx.kg.models import KGStage
|
||||
from onyx.server.features.mcp.models import MCPConnectionData
|
||||
from onyx.utils.encryption import decrypt_bytes_to_string
|
||||
@@ -91,8 +91,6 @@ from onyx.utils.encryption import encrypt_string_to_bytes
|
||||
from onyx.utils.headers import HeaderItemDict
|
||||
from shared_configs.enums import EmbeddingProvider
|
||||
from shared_configs.enums import RerankerProvider
|
||||
from onyx.context.search.enums import RecencyBiasSetting
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -188,7 +186,6 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
|
||||
nullable=True,
|
||||
default=None,
|
||||
)
|
||||
chat_background: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
# personalization fields are exposed via the chat user settings "Personalization" tab
|
||||
personal_name: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
personal_role: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
@@ -2335,23 +2332,6 @@ class SearchDoc(Base):
|
||||
)
|
||||
|
||||
|
||||
class SearchQuery(Base):
|
||||
# This table contains search queries for the Search UI. There are no followups and less is stored because the reply
|
||||
# functionality is simply to rerun the search query again as things may have changed and this is more common for search.
|
||||
__tablename__ = "search_query"
|
||||
id: Mapped[UUID] = mapped_column(
|
||||
PGUUID(as_uuid=True), primary_key=True, default=uuid4
|
||||
)
|
||||
user_id: Mapped[UUID] = mapped_column(PGUUID(as_uuid=True), ForeignKey("user.id"))
|
||||
query: Mapped[str] = mapped_column(String)
|
||||
query_expansions: Mapped[list[str] | None] = mapped_column(
|
||||
postgresql.ARRAY(String), nullable=True
|
||||
)
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now()
|
||||
)
|
||||
|
||||
|
||||
"""
|
||||
Feedback, Logging, Metrics Tables
|
||||
"""
|
||||
@@ -3040,124 +3020,6 @@ class SlackBot(Base):
|
||||
)
|
||||
|
||||
|
||||
class DiscordBotConfig(Base):
|
||||
"""Global Discord bot configuration (one per tenant).
|
||||
|
||||
Stores the bot token when not provided via DISCORD_BOT_TOKEN env var.
|
||||
Uses a fixed ID with check constraint to enforce only one row per tenant.
|
||||
"""
|
||||
|
||||
__tablename__ = "discord_bot_config"
|
||||
|
||||
id: Mapped[str] = mapped_column(
|
||||
String, primary_key=True, server_default=text("'SINGLETON'")
|
||||
)
|
||||
bot_token: Mapped[str] = mapped_column(EncryptedString(), nullable=False)
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), nullable=False
|
||||
)
|
||||
|
||||
|
||||
class DiscordGuildConfig(Base):
|
||||
"""Configuration for a Discord guild (server) connected to this tenant.
|
||||
|
||||
registration_key is a one-time key used to link a Discord server to this tenant.
|
||||
Format: discord_<tenant_id>.<random_token>
|
||||
guild_id is NULL until the Discord admin runs !register with the key.
|
||||
"""
|
||||
|
||||
__tablename__ = "discord_guild_config"
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
|
||||
# Discord snowflake - NULL until registered via command in Discord
|
||||
guild_id: Mapped[int | None] = mapped_column(BigInteger, nullable=True, unique=True)
|
||||
guild_name: Mapped[str | None] = mapped_column(String(256), nullable=True)
|
||||
|
||||
# One-time registration key: discord_<tenant_id>.<random_token>
|
||||
registration_key: Mapped[str] = mapped_column(String, unique=True, nullable=False)
|
||||
|
||||
registered_at: Mapped[datetime.datetime | None] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True
|
||||
)
|
||||
|
||||
# Configuration
|
||||
default_persona_id: Mapped[int | None] = mapped_column(
|
||||
ForeignKey("persona.id", ondelete="SET NULL"), nullable=True
|
||||
)
|
||||
enabled: Mapped[bool] = mapped_column(
|
||||
Boolean, server_default=text("true"), nullable=False
|
||||
)
|
||||
|
||||
# Relationships
|
||||
default_persona: Mapped["Persona | None"] = relationship(
|
||||
"Persona", foreign_keys=[default_persona_id]
|
||||
)
|
||||
channels: Mapped[list["DiscordChannelConfig"]] = relationship(
|
||||
back_populates="guild_config", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
|
||||
class DiscordChannelConfig(Base):
|
||||
"""Per-channel configuration for Discord bot behavior.
|
||||
|
||||
Used to whitelist specific channels and configure per-channel behavior.
|
||||
"""
|
||||
|
||||
__tablename__ = "discord_channel_config"
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
guild_config_id: Mapped[int] = mapped_column(
|
||||
ForeignKey("discord_guild_config.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
|
||||
# Discord snowflake
|
||||
channel_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
|
||||
channel_name: Mapped[str] = mapped_column(String(), nullable=False)
|
||||
|
||||
# Channel type from Discord (text, forum)
|
||||
channel_type: Mapped[str] = mapped_column(
|
||||
String(20), server_default=text("'text'"), nullable=False
|
||||
)
|
||||
|
||||
# True if @everyone cannot view the channel
|
||||
is_private: Mapped[bool] = mapped_column(
|
||||
Boolean, server_default=text("false"), nullable=False
|
||||
)
|
||||
|
||||
# If true, bot only responds to messages in threads
|
||||
# Otherwise, will reply in channel
|
||||
thread_only_mode: Mapped[bool] = mapped_column(
|
||||
Boolean, server_default=text("false"), nullable=False
|
||||
)
|
||||
|
||||
# If true (default), bot only responds when @mentioned
|
||||
# If false, bot responds to ALL messages in this channel
|
||||
require_bot_invocation: Mapped[bool] = mapped_column(
|
||||
Boolean, server_default=text("true"), nullable=False
|
||||
)
|
||||
|
||||
# Override the guild's default persona for this channel
|
||||
persona_override_id: Mapped[int | None] = mapped_column(
|
||||
ForeignKey("persona.id", ondelete="SET NULL"), nullable=True
|
||||
)
|
||||
|
||||
enabled: Mapped[bool] = mapped_column(
|
||||
Boolean, server_default=text("false"), nullable=False
|
||||
)
|
||||
|
||||
# Relationships
|
||||
guild_config: Mapped["DiscordGuildConfig"] = relationship(back_populates="channels")
|
||||
persona_override: Mapped["Persona | None"] = relationship()
|
||||
|
||||
# Constraints
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"guild_config_id", "channel_id", name="uq_discord_channel_guild_channel"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class Milestone(Base):
|
||||
# This table is used to track significant events for a deployment towards finding value
|
||||
# The table is currently not used for features but it may be used in the future to inform
|
||||
@@ -3235,6 +3097,25 @@ class FileRecord(Base):
|
||||
)
|
||||
|
||||
|
||||
class AgentSearchMetrics(Base):
|
||||
__tablename__ = "agent__search_metrics"
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True)
|
||||
user_id: Mapped[UUID | None] = mapped_column(
|
||||
ForeignKey("user.id", ondelete="CASCADE"), nullable=True
|
||||
)
|
||||
persona_id: Mapped[int | None] = mapped_column(
|
||||
ForeignKey("persona.id"), nullable=True
|
||||
)
|
||||
agent_type: Mapped[str] = mapped_column(String)
|
||||
start_time: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
|
||||
base_duration_s: Mapped[float] = mapped_column(Float)
|
||||
full_duration_s: Mapped[float] = mapped_column(Float)
|
||||
base_metrics: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True)
|
||||
refined_metrics: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True)
|
||||
all_metrics: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True)
|
||||
|
||||
|
||||
"""
|
||||
************************************************************************
|
||||
Enterprise Edition Models
|
||||
@@ -3627,18 +3508,6 @@ class InputPrompt(Base):
|
||||
ForeignKey("user.id", ondelete="CASCADE"), nullable=True
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
# Unique constraint on (prompt, user_id) for user-owned prompts
|
||||
UniqueConstraint("prompt", "user_id", name="uq_inputprompt_prompt_user_id"),
|
||||
# Partial unique index for public prompts (user_id IS NULL)
|
||||
Index(
|
||||
"uq_inputprompt_prompt_public",
|
||||
"prompt",
|
||||
unique=True,
|
||||
postgresql_where=text("user_id IS NULL"),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class InputPrompt__User(Base):
|
||||
__tablename__ = "inputprompt__user"
|
||||
@@ -3647,7 +3516,7 @@ class InputPrompt__User(Base):
|
||||
ForeignKey("inputprompt.id"), primary_key=True
|
||||
)
|
||||
user_id: Mapped[UUID | None] = mapped_column(
|
||||
ForeignKey("user.id"), primary_key=True
|
||||
ForeignKey("inputprompt.id"), primary_key=True
|
||||
)
|
||||
disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ from onyx.db.models import SearchSettings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.db.search_settings import get_secondary_search_settings
|
||||
from onyx.db.search_settings import update_search_settings_status
|
||||
from onyx.document_index.factory import get_all_document_indices
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.key_value_store.factory import get_kv_store
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
@@ -80,43 +80,39 @@ def _perform_index_swap(
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
# This flow is for checking and possibly creating an index so we get all
|
||||
# indices.
|
||||
document_indices = get_all_document_indices(new_search_settings, None, None)
|
||||
# remove the old index from the vector db
|
||||
document_index = get_default_document_index(new_search_settings, None)
|
||||
|
||||
WAIT_SECONDS = 5
|
||||
|
||||
for document_index in document_indices:
|
||||
success = False
|
||||
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
|
||||
try:
|
||||
logger.notice(
|
||||
f"Document index {document_index.__class__.__name__} swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
|
||||
)
|
||||
document_index.ensure_indices_exist(
|
||||
primary_embedding_dim=new_search_settings.final_embedding_dim,
|
||||
primary_embedding_precision=new_search_settings.embedding_precision,
|
||||
# just finished swap, no more secondary index
|
||||
secondary_index_embedding_dim=None,
|
||||
secondary_index_embedding_precision=None,
|
||||
)
|
||||
|
||||
logger.notice("Document index swap complete.")
|
||||
success = True
|
||||
break
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
|
||||
f"The document index services may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
|
||||
)
|
||||
time.sleep(WAIT_SECONDS)
|
||||
|
||||
if not success:
|
||||
logger.error(
|
||||
f"Document index swap for {document_index.__class__.__name__} did not succeed. "
|
||||
f"Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
|
||||
success = False
|
||||
for x in range(VESPA_NUM_ATTEMPTS_ON_STARTUP):
|
||||
try:
|
||||
logger.notice(
|
||||
f"Vespa index swap (attempt {x+1}/{VESPA_NUM_ATTEMPTS_ON_STARTUP})..."
|
||||
)
|
||||
return None
|
||||
document_index.ensure_indices_exist(
|
||||
primary_embedding_dim=new_search_settings.final_embedding_dim,
|
||||
primary_embedding_precision=new_search_settings.embedding_precision,
|
||||
# just finished swap, no more secondary index
|
||||
secondary_index_embedding_dim=None,
|
||||
secondary_index_embedding_precision=None,
|
||||
)
|
||||
|
||||
logger.notice("Vespa index swap complete.")
|
||||
success = True
|
||||
break
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"Vespa index swap did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds."
|
||||
)
|
||||
time.sleep(WAIT_SECONDS)
|
||||
|
||||
if not success:
|
||||
logger.error(
|
||||
f"Vespa index swap did not succeed. Attempt limit reached. ({VESPA_NUM_ATTEMPTS_ON_STARTUP})"
|
||||
)
|
||||
return None
|
||||
|
||||
return current_search_settings
|
||||
|
||||
|
||||
@@ -139,20 +139,6 @@ def update_user_theme_preference(
|
||||
db_session.commit()
|
||||
|
||||
|
||||
def update_user_chat_background(
|
||||
user_id: UUID,
|
||||
chat_background: str | None,
|
||||
db_session: Session,
|
||||
) -> None:
|
||||
"""Update user's chat background setting."""
|
||||
db_session.execute(
|
||||
update(User)
|
||||
.where(User.id == user_id) # type: ignore
|
||||
.values(chat_background=chat_background)
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
|
||||
def update_user_personalization(
|
||||
user_id: UUID,
|
||||
*,
|
||||
|
||||
@@ -40,10 +40,3 @@ class DocumentRow(BaseModel):
|
||||
class SortOrder(str, Enum):
|
||||
ASC = "asc"
|
||||
DESC = "desc"
|
||||
|
||||
|
||||
class DiscordChannelView(BaseModel):
|
||||
channel_id: int
|
||||
channel_name: str
|
||||
channel_type: str = "text" # text, forum
|
||||
is_private: bool = False # True if @everyone cannot view the channel
|
||||
|
||||
@@ -287,7 +287,6 @@ def run_deep_research_llm_loop(
|
||||
token_count=100,
|
||||
message_type=MessageType.USER,
|
||||
)
|
||||
|
||||
truncated_message_history = construct_message_history(
|
||||
system_prompt=system_prompt,
|
||||
custom_agent_prompt=None,
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
from onyx.configs.app_configs import BLURB_SIZE
|
||||
from onyx.configs.constants import RETURN_SEPARATOR
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
from onyx.context.search.models import InferenceChunkUncleaned
|
||||
from onyx.indexing.models import DocAwareChunk
|
||||
from onyx.indexing.models import DocMetadataAwareIndexChunk
|
||||
|
||||
|
||||
def generate_enriched_content_for_chunk_text(chunk: DocMetadataAwareIndexChunk) -> str:
|
||||
return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_keyword}"
|
||||
|
||||
|
||||
def generate_enriched_content_for_chunk_embedding(chunk: DocAwareChunk) -> str:
|
||||
return f"{chunk.title_prefix}{chunk.doc_summary}{chunk.content}{chunk.chunk_context}{chunk.metadata_suffix_semantic}"
|
||||
|
||||
|
||||
def cleanup_content_for_chunks(
|
||||
chunks: list[InferenceChunkUncleaned],
|
||||
) -> list[InferenceChunk]:
|
||||
"""
|
||||
Removes indexing-time content additions from chunks. Inverse of
|
||||
generate_enriched_content_for_chunk.
|
||||
|
||||
During indexing, chunks are augmented with additional text to improve search
|
||||
quality:
|
||||
- Title prepended to content (for better keyword/semantic matching)
|
||||
- Metadata suffix appended to content
|
||||
- Contextual RAG: doc_summary (beginning) and chunk_context (end)
|
||||
|
||||
This function strips these additions before returning chunks to users,
|
||||
restoring the original document content. Cleaning is applied in sequence:
|
||||
1. Title removal:
|
||||
- Full match: Strips exact title from beginning
|
||||
- Partial match: If content starts with title[:BLURB_SIZE], splits on
|
||||
RETURN_SEPARATOR to remove title section
|
||||
2. Metadata suffix removal:
|
||||
- Strips metadata_suffix from end, plus trailing RETURN_SEPARATOR
|
||||
3. Contextual RAG removal:
|
||||
- Strips doc_summary from beginning (if present)
|
||||
- Strips chunk_context from end (if present)
|
||||
|
||||
TODO(andrei): This entire function is not that fantastic, clean it up during
|
||||
QA before rolling out OpenSearch.
|
||||
|
||||
Args:
|
||||
chunks: Chunks as retrieved from the document index with indexing
|
||||
augmentations intact.
|
||||
|
||||
Returns:
|
||||
Clean InferenceChunk objects with augmentations removed, containing only
|
||||
the original document content that should be shown to users.
|
||||
"""
|
||||
|
||||
def _remove_title(chunk: InferenceChunkUncleaned) -> str:
|
||||
# TODO(andrei): This was ported over from
|
||||
# backend/onyx/document_index/vespa/vespa_document_index.py but I don't
|
||||
# think this logic is correct. In Vespa at least we set the title field
|
||||
# from the output of get_title_for_document_index, which is not
|
||||
# necessarily the same data that is prepended to the content; that comes
|
||||
# from title_prefix.
|
||||
# This was added in
|
||||
# https://github.com/onyx-dot-app/onyx/commit/e90c66c1b61c5b7da949652d703f7c906863e6e4#diff-2a2a29d5929de75cdaea77867a397934d9f8b785ce40a861c0d704033e3663ab,
|
||||
# see postprocessing.py. At that time the content enrichment logic was
|
||||
# also added in that commit, see
|
||||
# https://github.com/onyx-dot-app/onyx/commit/e90c66c1b61c5b7da949652d703f7c906863e6e4#diff-d807718aa263a15c1d991a4ab063c360c8419eaad210b4ba70e1e9f47d2aa6d2R77
|
||||
# chunker.py.
|
||||
if not chunk.title or not chunk.content:
|
||||
return chunk.content
|
||||
|
||||
if chunk.content.startswith(chunk.title):
|
||||
return chunk.content[len(chunk.title) :].lstrip()
|
||||
|
||||
# BLURB SIZE is by token instead of char but each token is at least 1 char
|
||||
# If this prefix matches the content, it's assumed the title was prepended
|
||||
if chunk.content.startswith(chunk.title[:BLURB_SIZE]):
|
||||
return (
|
||||
chunk.content.split(RETURN_SEPARATOR, 1)[-1]
|
||||
if RETURN_SEPARATOR in chunk.content
|
||||
else chunk.content
|
||||
)
|
||||
return chunk.content
|
||||
|
||||
def _remove_metadata_suffix(chunk: InferenceChunkUncleaned) -> str:
|
||||
if not chunk.metadata_suffix:
|
||||
return chunk.content
|
||||
return chunk.content.removesuffix(chunk.metadata_suffix).rstrip(
|
||||
RETURN_SEPARATOR
|
||||
)
|
||||
|
||||
def _remove_contextual_rag(chunk: InferenceChunkUncleaned) -> str:
|
||||
# remove document summary
|
||||
if chunk.doc_summary and chunk.content.startswith(chunk.doc_summary):
|
||||
chunk.content = chunk.content[len(chunk.doc_summary) :].lstrip()
|
||||
# remove chunk context
|
||||
if chunk.chunk_context and chunk.content.endswith(chunk.chunk_context):
|
||||
chunk.content = chunk.content[
|
||||
: len(chunk.content) - len(chunk.chunk_context)
|
||||
].rstrip()
|
||||
return chunk.content
|
||||
|
||||
for chunk in chunks:
|
||||
chunk.content = _remove_title(chunk)
|
||||
chunk.content = _remove_metadata_suffix(chunk)
|
||||
chunk.content = _remove_contextual_rag(chunk)
|
||||
|
||||
return [chunk.to_inference_chunk() for chunk in chunks]
|
||||
@@ -1,8 +1,9 @@
|
||||
import httpx
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_FOR_ONYX
|
||||
from onyx.db.models import SearchSettings
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.interfaces import DocumentIndex
|
||||
from onyx.document_index.opensearch.opensearch_document_index import (
|
||||
OpenSearchOldDocumentIndex,
|
||||
@@ -16,24 +17,17 @@ def get_default_document_index(
|
||||
secondary_search_settings: SearchSettings | None,
|
||||
httpx_client: httpx.Client | None = None,
|
||||
) -> DocumentIndex:
|
||||
"""Gets the default document index from env vars.
|
||||
"""Primary index is the index that is used for querying/updating etc.
|
||||
Secondary index is for when both the currently used index and the upcoming
|
||||
index both need to be updated, updates are applied to both indices"""
|
||||
|
||||
To be used for retrieval only. Indexing should be done through both indices
|
||||
until Vespa is deprecated.
|
||||
|
||||
Pre-existing docstring for this function, although secondary indices are not
|
||||
currently supported:
|
||||
Primary index is the index that is used for querying/updating etc. Secondary
|
||||
index is for when both the currently used index and the upcoming index both
|
||||
need to be updated, updates are applied to both indices.
|
||||
"""
|
||||
secondary_index_name: str | None = None
|
||||
secondary_large_chunks_enabled: bool | None = None
|
||||
if secondary_search_settings:
|
||||
secondary_index_name = secondary_search_settings.index_name
|
||||
secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled
|
||||
|
||||
if ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX:
|
||||
if ENABLE_OPENSEARCH_FOR_ONYX:
|
||||
return OpenSearchOldDocumentIndex(
|
||||
index_name=search_settings.index_name,
|
||||
secondary_index_name=secondary_index_name,
|
||||
@@ -53,48 +47,12 @@ def get_default_document_index(
|
||||
)
|
||||
|
||||
|
||||
def get_all_document_indices(
|
||||
search_settings: SearchSettings,
|
||||
secondary_search_settings: SearchSettings | None,
|
||||
httpx_client: httpx.Client | None = None,
|
||||
) -> list[DocumentIndex]:
|
||||
"""Gets all document indices.
|
||||
|
||||
NOTE: Will only return an OpenSearch index interface if
|
||||
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX is True. This is so we don't break flows
|
||||
where we know it won't be enabled.
|
||||
|
||||
Used for indexing only. Until Vespa is deprecated we will index into both
|
||||
document indices. Retrieval is done through only one index however.
|
||||
|
||||
Large chunks and secondary indices are not currently supported so we
|
||||
hardcode appropriate values.
|
||||
def get_current_primary_default_document_index(db_session: Session) -> DocumentIndex:
|
||||
"""
|
||||
vespa_document_index = VespaIndex(
|
||||
index_name=search_settings.index_name,
|
||||
secondary_index_name=(
|
||||
secondary_search_settings.index_name if secondary_search_settings else None
|
||||
),
|
||||
large_chunks_enabled=search_settings.large_chunks_enabled,
|
||||
secondary_large_chunks_enabled=(
|
||||
secondary_search_settings.large_chunks_enabled
|
||||
if secondary_search_settings
|
||||
else None
|
||||
),
|
||||
multitenant=MULTI_TENANT,
|
||||
httpx_client=httpx_client,
|
||||
TODO: Use redis to cache this or something
|
||||
"""
|
||||
search_settings = get_current_search_settings(db_session)
|
||||
return get_default_document_index(
|
||||
search_settings,
|
||||
None,
|
||||
)
|
||||
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
|
||||
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
|
||||
opensearch_document_index = OpenSearchOldDocumentIndex(
|
||||
index_name=search_settings.index_name,
|
||||
secondary_index_name=None,
|
||||
large_chunks_enabled=False,
|
||||
secondary_large_chunks_enabled=None,
|
||||
multitenant=MULTI_TENANT,
|
||||
httpx_client=httpx_client,
|
||||
)
|
||||
result: list[DocumentIndex] = [vespa_document_index]
|
||||
if opensearch_document_index:
|
||||
result.append(opensearch_document_index)
|
||||
return result
|
||||
|
||||
@@ -167,9 +167,9 @@ class IndexRetrievalFilters(BaseModel):
|
||||
|
||||
class SchemaVerifiable(abc.ABC):
|
||||
"""
|
||||
Class must implement document index schema verification. For example, verify
|
||||
that all of the necessary attributes for indexing, querying, filtering, and
|
||||
fields to return from search are all valid in the schema.
|
||||
Class must implement document index schema verification. For example, verify that all of the
|
||||
necessary attributes for indexing, querying, filtering, and fields to return from search are
|
||||
all valid in the schema.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
@@ -179,18 +179,13 @@ class SchemaVerifiable(abc.ABC):
|
||||
embedding_precision: EmbeddingPrecision,
|
||||
) -> None:
|
||||
"""
|
||||
Verifies that the document index exists and is consistent with the
|
||||
expectations in the code.
|
||||
Verify that the document index exists and is consistent with the expectations in the code. For certain search
|
||||
engines, the schema needs to be created before indexing can happen. This call should create the schema if it
|
||||
does not exist.
|
||||
|
||||
For certain search engines, the schema needs to be created before
|
||||
indexing can happen. This call should create the schema if it does not
|
||||
exist.
|
||||
|
||||
Args:
|
||||
embedding_dim: Vector dimensionality for the vector similarity part
|
||||
of the search.
|
||||
embedding_precision: Precision of the values of the vectors for the
|
||||
similarity part of the search.
|
||||
Parameters:
|
||||
- embedding_dim: Vector dimensionality for the vector similarity part of the search
|
||||
- embedding_precision: Precision of the vector similarity part of the search
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -243,8 +238,8 @@ class Deletable(abc.ABC):
|
||||
@abc.abstractmethod
|
||||
def delete(
|
||||
self,
|
||||
# TODO(andrei): Fine for now but this can probably be a batch operation
|
||||
# that takes in a list of IDs.
|
||||
# TODO(andrei): Fine for now but this can probably be a batch operation that
|
||||
# takes in a list of IDs.
|
||||
document_id: str,
|
||||
chunk_count: int | None = None,
|
||||
# TODO(andrei): Shouldn't this also have some acl filtering at minimum?
|
||||
@@ -288,7 +283,10 @@ class Updatable(abc.ABC):
|
||||
self,
|
||||
update_requests: list[MetadataUpdateRequest],
|
||||
) -> None:
|
||||
"""Updates some set of chunks.
|
||||
"""
|
||||
Updates some set of chunks. The document and fields to update are specified in the update
|
||||
requests. Each update request in the list applies its changes to a list of document ids.
|
||||
None values mean that the field does not need an update.
|
||||
|
||||
The document and fields to update are specified in the update requests.
|
||||
Each update request in the list applies its changes to a list of
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
from typing import Generic
|
||||
from typing import TypeVar
|
||||
|
||||
from opensearchpy import OpenSearch
|
||||
from opensearchpy.exceptions import TransportError
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.configs.app_configs import OPENSEARCH_ADMIN_PASSWORD
|
||||
from onyx.configs.app_configs import OPENSEARCH_ADMIN_USERNAME
|
||||
@@ -21,36 +17,10 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
# Set the logging level to WARNING to ignore INFO and DEBUG logs from
|
||||
# opensearch. By default it emits INFO-level logs for every request.
|
||||
# TODO(andrei): I don't think this is working as intended, I still see spam in
|
||||
# logs. The module name is probably wrong or opensearchpy initializes a logger
|
||||
# dynamically along with an instance of a client class. Look at the constructor
|
||||
# for OpenSearch.
|
||||
opensearch_logger = logging.getLogger("opensearchpy")
|
||||
opensearch_logger.setLevel(logging.WARNING)
|
||||
|
||||
|
||||
SchemaDocumentModel = TypeVar("SchemaDocumentModel")
|
||||
|
||||
|
||||
class SearchHit(BaseModel, Generic[SchemaDocumentModel]):
|
||||
"""Represents a hit from OpenSearch in response to a query.
|
||||
|
||||
Templated on the specific document model as defined by a schema.
|
||||
"""
|
||||
|
||||
model_config = {"frozen": True}
|
||||
|
||||
# The document chunk source retrieved from OpenSearch.
|
||||
document_chunk: SchemaDocumentModel
|
||||
# The match score for the document chunk as calculated by OpenSearch. Only
|
||||
# relevant for "fuzzy searches"; this will be None for direct queries where
|
||||
# score is not relevant like direct retrieval on ID.
|
||||
score: float | None = None
|
||||
# Maps schema property name to a list of highlighted snippets with match
|
||||
# terms wrapped in tags (e.g. "something <hi>keyword</hi> other thing").
|
||||
match_highlights: dict[str, list[str]] = {}
|
||||
|
||||
|
||||
class OpenSearchClient:
|
||||
"""Client for interacting with OpenSearch.
|
||||
|
||||
@@ -260,9 +230,9 @@ class OpenSearchClient:
|
||||
)
|
||||
result_string: str = result.get("result", "")
|
||||
match result_string:
|
||||
# Sanity check.
|
||||
case "created":
|
||||
return
|
||||
# Sanity check.
|
||||
case "updated":
|
||||
raise RuntimeError(
|
||||
f'The OpenSearch client returned result "updated" for indexing document chunk "{document_chunk_id}". '
|
||||
@@ -337,49 +307,9 @@ class OpenSearchClient:
|
||||
|
||||
return num_deleted
|
||||
|
||||
def update_document(
|
||||
self, document_chunk_id: str, properties_to_update: dict[str, Any]
|
||||
) -> None:
|
||||
"""Updates a document's properties.
|
||||
|
||||
Args:
|
||||
document_chunk_id: The OpenSearch ID of the document chunk to
|
||||
update.
|
||||
properties_to_update: The properties of the document to update. Each
|
||||
property should exist in the schema.
|
||||
|
||||
Raises:
|
||||
Exception: There was an error updating the document.
|
||||
"""
|
||||
update_body: dict[str, Any] = {"doc": properties_to_update}
|
||||
result = self._client.update(
|
||||
index=self._index_name,
|
||||
id=document_chunk_id,
|
||||
body=update_body,
|
||||
_source=False,
|
||||
)
|
||||
result_id = result.get("_id", "")
|
||||
# Sanity check.
|
||||
if result_id != document_chunk_id:
|
||||
raise RuntimeError(
|
||||
f'Upon trying to update a document, OpenSearch responded with ID "{result_id}" '
|
||||
f'instead of "{document_chunk_id}" which is the ID it was given.'
|
||||
)
|
||||
result_string: str = result.get("result", "")
|
||||
match result_string:
|
||||
# Sanity check.
|
||||
case "updated":
|
||||
return
|
||||
case "noop":
|
||||
logger.warning(
|
||||
f'OpenSearch reported a no-op when trying to update document with ID "{document_chunk_id}".'
|
||||
)
|
||||
return
|
||||
case _:
|
||||
raise RuntimeError(
|
||||
f'The OpenSearch client returned result "{result_string}" for updating document chunk "{document_chunk_id}". '
|
||||
"This is unexpected."
|
||||
)
|
||||
def update_document(self) -> None:
|
||||
# TODO(andrei): Implement this.
|
||||
raise NotImplementedError("Not implemented.")
|
||||
|
||||
def get_document(self, document_chunk_id: str) -> DocumentChunk:
|
||||
"""Gets a document.
|
||||
@@ -448,13 +378,12 @@ class OpenSearchClient:
|
||||
|
||||
def search(
|
||||
self, body: dict[str, Any], search_pipeline_id: str | None
|
||||
) -> list[SearchHit[DocumentChunk]]:
|
||||
) -> list[DocumentChunk]:
|
||||
"""Searches the index.
|
||||
|
||||
TODO(andrei): Ideally we could check that every field in the body is
|
||||
present in the index, to avoid a class of runtime bugs that could easily
|
||||
be caught during development. Or change the function signature to accept
|
||||
a predefined pydantic model of allowed fields.
|
||||
be caught during development.
|
||||
|
||||
Args:
|
||||
body: The body of the search request. See the OpenSearch
|
||||
@@ -466,7 +395,7 @@ class OpenSearchClient:
|
||||
Exception: There was an error searching the index.
|
||||
|
||||
Returns:
|
||||
List of search hits that match the search request.
|
||||
List of document chunks that match the search request.
|
||||
"""
|
||||
result: dict[str, Any]
|
||||
if search_pipeline_id:
|
||||
@@ -478,22 +407,15 @@ class OpenSearchClient:
|
||||
|
||||
hits = self._get_hits_from_search_result(result)
|
||||
|
||||
search_hits: list[SearchHit[DocumentChunk]] = []
|
||||
result_chunks: list[DocumentChunk] = []
|
||||
for hit in hits:
|
||||
document_chunk_source: dict[str, Any] | None = hit.get("_source")
|
||||
if not document_chunk_source:
|
||||
raise RuntimeError(
|
||||
f"Document chunk with ID \"{hit.get('_id', '')}\" has no data."
|
||||
)
|
||||
document_chunk_score = hit.get("_score", None)
|
||||
match_highlights: dict[str, list[str]] = hit.get("highlight", {})
|
||||
search_hit = SearchHit[DocumentChunk](
|
||||
document_chunk=DocumentChunk.model_validate(document_chunk_source),
|
||||
score=document_chunk_score,
|
||||
match_highlights=match_highlights,
|
||||
)
|
||||
search_hits.append(search_hit)
|
||||
return search_hits
|
||||
result_chunks.append(DocumentChunk.model_validate(document_chunk_source))
|
||||
return result_chunks
|
||||
|
||||
def search_for_document_ids(self, body: dict[str, Any]) -> list[str]:
|
||||
"""Searches the index and returns only document chunk IDs.
|
||||
@@ -570,9 +492,6 @@ class OpenSearchClient:
|
||||
def close(self) -> None:
|
||||
"""Closes the client.
|
||||
|
||||
TODO(andrei): Can we have some way to auto close when the client no
|
||||
longer has any references?
|
||||
|
||||
Raises:
|
||||
Exception: There was an error closing the client.
|
||||
"""
|
||||
@@ -600,55 +519,3 @@ class OpenSearchClient:
|
||||
)
|
||||
hits_second_layer: list[Any] = hits_first_layer.get("hits", [])
|
||||
return hits_second_layer
|
||||
|
||||
|
||||
def wait_for_opensearch_with_timeout(
|
||||
wait_interval_s: int = 5,
|
||||
wait_limit_s: int = 60,
|
||||
client: OpenSearchClient | None = None,
|
||||
) -> bool:
|
||||
"""Waits for OpenSearch to become ready subject to a timeout.
|
||||
|
||||
Will create a new dummy client if no client is provided. Will close this
|
||||
client at the end of the function. Will not close the client if it was
|
||||
supplied.
|
||||
|
||||
Args:
|
||||
wait_interval_s: The interval in seconds to wait between checks.
|
||||
Defaults to 5.
|
||||
wait_limit_s: The total timeout in seconds to wait for OpenSearch to
|
||||
become ready. Defaults to 60.
|
||||
client: The OpenSearch client to use for pinging. If None, a new dummy
|
||||
client will be created. Defaults to None.
|
||||
|
||||
Returns:
|
||||
True if OpenSearch is ready, False otherwise.
|
||||
"""
|
||||
made_client = False
|
||||
try:
|
||||
if client is None:
|
||||
# NOTE: index_name does not matter because we are only using this object
|
||||
# to ping.
|
||||
# TODO(andrei): Make this better.
|
||||
client = OpenSearchClient(index_name="")
|
||||
made_client = True
|
||||
time_start = time.monotonic()
|
||||
while True:
|
||||
if client.ping():
|
||||
logger.info("[OpenSearch] Readiness probe succeeded. Continuing...")
|
||||
return True
|
||||
time_elapsed = time.monotonic() - time_start
|
||||
if time_elapsed > wait_limit_s:
|
||||
logger.info(
|
||||
f"[OpenSearch] Readiness probe did not succeed within the timeout "
|
||||
f"({wait_limit_s} seconds)."
|
||||
)
|
||||
return False
|
||||
logger.info(
|
||||
f"[OpenSearch] Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit_s:.1f}"
|
||||
)
|
||||
time.sleep(wait_interval_s)
|
||||
finally:
|
||||
if made_client:
|
||||
assert client is not None
|
||||
client.close()
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -7,7 +6,6 @@ from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||
get_experts_stores_representations,
|
||||
)
|
||||
from onyx.connectors.models import convert_metadata_list_of_strings_to_dict
|
||||
from onyx.context.search.enums import QueryType
|
||||
from onyx.context.search.models import IndexFilters
|
||||
from onyx.context.search.models import InferenceChunk
|
||||
@@ -15,10 +13,6 @@ from onyx.context.search.models import InferenceChunkUncleaned
|
||||
from onyx.context.search.models import QueryExpansionType
|
||||
from onyx.db.enums import EmbeddingPrecision
|
||||
from onyx.db.models import DocumentSource
|
||||
from onyx.document_index.chunk_content_enrichment import cleanup_content_for_chunks
|
||||
from onyx.document_index.chunk_content_enrichment import (
|
||||
generate_enriched_content_for_chunk_text,
|
||||
)
|
||||
from onyx.document_index.interfaces import DocumentIndex as OldDocumentIndex
|
||||
from onyx.document_index.interfaces import (
|
||||
DocumentInsertionRecord as OldDocumentInsertionRecord,
|
||||
@@ -35,16 +29,8 @@ from onyx.document_index.interfaces_new import IndexingMetadata
|
||||
from onyx.document_index.interfaces_new import MetadataUpdateRequest
|
||||
from onyx.document_index.interfaces_new import TenantState
|
||||
from onyx.document_index.opensearch.client import OpenSearchClient
|
||||
from onyx.document_index.opensearch.client import SearchHit
|
||||
from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import CONTENT_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import DOCUMENT_SETS_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import DocumentChunk
|
||||
from onyx.document_index.opensearch.schema import DocumentSchema
|
||||
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
|
||||
from onyx.document_index.opensearch.schema import GLOBAL_BOOST_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import HIDDEN_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME
|
||||
from onyx.document_index.opensearch.search import DocumentQuery
|
||||
from onyx.document_index.opensearch.search import (
|
||||
MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
|
||||
@@ -68,40 +54,14 @@ from shared_configs.model_server_models import Embedding
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
|
||||
def _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
|
||||
def _convert_opensearch_chunk_to_inference_chunk_uncleaned(
|
||||
chunk: DocumentChunk,
|
||||
score: float | None,
|
||||
highlights: dict[str, list[str]],
|
||||
) -> InferenceChunkUncleaned:
|
||||
"""
|
||||
Generates an inference chunk from an OpenSearch document chunk, its score,
|
||||
and its match highlights.
|
||||
|
||||
Args:
|
||||
chunk: The document chunk returned by OpenSearch.
|
||||
score: The document chunk match score as calculated by OpenSearch. Only
|
||||
relevant for searches like hybrid search. It is acceptable for this
|
||||
value to be None for results from other queries like ID-based
|
||||
retrieval as a match score makes no sense in those contexts.
|
||||
highlights: Maps schema property name to a list of highlighted snippets
|
||||
with match terms wrapped in tags (e.g. "something <hi>keyword</hi>
|
||||
other thing").
|
||||
|
||||
Returns:
|
||||
An Onyx inference chunk representation.
|
||||
"""
|
||||
return InferenceChunkUncleaned(
|
||||
chunk_id=chunk.chunk_index,
|
||||
blurb=chunk.blurb,
|
||||
# Includes extra content prepended/appended during indexing.
|
||||
content=chunk.content,
|
||||
# When we read a string and turn it into a dict the keys will be
|
||||
# strings, but in this case they need to be ints.
|
||||
source_links=(
|
||||
{int(k): v for k, v in json.loads(chunk.source_links).items()}
|
||||
if chunk.source_links
|
||||
else None
|
||||
),
|
||||
source_links=json.loads(chunk.source_links) if chunk.source_links else None,
|
||||
image_file_id=chunk.image_file_id,
|
||||
# Deprecated. Fill in some reasonable default.
|
||||
section_continuation=False,
|
||||
@@ -110,70 +70,66 @@ def _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
|
||||
semantic_identifier=chunk.semantic_identifier,
|
||||
title=chunk.title,
|
||||
boost=chunk.global_boost,
|
||||
score=score,
|
||||
# TODO(andrei): Do in a followup. We should be able to get this from
|
||||
# OpenSearch.
|
||||
recency_bias=1.0,
|
||||
# TODO(andrei): This is how good the match is, we need this, key insight
|
||||
# is we can order chunks by this. Should not be hard to plumb this from
|
||||
# a search result, do that in a followup.
|
||||
score=None,
|
||||
hidden=chunk.hidden,
|
||||
metadata=(
|
||||
convert_metadata_list_of_strings_to_dict(chunk.metadata_list)
|
||||
if chunk.metadata_list
|
||||
else {}
|
||||
),
|
||||
# Extract highlighted snippets from the content field, if available. In
|
||||
# the future we may want to match on other fields too, currently we only
|
||||
# use the content field.
|
||||
match_highlights=highlights.get(CONTENT_FIELD_NAME, []),
|
||||
metadata=json.loads(chunk.metadata),
|
||||
# TODO(andrei): The vector DB needs to supply this. I vaguely know
|
||||
# OpenSearch can from the documentation I've seen till now, look at this
|
||||
# in a followup.
|
||||
match_highlights=[],
|
||||
# TODO(andrei) Consider storing a chunk content index instead of a full
|
||||
# string when working on chunk content augmentation.
|
||||
doc_summary=chunk.doc_summary,
|
||||
# TODO(andrei) Same thing as above.
|
||||
# TODO(andrei) Same thing as contx ret above, LLM gens context for each
|
||||
# chunk.
|
||||
chunk_context=chunk.chunk_context,
|
||||
updated_at=chunk.last_updated,
|
||||
primary_owners=chunk.primary_owners,
|
||||
secondary_owners=chunk.secondary_owners,
|
||||
# TODO(andrei) Same thing as chunk_context above.
|
||||
metadata_suffix=chunk.metadata_suffix,
|
||||
# TODO(andrei): This is the suffix appended to the end of the chunk
|
||||
# content to assist querying. There are better ways we can do this, for
|
||||
# ex. keeping an index of where to string split from.
|
||||
metadata_suffix=None,
|
||||
)
|
||||
|
||||
|
||||
def _convert_inference_chunk_uncleaned_to_inference_chunk(
|
||||
inference_chunk_uncleaned: InferenceChunkUncleaned,
|
||||
) -> InferenceChunk:
|
||||
# TODO(andrei): Implement this.
|
||||
return inference_chunk_uncleaned.to_inference_chunk()
|
||||
|
||||
|
||||
def _convert_onyx_chunk_to_opensearch_document(
|
||||
chunk: DocMetadataAwareIndexChunk,
|
||||
) -> DocumentChunk:
|
||||
return DocumentChunk(
|
||||
document_id=chunk.source_document.id,
|
||||
chunk_index=chunk.chunk_id,
|
||||
# Use get_title_for_document_index to match the logic used when creating
|
||||
# the title_embedding in the embedder. This method falls back to
|
||||
# semantic_identifier when title is None (but not empty string).
|
||||
title=chunk.source_document.get_title_for_document_index(),
|
||||
title=chunk.source_document.title,
|
||||
title_vector=chunk.title_embedding,
|
||||
content=generate_enriched_content_for_chunk_text(chunk),
|
||||
content=chunk.content,
|
||||
content_vector=chunk.embeddings.full_embedding,
|
||||
source_type=chunk.source_document.source.value,
|
||||
metadata_list=chunk.source_document.get_metadata_str_attributes(),
|
||||
metadata_suffix=chunk.metadata_suffix_keyword,
|
||||
metadata=json.dumps(chunk.source_document.metadata),
|
||||
last_updated=chunk.source_document.doc_updated_at,
|
||||
public=chunk.access.is_public,
|
||||
# TODO(andrei): When going over ACL look very carefully at
|
||||
# access_control_list. Notice DocumentAccess::to_acl prepends every
|
||||
# string with a type.
|
||||
access_control_list=list(chunk.access.to_acl()),
|
||||
global_boost=chunk.boost,
|
||||
semantic_identifier=chunk.source_document.semantic_identifier,
|
||||
image_file_id=chunk.image_file_id,
|
||||
# Small optimization, if this list is empty we can supply None to
|
||||
# OpenSearch and it will not store any data at all for this field, which
|
||||
# is different from supplying an empty list.
|
||||
source_links=json.dumps(chunk.source_links) if chunk.source_links else None,
|
||||
blurb=chunk.blurb,
|
||||
doc_summary=chunk.doc_summary,
|
||||
chunk_context=chunk.chunk_context,
|
||||
# Small optimization, if this list is empty we can supply None to
|
||||
# OpenSearch and it will not store any data at all for this field, which
|
||||
# is different from supplying an empty list.
|
||||
document_sets=list(chunk.document_sets) if chunk.document_sets else None,
|
||||
# Small optimization, if this list is empty we can supply None to
|
||||
# OpenSearch and it will not store any data at all for this field, which
|
||||
# is different from supplying an empty list.
|
||||
user_projects=chunk.user_project or None,
|
||||
project_ids=list(chunk.user_project) if chunk.user_project else None,
|
||||
primary_owners=get_experts_stores_representations(
|
||||
chunk.source_document.primary_owners
|
||||
),
|
||||
@@ -188,6 +144,23 @@ def _convert_onyx_chunk_to_opensearch_document(
|
||||
)
|
||||
|
||||
|
||||
def _enrich_chunk_info() -> None: # pyright: ignore[reportUnusedFunction]
|
||||
# TODO(andrei): Implement this. Until then, we do not enrich chunk content
|
||||
# with title, etc.
|
||||
raise NotImplementedError(
|
||||
"[ANDREI]: Enrich chunk info is not implemented for OpenSearch."
|
||||
)
|
||||
|
||||
|
||||
def _clean_chunk_info() -> None: # pyright: ignore[reportUnusedFunction]
|
||||
# Analogous to _cleanup_chunks in vespa_document_index.py.
|
||||
# TODO(andrei): Implement this. Until then, we do not enrich chunk content
|
||||
# with title, etc.
|
||||
raise NotImplementedError(
|
||||
"[ANDREI]: Clean chunk info is not implemented for OpenSearch."
|
||||
)
|
||||
|
||||
|
||||
class OpenSearchOldDocumentIndex(OldDocumentIndex):
|
||||
"""
|
||||
Wrapper for OpenSearch to adapt the new DocumentIndex interface with
|
||||
@@ -213,10 +186,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
|
||||
index_name=index_name,
|
||||
secondary_index_name=secondary_index_name,
|
||||
)
|
||||
if multitenant:
|
||||
raise ValueError(
|
||||
"Bug: OpenSearch is not yet ready for multitenant environments but something tried to use it."
|
||||
)
|
||||
self._real_index = OpenSearchDocumentIndex(
|
||||
index_name=index_name,
|
||||
# TODO(andrei): Sus. Do not plug this into production until all
|
||||
@@ -424,24 +393,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
def verify_and_create_index_if_necessary(
|
||||
self, embedding_dim: int, embedding_precision: EmbeddingPrecision
|
||||
) -> None:
|
||||
"""Verifies and creates the index if necessary.
|
||||
|
||||
Also puts the desired search pipeline state, creating the pipelines if
|
||||
they do not exist and updating them otherwise.
|
||||
|
||||
Args:
|
||||
embedding_dim: Vector dimensionality for the vector similarity part
|
||||
of the search.
|
||||
embedding_precision: Precision of the values of the vectors for the
|
||||
similarity part of the search.
|
||||
|
||||
Raises:
|
||||
RuntimeError: There was an error verifying or creating the index or
|
||||
search pipelines.
|
||||
"""
|
||||
logger.debug(
|
||||
f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if necessary."
|
||||
)
|
||||
expected_mappings = DocumentSchema.get_document_schema(
|
||||
embedding_dim, self._tenant_state.multitenant
|
||||
)
|
||||
@@ -471,9 +422,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
chunks: list[DocMetadataAwareIndexChunk],
|
||||
indexing_metadata: IndexingMetadata,
|
||||
) -> list[DocumentInsertionRecord]:
|
||||
logger.debug(
|
||||
f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks for index {self._index_name}."
|
||||
)
|
||||
# Set of doc IDs.
|
||||
unique_docs_to_be_indexed: set[str] = set()
|
||||
document_indexing_results: list[DocumentInsertionRecord] = []
|
||||
@@ -504,6 +452,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
opensearch_document_chunk = _convert_onyx_chunk_to_opensearch_document(
|
||||
chunk
|
||||
)
|
||||
# TODO(andrei): Enrich chunk content here.
|
||||
# TODO(andrei): After our client supports batch indexing, use that
|
||||
# here.
|
||||
self._os_client.index_document(opensearch_document_chunk)
|
||||
@@ -518,8 +467,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
def delete(self, document_id: str, chunk_count: int | None = None) -> int:
|
||||
"""Deletes all chunks for a given document.
|
||||
|
||||
Does nothing if the specified document ID does not exist.
|
||||
|
||||
TODO(andrei): Make this method require supplying source type.
|
||||
TODO(andrei): Consider implementing this method to delete on document
|
||||
chunk IDs vs querying for matching document chunks.
|
||||
@@ -536,9 +483,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
Returns:
|
||||
The number of chunks successfully deleted.
|
||||
"""
|
||||
logger.debug(
|
||||
f"[OpenSearchDocumentIndex] Deleting document {document_id} from index {self._index_name}."
|
||||
)
|
||||
query_body = DocumentQuery.delete_from_document_id_query(
|
||||
document_id=document_id,
|
||||
tenant_state=self._tenant_state,
|
||||
@@ -550,84 +494,15 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
self,
|
||||
update_requests: list[MetadataUpdateRequest],
|
||||
) -> None:
|
||||
"""Updates some set of chunks.
|
||||
|
||||
NOTE: Will raise if the specified document chunks do not exist.
|
||||
NOTE: Requires document chunk count be known; will raise if it is not.
|
||||
NOTE: Each update request must have some field to update; if not it is
|
||||
assumed there is a bug in the caller and this will raise.
|
||||
|
||||
TODO(andrei): Consider exploring a batch API for OpenSearch for this
|
||||
operation.
|
||||
|
||||
Args:
|
||||
update_requests: A list of update requests, each containing a list
|
||||
of document IDs and the fields to update. The field updates
|
||||
apply to all of the specified documents in each update request.
|
||||
|
||||
Raises:
|
||||
RuntimeError: Failed to update some or all of the chunks for the
|
||||
specified documents.
|
||||
"""
|
||||
logger.debug(
|
||||
f"[OpenSearchDocumentIndex] Updating {len(update_requests)} chunks for index {self._index_name}."
|
||||
)
|
||||
for update_request in update_requests:
|
||||
properties_to_update: dict[str, Any] = dict()
|
||||
# TODO(andrei): Nit but consider if we can use DocumentChunk
|
||||
# here so we don't have to think about passing in the
|
||||
# appropriate types into this dict.
|
||||
if update_request.access is not None:
|
||||
properties_to_update[ACCESS_CONTROL_LIST_FIELD_NAME] = list(
|
||||
update_request.access.to_acl()
|
||||
)
|
||||
if update_request.document_sets is not None:
|
||||
properties_to_update[DOCUMENT_SETS_FIELD_NAME] = list(
|
||||
update_request.document_sets
|
||||
)
|
||||
if update_request.boost is not None:
|
||||
properties_to_update[GLOBAL_BOOST_FIELD_NAME] = int(
|
||||
update_request.boost
|
||||
)
|
||||
if update_request.hidden is not None:
|
||||
properties_to_update[HIDDEN_FIELD_NAME] = update_request.hidden
|
||||
if update_request.project_ids is not None:
|
||||
properties_to_update[USER_PROJECTS_FIELD_NAME] = list(
|
||||
update_request.project_ids
|
||||
)
|
||||
|
||||
for doc_id in update_request.document_ids:
|
||||
if not properties_to_update:
|
||||
raise ValueError(
|
||||
f"Bug: Tried to update document {doc_id} with no updated fields or user fields."
|
||||
)
|
||||
|
||||
doc_chunk_count = update_request.doc_id_to_chunk_cnt.get(doc_id, -1)
|
||||
if doc_chunk_count < 0:
|
||||
raise ValueError(
|
||||
f"Tried to update document {doc_id} but its chunk count is not known. Older versions of the "
|
||||
"application used to permit this but is not a supported state for a document when using OpenSearch."
|
||||
)
|
||||
if doc_chunk_count == 0:
|
||||
raise ValueError(
|
||||
f"Bug: Tried to update document {doc_id} but its chunk count was 0."
|
||||
)
|
||||
|
||||
for chunk_index in range(doc_chunk_count):
|
||||
document_chunk_id = get_opensearch_doc_chunk_id(
|
||||
document_id=doc_id, chunk_index=chunk_index
|
||||
)
|
||||
self._os_client.update_document(
|
||||
document_chunk_id=document_chunk_id,
|
||||
properties_to_update=properties_to_update,
|
||||
)
|
||||
logger.info("[ANDREI]: Updating documents...")
|
||||
# TODO(andrei): This needs to be implemented. I explicitly do not raise
|
||||
# here despite this not being implemented because indexing calls this
|
||||
# method so it is very hard to test other methods of this class if this
|
||||
# raises.
|
||||
|
||||
def id_based_retrieval(
|
||||
self,
|
||||
chunk_requests: list[DocumentSectionRequest],
|
||||
# TODO(andrei): When going over ACL look very carefully at
|
||||
# access_control_list. Notice DocumentAccess::to_acl prepends every
|
||||
# string with a type.
|
||||
filters: IndexFilters,
|
||||
# TODO(andrei): Remove this from the new interface at some point; we
|
||||
# should not be exposing this.
|
||||
@@ -637,12 +512,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
TODO(andrei): Consider implementing this method to retrieve on document
|
||||
chunk IDs vs querying for matching document chunks.
|
||||
"""
|
||||
logger.debug(
|
||||
f"[OpenSearchDocumentIndex] Retrieving {len(chunk_requests)} chunks for index {self._index_name}."
|
||||
)
|
||||
results: list[InferenceChunk] = []
|
||||
for chunk_request in chunk_requests:
|
||||
search_hits: list[SearchHit[DocumentChunk]] = []
|
||||
document_chunks: list[DocumentChunk] = []
|
||||
query_body = DocumentQuery.get_from_document_id_query(
|
||||
document_id=chunk_request.document_id,
|
||||
tenant_state=self._tenant_state,
|
||||
@@ -650,20 +522,22 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
min_chunk_index=chunk_request.min_chunk_ind,
|
||||
max_chunk_index=chunk_request.max_chunk_ind,
|
||||
)
|
||||
search_hits = self._os_client.search(
|
||||
document_chunks = self._os_client.search(
|
||||
body=query_body,
|
||||
search_pipeline_id=None,
|
||||
)
|
||||
inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
|
||||
_convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
|
||||
search_hit.document_chunk, None, {}
|
||||
)
|
||||
for search_hit in search_hits
|
||||
inference_chunks_uncleaned = [
|
||||
_convert_opensearch_chunk_to_inference_chunk_uncleaned(document_chunk)
|
||||
for document_chunk in document_chunks
|
||||
]
|
||||
inference_chunks = [
|
||||
_convert_inference_chunk_uncleaned_to_inference_chunk(
|
||||
inference_chunk_uncleaned
|
||||
)
|
||||
for inference_chunk_uncleaned in inference_chunks_uncleaned
|
||||
]
|
||||
inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
|
||||
inference_chunks_uncleaned
|
||||
)
|
||||
results.extend(inference_chunks)
|
||||
# TODO(andrei): Clean chunk content here.
|
||||
return results
|
||||
|
||||
def hybrid_retrieval(
|
||||
@@ -672,16 +546,10 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
query_embedding: Embedding,
|
||||
final_keywords: list[str] | None,
|
||||
query_type: QueryType,
|
||||
# TODO(andrei): When going over ACL look very carefully at
|
||||
# access_control_list. Notice DocumentAccess::to_acl prepends every
|
||||
# string with a type.
|
||||
filters: IndexFilters,
|
||||
num_to_retrieve: int,
|
||||
offset: int = 0,
|
||||
) -> list[InferenceChunk]:
|
||||
logger.debug(
|
||||
f"[OpenSearchDocumentIndex] Hybrid retrieving {num_to_retrieve} chunks for index {self._index_name}."
|
||||
)
|
||||
query_body = DocumentQuery.get_hybrid_search_query(
|
||||
query_text=query,
|
||||
query_vector=query_embedding,
|
||||
@@ -689,27 +557,25 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
num_hits=num_to_retrieve,
|
||||
tenant_state=self._tenant_state,
|
||||
)
|
||||
search_hits: list[SearchHit[DocumentChunk]] = self._os_client.search(
|
||||
document_chunks = self._os_client.search(
|
||||
body=query_body,
|
||||
search_pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
|
||||
)
|
||||
inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
|
||||
_convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
|
||||
search_hit.document_chunk, search_hit.score, search_hit.match_highlights
|
||||
)
|
||||
for search_hit in search_hits
|
||||
# TODO(andrei): Clean chunk content here.
|
||||
inference_chunks_uncleaned = [
|
||||
_convert_opensearch_chunk_to_inference_chunk_uncleaned(document_chunk)
|
||||
for document_chunk in document_chunks
|
||||
]
|
||||
inference_chunks = [
|
||||
_convert_inference_chunk_uncleaned_to_inference_chunk(
|
||||
inference_chunk_uncleaned
|
||||
)
|
||||
for inference_chunk_uncleaned in inference_chunks_uncleaned
|
||||
]
|
||||
inference_chunks: list[InferenceChunk] = cleanup_content_for_chunks(
|
||||
inference_chunks_uncleaned
|
||||
)
|
||||
|
||||
return inference_chunks
|
||||
|
||||
def random_retrieval(
|
||||
self,
|
||||
# TODO(andrei): When going over ACL look very carefully at
|
||||
# access_control_list. Notice DocumentAccess::to_acl prepends every
|
||||
# string with a type.
|
||||
filters: IndexFilters,
|
||||
num_to_retrieve: int = 100,
|
||||
dirty: bool | None = None,
|
||||
|
||||
@@ -25,7 +25,7 @@ TITLE_VECTOR_FIELD_NAME = "title_vector"
|
||||
CONTENT_FIELD_NAME = "content"
|
||||
CONTENT_VECTOR_FIELD_NAME = "content_vector"
|
||||
SOURCE_TYPE_FIELD_NAME = "source_type"
|
||||
METADATA_LIST_FIELD_NAME = "metadata_list"
|
||||
METADATA_FIELD_NAME = "metadata"
|
||||
LAST_UPDATED_FIELD_NAME = "last_updated"
|
||||
PUBLIC_FIELD_NAME = "public"
|
||||
ACCESS_CONTROL_LIST_FIELD_NAME = "access_control_list"
|
||||
@@ -35,7 +35,7 @@ SEMANTIC_IDENTIFIER_FIELD_NAME = "semantic_identifier"
|
||||
IMAGE_FILE_ID_FIELD_NAME = "image_file_id"
|
||||
SOURCE_LINKS_FIELD_NAME = "source_links"
|
||||
DOCUMENT_SETS_FIELD_NAME = "document_sets"
|
||||
USER_PROJECTS_FIELD_NAME = "user_projects"
|
||||
PROJECT_IDS_FIELD_NAME = "project_ids"
|
||||
DOCUMENT_ID_FIELD_NAME = "document_id"
|
||||
CHUNK_INDEX_FIELD_NAME = "chunk_index"
|
||||
MAX_CHUNK_SIZE_FIELD_NAME = "max_chunk_size"
|
||||
@@ -43,7 +43,6 @@ TENANT_ID_FIELD_NAME = "tenant_id"
|
||||
BLURB_FIELD_NAME = "blurb"
|
||||
DOC_SUMMARY_FIELD_NAME = "doc_summary"
|
||||
CHUNK_CONTEXT_FIELD_NAME = "chunk_context"
|
||||
METADATA_SUFFIX_FIELD_NAME = "metadata_suffix"
|
||||
PRIMARY_OWNERS_FIELD_NAME = "primary_owners"
|
||||
SECONDARY_OWNERS_FIELD_NAME = "secondary_owners"
|
||||
|
||||
@@ -102,9 +101,12 @@ class DocumentChunk(BaseModel):
|
||||
content_vector: list[float]
|
||||
|
||||
source_type: str
|
||||
# A list of key-value pairs separated by INDEX_SEPARATOR. See
|
||||
# convert_metadata_dict_to_list_of_strings.
|
||||
metadata_list: list[str] | None = None
|
||||
# Contains a string representation of a dict which maps string key to either
|
||||
# string value or list of string values.
|
||||
# TODO(andrei): When we augment content with metadata this can just be an
|
||||
# index pointer, and when we support metadata list that will just be a list
|
||||
# of strings.
|
||||
metadata: str
|
||||
# If it exists, time zone should always be UTC.
|
||||
last_updated: datetime | None = None
|
||||
|
||||
@@ -121,16 +123,12 @@ class DocumentChunk(BaseModel):
|
||||
# chunk text to the link corresponding to that point.
|
||||
source_links: str | None = None
|
||||
blurb: str
|
||||
# doc_summary, chunk_context, and metadata_suffix are all stored simply to
|
||||
# reverse the augmentations to content. Ideally these would just be start
|
||||
# and stop indices into the content string. For legacy reasons they are not
|
||||
# right now.
|
||||
doc_summary: str
|
||||
chunk_context: str
|
||||
metadata_suffix: str | None = None
|
||||
|
||||
document_sets: list[str] | None = None
|
||||
user_projects: list[int] | None = None
|
||||
# User projects.
|
||||
project_ids: list[int] | None = None
|
||||
primary_owners: list[str] | None = None
|
||||
secondary_owners: list[str] | None = None
|
||||
|
||||
@@ -285,12 +283,6 @@ class DocumentSchema:
|
||||
full-text searches.
|
||||
- "store": True fields are stored and can be returned on their own,
|
||||
independent of the parent document.
|
||||
- "index": True fields can be queried on.
|
||||
- "doc_values": True fields can be sorted and aggregated efficiently.
|
||||
Not supported for "text" type fields.
|
||||
- "store": True fields are stored separately from the source document
|
||||
and can thus be returned from a query separately from _source.
|
||||
Generally this is not necessary.
|
||||
|
||||
Args:
|
||||
vector_dimension: The dimension of vector embeddings. Must be a
|
||||
@@ -317,18 +309,10 @@ class DocumentSchema:
|
||||
# TODO(andrei): Ask Yuhong do we want this?
|
||||
"keyword": {"type": "keyword", "ignore_above": 256}
|
||||
},
|
||||
# This makes highlighting text during queries more efficient
|
||||
# at the cost of disk space. See
|
||||
# https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/#methods-of-obtaining-offsets
|
||||
"index_options": "offsets",
|
||||
},
|
||||
CONTENT_FIELD_NAME: {
|
||||
"type": "text",
|
||||
"store": True,
|
||||
# This makes highlighting text during queries more efficient
|
||||
# at the cost of disk space. See
|
||||
# https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/#methods-of-obtaining-offsets
|
||||
"index_options": "offsets",
|
||||
},
|
||||
TITLE_VECTOR_FIELD_NAME: {
|
||||
"type": "knn_vector",
|
||||
@@ -353,7 +337,7 @@ class DocumentSchema:
|
||||
},
|
||||
},
|
||||
SOURCE_TYPE_FIELD_NAME: {"type": "keyword"},
|
||||
METADATA_LIST_FIELD_NAME: {"type": "keyword"},
|
||||
METADATA_FIELD_NAME: {"type": "keyword"},
|
||||
# TODO(andrei): Check if Vespa stores seconds, we may wanna do
|
||||
# seconds here not millis.
|
||||
LAST_UPDATED_FIELD_NAME: {
|
||||
@@ -378,13 +362,11 @@ class DocumentSchema:
|
||||
GLOBAL_BOOST_FIELD_NAME: {"type": "integer"},
|
||||
# This field is only used for displaying a useful name for the
|
||||
# doc in the UI and is not used for searching. Disabling these
|
||||
# features to increase perf. This field is therefore essentially
|
||||
# just metadata.
|
||||
# features to increase perf.
|
||||
SEMANTIC_IDENTIFIER_FIELD_NAME: {
|
||||
"type": "keyword",
|
||||
"index": False,
|
||||
"doc_values": False,
|
||||
# Generally False by default; just making sure.
|
||||
"store": False,
|
||||
},
|
||||
# Same as above; used to display an image along with the doc.
|
||||
@@ -392,7 +374,6 @@ class DocumentSchema:
|
||||
"type": "keyword",
|
||||
"index": False,
|
||||
"doc_values": False,
|
||||
# Generally False by default; just making sure.
|
||||
"store": False,
|
||||
},
|
||||
# Same as above; used to link to the source doc.
|
||||
@@ -400,7 +381,6 @@ class DocumentSchema:
|
||||
"type": "keyword",
|
||||
"index": False,
|
||||
"doc_values": False,
|
||||
# Generally False by default; just making sure.
|
||||
"store": False,
|
||||
},
|
||||
# Same as above; used to quickly summarize the doc in the UI.
|
||||
@@ -408,7 +388,6 @@ class DocumentSchema:
|
||||
"type": "keyword",
|
||||
"index": False,
|
||||
"doc_values": False,
|
||||
# Generally False by default; just making sure.
|
||||
"store": False,
|
||||
},
|
||||
# Same as above.
|
||||
@@ -418,21 +397,12 @@ class DocumentSchema:
|
||||
"type": "keyword",
|
||||
"index": False,
|
||||
"doc_values": False,
|
||||
# Generally False by default; just making sure.
|
||||
"store": False,
|
||||
},
|
||||
# Same as above.
|
||||
# TODO(andrei): If we want to search on this this needs to be
|
||||
# changed.
|
||||
CHUNK_CONTEXT_FIELD_NAME: {
|
||||
"type": "keyword",
|
||||
"index": False,
|
||||
"doc_values": False,
|
||||
# Generally False by default; just making sure.
|
||||
"store": False,
|
||||
},
|
||||
# Same as above.
|
||||
METADATA_SUFFIX_FIELD_NAME: {
|
||||
"type": "keyword",
|
||||
"index": False,
|
||||
"doc_values": False,
|
||||
@@ -440,7 +410,7 @@ class DocumentSchema:
|
||||
},
|
||||
# Product-specific fields.
|
||||
DOCUMENT_SETS_FIELD_NAME: {"type": "keyword"},
|
||||
USER_PROJECTS_FIELD_NAME: {"type": "integer"},
|
||||
PROJECT_IDS_FIELD_NAME: {"type": "integer"},
|
||||
PRIMARY_OWNERS_FIELD_NAME: {"type": "keyword"},
|
||||
SECONDARY_OWNERS_FIELD_NAME: {"type": "keyword"},
|
||||
# OpenSearch metadata fields.
|
||||
|
||||
@@ -244,9 +244,6 @@ class DocumentQuery:
|
||||
query_text, query_vector, num_candidates
|
||||
)
|
||||
hybrid_search_filters = DocumentQuery._get_hybrid_search_filters(tenant_state)
|
||||
match_highlights_configuration = (
|
||||
DocumentQuery._get_match_highlights_configuration()
|
||||
)
|
||||
|
||||
hybrid_search_query: dict[str, Any] = {
|
||||
"bool": {
|
||||
@@ -257,8 +254,6 @@ class DocumentQuery:
|
||||
}
|
||||
}
|
||||
],
|
||||
# TODO(andrei): When revisiting our hybrid query logic see if
|
||||
# this needs to be nested one level down.
|
||||
"filter": hybrid_search_filters,
|
||||
}
|
||||
}
|
||||
@@ -266,7 +261,6 @@ class DocumentQuery:
|
||||
final_hybrid_search_body: dict[str, Any] = {
|
||||
"query": hybrid_search_query,
|
||||
"size": num_hits,
|
||||
"highlight": match_highlights_configuration,
|
||||
}
|
||||
return final_hybrid_search_body
|
||||
|
||||
@@ -352,30 +346,3 @@ class DocumentQuery:
|
||||
{"term": {TENANT_ID_FIELD_NAME: {"value": tenant_state.tenant_id}}}
|
||||
)
|
||||
return hybrid_search_filters
|
||||
|
||||
@staticmethod
|
||||
def _get_match_highlights_configuration() -> dict[str, Any]:
|
||||
"""
|
||||
Gets configuration for returning match highlights for a hit.
|
||||
"""
|
||||
match_highlights_configuration: dict[str, Any] = {
|
||||
"fields": {
|
||||
CONTENT_FIELD_NAME: {
|
||||
# See https://docs.opensearch.org/latest/search-plugins/searching-data/highlight/#highlighter-types
|
||||
"type": "unified",
|
||||
# The length in chars of a match snippet. Somewhat
|
||||
# arbitrarily-chosen. The Vespa codepath limited total
|
||||
# highlights length to 400 chars. fragment_size *
|
||||
# number_of_fragments = 400 should be good enough.
|
||||
"fragment_size": 100,
|
||||
# The number of snippets to return per field per document
|
||||
# hit.
|
||||
"number_of_fragments": 4,
|
||||
# These tags wrap matched keywords and they match what Vespa
|
||||
# used to return. Use them to minimize changes to our code.
|
||||
"pre_tags": ["<hi>"],
|
||||
"post_tags": ["</hi>"],
|
||||
}
|
||||
}
|
||||
}
|
||||
return match_highlights_configuration
|
||||
|
||||
@@ -41,6 +41,7 @@ from onyx.document_index.vespa_constants import MAX_OR_CONDITIONS
|
||||
from onyx.document_index.vespa_constants import METADATA
|
||||
from onyx.document_index.vespa_constants import METADATA_SUFFIX
|
||||
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
|
||||
from onyx.document_index.vespa_constants import RECENCY_BIAS
|
||||
from onyx.document_index.vespa_constants import SEARCH_ENDPOINT
|
||||
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
|
||||
from onyx.document_index.vespa_constants import SECTION_CONTINUATION
|
||||
@@ -141,6 +142,7 @@ def _vespa_hit_to_inference_chunk(
|
||||
title=fields.get(TITLE),
|
||||
semantic_identifier=fields[SEMANTIC_IDENTIFIER],
|
||||
boost=fields.get(BOOST, 1),
|
||||
recency_bias=fields.get("matchfeatures", {}).get(RECENCY_BIAS, 1.0),
|
||||
score=None if null_score else hit.get("relevance", 0),
|
||||
hidden=fields.get(HIDDEN, False),
|
||||
primary_owners=fields.get(PRIMARY_OWNERS),
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user