Compare commits

..

3 Commits

Author SHA1 Message Date
Yuhong Sun
8a38fdf8a5 ok 2025-11-29 22:46:22 -08:00
Vega
9155d4aa21 [FIX] Fix citation document mismatch and standardize citation format (#6484) 2025-11-29 22:43:03 -08:00
Yuhong Sun
b20591611a Single Commit Rebased 2025-11-29 21:22:58 -08:00
1421 changed files with 61385 additions and 87277 deletions

View File

@@ -8,9 +8,7 @@ runs:
using: "composite"
steps:
- name: Setup uv
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true

View File

@@ -5,7 +5,7 @@ updates:
schedule:
interval: "weekly"
cooldown:
default-days: 7
default-days: 4
open-pull-requests-limit: 3
assignees:
- "jmelahman"
@@ -16,7 +16,7 @@ updates:
schedule:
interval: "weekly"
cooldown:
default-days: 7
default-days: 4
open-pull-requests-limit: 3
assignees:
- "jmelahman"

View File

@@ -1,10 +1,10 @@
## Description
<!--- Provide a brief description of the changes in this PR --->
[Provide a brief description of the changes in this PR]
## How Has This Been Tested?
<!--- Describe the tests you ran to verify your changes --->
[Describe the tests you ran to verify your changes]
## Additional Options

View File

@@ -0,0 +1,33 @@
name: Check Lazy Imports
concurrency:
group: Check-Lazy-Imports-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- 'release/**'
permissions:
contents: read
jobs:
check-lazy-imports:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
- name: Check lazy imports
run: python3 backend/scripts/check_lazy_imports.py

View File

@@ -6,9 +6,8 @@ on:
- "*"
workflow_dispatch:
# Set restrictive default permissions for all jobs. Jobs that need more permissions
# should explicitly declare them.
permissions: {}
permissions:
contents: read
env:
IS_DRY_RUN: ${{ github.event_name == 'workflow_dispatch' }}
@@ -21,7 +20,6 @@ jobs:
runs-on: ubuntu-slim
timeout-minutes: 90
outputs:
build-desktop: ${{ steps.check.outputs.build-desktop }}
build-web: ${{ steps.check.outputs.build-web }}
build-web-cloud: ${{ steps.check.outputs.build-web-cloud }}
build-backend: ${{ steps.check.outputs.build-backend }}
@@ -32,7 +30,6 @@ jobs:
is-stable-standalone: ${{ steps.check.outputs.is-stable-standalone }}
is-beta-standalone: ${{ steps.check.outputs.is-beta-standalone }}
sanitized-tag: ${{ steps.check.outputs.sanitized-tag }}
short-sha: ${{ steps.check.outputs.short-sha }}
steps:
- name: Check which components to build and version info
id: check
@@ -41,7 +38,6 @@ jobs:
# Sanitize tag name by replacing slashes with hyphens (for Docker tag compatibility)
SANITIZED_TAG=$(echo "$TAG" | tr '/' '-')
IS_CLOUD=false
BUILD_DESKTOP=false
BUILD_WEB=false
BUILD_WEB_CLOUD=false
BUILD_BACKEND=true
@@ -51,6 +47,13 @@ jobs:
IS_STABLE_STANDALONE=false
IS_BETA_STANDALONE=false
if [[ "$TAG" == *cloud* ]]; then
IS_CLOUD=true
BUILD_WEB_CLOUD=true
else
BUILD_WEB=true
fi
# Version checks (for web - any stable version)
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
IS_STABLE=true
@@ -59,17 +62,6 @@ jobs:
IS_BETA=true
fi
if [[ "$TAG" == *cloud* ]]; then
IS_CLOUD=true
BUILD_WEB_CLOUD=true
else
BUILD_WEB=true
# Skip desktop builds on beta tags
if [[ "$IS_BETA" != "true" ]]; then
BUILD_DESKTOP=true
fi
fi
# Version checks (for backend/model-server - stable version excluding cloud tags)
if [[ "$TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "$TAG" != *cloud* ]]; then
IS_STABLE_STANDALONE=true
@@ -78,9 +70,7 @@ jobs:
IS_BETA_STANDALONE=true
fi
SHORT_SHA="${GITHUB_SHA::7}"
{
echo "build-desktop=$BUILD_DESKTOP"
echo "build-web=$BUILD_WEB"
echo "build-web-cloud=$BUILD_WEB_CLOUD"
echo "build-backend=$BUILD_BACKEND"
@@ -91,181 +81,8 @@ jobs:
echo "is-stable-standalone=$IS_STABLE_STANDALONE"
echo "is-beta-standalone=$IS_BETA_STANDALONE"
echo "sanitized-tag=$SANITIZED_TAG"
echo "short-sha=$SHORT_SHA"
} >> "$GITHUB_OUTPUT"
check-version-tag:
runs-on: ubuntu-slim
timeout-minutes: 10
if: ${{ !startsWith(github.ref_name, 'nightly-latest') && github.event_name != 'workflow_dispatch' }}
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- name: Setup uv
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
# NOTE: This isn't caching much and zizmor suggests this could be poisoned, so disable.
enable-cache: false
- name: Validate tag is versioned correctly
run: |
uv run --no-sync --with release-tag tag --check
notify-slack-on-tag-check-failure:
needs:
- check-version-tag
if: always() && needs.check-version-tag.result == 'failure' && github.event_name != 'workflow_dispatch'
runs-on: ubuntu-slim
timeout-minutes: 10
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.MONITOR_DEPLOYMENTS_WEBHOOK }}
failed-jobs: "• check-version-tag"
title: "🚨 Version Tag Check Failed"
ref-name: ${{ github.ref_name }}
build-desktop:
needs: determine-builds
if: needs.determine-builds.outputs.build-desktop == 'true'
permissions:
contents: write
actions: read
strategy:
fail-fast: false
matrix:
include:
- platform: "macos-latest" # Build a universal image for macOS.
args: "--target universal-apple-darwin"
- platform: "ubuntu-24.04"
args: "--bundles deb,rpm"
- platform: "ubuntu-24.04-arm" # Only available in public repos.
args: "--bundles deb,rpm"
- platform: "windows-latest"
args: ""
runs-on: ${{ matrix.platform }}
timeout-minutes: 90
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
with:
# NOTE: persist-credentials is needed for tauri-action to create GitHub releases.
persist-credentials: true # zizmor: ignore[artipacked]
- name: install dependencies (ubuntu only)
if: startsWith(matrix.platform, 'ubuntu-')
run: |
sudo apt-get update
sudo apt-get install -y \
build-essential \
libglib2.0-dev \
libgirepository1.0-dev \
libgtk-3-dev \
libjavascriptcoregtk-4.1-dev \
libwebkit2gtk-4.1-dev \
libayatana-appindicator3-dev \
gobject-introspection \
pkg-config \
curl \
xdg-utils
- name: setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6.1.0
with:
node-version: 24
package-manager-cache: false
- name: install Rust stable
uses: dtolnay/rust-toolchain@6d9817901c499d6b02debbb57edb38d33daa680b # zizmor: ignore[impostor-commit]
with:
# Those targets are only used on macos runners so it's in an `if` to slightly speed up windows and linux builds.
targets: ${{ matrix.platform == 'macos-latest' && 'aarch64-apple-darwin,x86_64-apple-darwin' || '' }}
- name: install frontend dependencies
working-directory: ./desktop
run: npm install
- name: Inject version (Unix)
if: runner.os != 'Windows'
working-directory: ./desktop
env:
SHORT_SHA: ${{ needs.determine-builds.outputs.short-sha }}
EVENT_NAME: ${{ github.event_name }}
run: |
if [ "${EVENT_NAME}" == "workflow_dispatch" ]; then
VERSION="0.0.0-dev+${SHORT_SHA}"
else
VERSION="${GITHUB_REF_NAME#v}"
fi
echo "Injecting version: $VERSION"
# Update Cargo.toml
sed "s/^version = .*/version = \"$VERSION\"/" src-tauri/Cargo.toml > src-tauri/Cargo.toml.tmp
mv src-tauri/Cargo.toml.tmp src-tauri/Cargo.toml
# Update tauri.conf.json
jq --arg v "$VERSION" '.version = $v' src-tauri/tauri.conf.json > src-tauri/tauri.conf.json.tmp
mv src-tauri/tauri.conf.json.tmp src-tauri/tauri.conf.json
# Update package.json
jq --arg v "$VERSION" '.version = $v' package.json > package.json.tmp
mv package.json.tmp package.json
echo "Versions set to: $VERSION"
- name: Inject version (Windows)
if: runner.os == 'Windows'
working-directory: ./desktop
shell: pwsh
run: |
# Windows MSI requires numeric-only build metadata, so we skip the SHA suffix
if ("${{ github.event_name }}" -eq "workflow_dispatch") {
$VERSION = "0.0.0"
} else {
# Strip 'v' prefix and any pre-release suffix (e.g., -beta.13) for MSI compatibility
$VERSION = "$env:GITHUB_REF_NAME" -replace '^v', '' -replace '-.*$', ''
}
Write-Host "Injecting version: $VERSION"
# Update Cargo.toml
$cargo = Get-Content src-tauri/Cargo.toml -Raw
$cargo = $cargo -replace '(?m)^version = .*', "version = `"$VERSION`""
Set-Content src-tauri/Cargo.toml $cargo -NoNewline
# Update tauri.conf.json
$json = Get-Content src-tauri/tauri.conf.json | ConvertFrom-Json
$json.version = $VERSION
$json | ConvertTo-Json -Depth 100 | Set-Content src-tauri/tauri.conf.json
# Update package.json
$pkg = Get-Content package.json | ConvertFrom-Json
$pkg.version = $VERSION
$pkg | ConvertTo-Json -Depth 100 | Set-Content package.json
Write-Host "Versions set to: $VERSION"
- uses: tauri-apps/tauri-action@19b93bb55601e3e373a93cfb6eb4242e45f5af20 # ratchet:tauri-apps/tauri-action@action-v0.6.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tagName: ${{ github.event_name != 'workflow_dispatch' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseName: ${{ github.event_name != 'workflow_dispatch' && 'v__VERSION__' || format('v0.0.0-dev+{0}', needs.determine-builds.outputs.short-sha) }}
releaseBody: "See the assets to download this version and install."
releaseDraft: true
prerelease: false
args: ${{ matrix.args }}
build-web-amd64:
needs: determine-builds
if: needs.determine-builds.outputs.build-web == 'true'
@@ -283,13 +100,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -341,13 +158,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -409,7 +226,7 @@ jobs:
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -449,13 +266,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -515,13 +332,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -591,7 +408,7 @@ jobs:
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -628,13 +445,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -685,13 +502,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -752,7 +569,7 @@ jobs:
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -793,13 +610,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -857,13 +674,13 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -930,7 +747,7 @@ jobs:
- name: Docker meta
id: meta
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # ratchet:docker/metadata-action@v5
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # ratchet:docker/metadata-action@v5
with:
images: ${{ github.event_name == 'workflow_dispatch' && env.RUNS_ON_ECR_CACHE || env.REGISTRY_IMAGE }}
flavor: |
@@ -1050,7 +867,7 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -1122,7 +939,6 @@ jobs:
notify-slack-on-failure:
needs:
- build-desktop
- build-web-amd64
- build-web-arm64
- merge-web
@@ -1135,13 +951,13 @@ jobs:
- build-model-server-amd64
- build-model-server-arm64
- merge-model-server
if: always() && (needs.build-desktop.result == 'failure' || needs.build-web-amd64.result == 'failure' || needs.build-web-arm64.result == 'failure' || needs.merge-web.result == 'failure' || needs.build-web-cloud-amd64.result == 'failure' || needs.build-web-cloud-arm64.result == 'failure' || needs.merge-web-cloud.result == 'failure' || needs.build-backend-amd64.result == 'failure' || needs.build-backend-arm64.result == 'failure' || needs.merge-backend.result == 'failure' || needs.build-model-server-amd64.result == 'failure' || needs.build-model-server-arm64.result == 'failure' || needs.merge-model-server.result == 'failure') && github.event_name != 'workflow_dispatch'
if: always() && (needs.build-web-amd64.result == 'failure' || needs.build-web-arm64.result == 'failure' || needs.merge-web.result == 'failure' || needs.build-web-cloud-amd64.result == 'failure' || needs.build-web-cloud-arm64.result == 'failure' || needs.merge-web-cloud.result == 'failure' || needs.build-backend-amd64.result == 'failure' || needs.build-backend-arm64.result == 'failure' || needs.merge-backend.result == 'failure' || needs.build-model-server-amd64.result == 'failure' || needs.build-model-server-arm64.result == 'failure' || needs.merge-model-server.result == 'failure') && github.event_name != 'workflow_dispatch'
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim
timeout-minutes: 90
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -1150,9 +966,6 @@ jobs:
shell: bash
run: |
FAILED_JOBS=""
if [ "${NEEDS_BUILD_DESKTOP_RESULT}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-desktop\\n"
fi
if [ "${NEEDS_BUILD_WEB_AMD64_RESULT}" == "failure" ]; then
FAILED_JOBS="${FAILED_JOBS}• build-web-amd64\\n"
fi
@@ -1193,7 +1006,6 @@ jobs:
FAILED_JOBS=$(printf '%s' "$FAILED_JOBS" | sed 's/\\n$//')
echo "jobs=$FAILED_JOBS" >> "$GITHUB_OUTPUT"
env:
NEEDS_BUILD_DESKTOP_RESULT: ${{ needs.build-desktop.result }}
NEEDS_BUILD_WEB_AMD64_RESULT: ${{ needs.build-web-amd64.result }}
NEEDS_BUILD_WEB_ARM64_RESULT: ${{ needs.build-web-arm64.result }}
NEEDS_MERGE_WEB_RESULT: ${{ needs.merge-web.result }}

View File

@@ -15,7 +15,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Checkout
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
@@ -32,7 +32,6 @@ jobs:
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Build chart dependencies

View File

@@ -1,31 +0,0 @@
name: Merge Group-Specific
on:
merge_group:
permissions:
contents: read
jobs:
# This job immediately succeeds to satisfy branch protection rules on merge_group events.
# There is a similarly named "required" job in pr-integration-tests.yml which runs the actual
# integration tests. That job runs on both pull_request and merge_group events, and this job
# exists solely to provide a fast-passing check with the same name for branch protection.
# The actual tests remain enforced on presubmit (pull_request events).
required:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Success
run: echo "Success"
# This job immediately succeeds to satisfy branch protection rules on merge_group events.
# There is a similarly named "playwright-required" job in pr-playwright-tests.yml which runs
# the actual playwright tests. That job runs on both pull_request and merge_group events, and
# this job exists solely to provide a fast-passing check with the same name for branch protection.
# The actual tests remain enforced on presubmit (pull_request events).
playwright-required:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- name: Success
run: echo "Success"

View File

@@ -28,12 +28,12 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: '3.11'
cache: 'pip'

View File

@@ -1,62 +0,0 @@
name: Database Tests
concurrency:
group: Database-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
database-tests:
runs-on:
- runs-on
- runner=2cpu-linux-arm64
- "run-id=${{ github.run_id }}-database-tests"
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Generate OpenAPI schema and Python client
shell: bash
run: |
ods openapi all
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Start Docker containers
working-directory: ./deployment/docker_compose
run: |
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d \
relational_db
- name: Run Database Tests
working-directory: ./backend
run: pytest -m alembic tests/integration/tests/migrations/

View File

@@ -7,9 +7,6 @@ on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
permissions:
contents: read
@@ -32,9 +29,6 @@ env:
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
# Jira
JIRA_ADMIN_API_TOKEN: ${{ secrets.JIRA_ADMIN_API_TOKEN }}
# LLMs
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -52,7 +46,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -80,13 +74,12 @@ jobs:
env:
PYTHONPATH: ./backend
MODEL_SERVER_HOST: "disabled"
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -114,7 +107,6 @@ jobs:
run: |
cat <<EOF > deployment/docker_compose/.env
CODE_INTERPRETER_BETA_ENABLED=true
DISABLE_TELEMETRY=true
EOF
- name: Set up Standard Dependencies
@@ -170,7 +162,7 @@ jobs:
- name: Upload Docker logs
if: failure()
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: docker-logs-${{ matrix.test-dir }}
path: docker-logs/

View File

@@ -7,9 +7,6 @@ on:
merge_group:
pull_request:
branches: [ main ]
push:
tags:
- "v*.*.*"
workflow_dispatch: # Allows manual triggering
permissions:
@@ -24,7 +21,7 @@ jobs:
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
@@ -83,7 +80,6 @@ jobs:
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Install Redis operator

View File

@@ -9,9 +9,6 @@ on:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
permissions:
contents: read
@@ -33,11 +30,6 @@ env:
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN }}
GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC }}
GITHUB_ADMIN_EMAIL: ${{ secrets.ONYX_GITHUB_ADMIN_EMAIL }}
GITHUB_TEST_USER_1_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_1_EMAIL }}
GITHUB_TEST_USER_2_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_2_EMAIL }}
jobs:
discover-test-dirs:
@@ -48,7 +40,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -72,36 +64,17 @@ jobs:
all_dirs="[${all_dirs%,}]"
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
build-backend-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -122,46 +95,23 @@ jobs:
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -182,28 +132,18 @@ jobs:
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
build-integration-image:
runs-on:
[
runs-on,
runner=2cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-integration-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -219,40 +159,16 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Build and push integration test image with Docker Bake
env:
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
cd backend && docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
@@ -279,7 +195,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -294,28 +210,23 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Create .env file for Docker Compose
- name: Start Docker containers
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
MCP_SERVER_ENABLED=true
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
INTEGRATION_TESTS_MODE=true \
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
MCP_SERVER_ENABLED=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
@@ -419,11 +330,6 @@ jobs:
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
-e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
-e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN} \
-e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC} \
-e GITHUB_ADMIN_EMAIL=${GITHUB_ADMIN_EMAIL} \
-e GITHUB_TEST_USER_1_EMAIL=${GITHUB_TEST_USER_1_EMAIL} \
-e GITHUB_TEST_USER_2_EMAIL=${GITHUB_TEST_USER_2_EMAIL} \
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
@@ -452,22 +358,21 @@ jobs:
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
multitenant-tests:
needs:
[build-backend-image, build-model-server-image, build-integration-image]
runs-on:
[
runs-on,
runner=8cpu-linux-arm64,
"run-id=${{ github.run_id }}-multitenant-tests",
"extras=ecr-cache",
build-backend-image,
build-model-server-image,
build-integration-image,
]
runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -3,15 +3,7 @@ concurrency:
group: Run-Jest-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
on: push
permissions:
contents: read
@@ -23,15 +15,15 @@ jobs:
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies

View File

@@ -6,9 +6,6 @@ concurrency:
on:
merge_group:
types: [checks_requested]
push:
tags:
- "v*.*.*"
permissions:
contents: read
@@ -40,7 +37,7 @@ jobs:
test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -65,35 +62,15 @@ jobs:
echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
build-backend-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -114,46 +91,21 @@ jobs:
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-backend-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -174,45 +126,20 @@ jobs:
push: true
tags: ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-model-server-test-${{ github.run_id }}
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
build-integration-image:
runs-on:
[
runs-on,
runner=2cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-integration-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -229,22 +156,12 @@ jobs:
env:
INTEGRATION_REPOSITORY: ${{ env.RUNS_ON_ECR_CACHE }}
TAG: integration-test-${{ github.run_id }}
CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
cd backend && docker buildx bake --push \
--set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
--set backend.cache-from=type=registry,ref=onyxdotapp/onyx-backend:latest \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX},mode=max \
--set backend.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache,mode=max \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX} \
--set integration.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${HEAD_SHA},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache-${CACHE_SUFFIX},mode=max \
--set integration.cache-to=type=registry,ref=${RUNS_ON_ECR_CACHE}:integration-cache,mode=max \
integration
@@ -271,7 +188,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -286,26 +203,21 @@ jobs:
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
# NOTE: don't need web server for integration tests
- name: Create .env file for Docker Compose
- name: Start Docker containers
env:
ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
RUN_ID: ${{ github.run_id }}
run: |
cat <<EOF > deployment/docker_compose/.env
AUTH_TYPE=basic
POSTGRES_POOL_PRE_PING=true
POSTGRES_USE_NULL_POOL=true
REQUIRE_EMAIL_VERIFICATION=false
DISABLE_TELEMETRY=true
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
EOF
- name: Start Docker containers
run: |
cd deployment/docker_compose
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
INTEGRATION_TESTS_MODE=true \
MCP_SERVER_ENABLED=true \
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
relational_db \
index \
@@ -438,6 +350,7 @@ jobs:
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
required:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
runs-on: ubuntu-slim

View File

@@ -3,15 +3,7 @@ concurrency:
group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on:
merge_group:
pull_request:
branches:
- main
- "release/**"
push:
tags:
- "v*.*.*"
on: push
permissions:
contents: read
@@ -54,36 +46,16 @@ env:
jobs:
build-web-image:
runs-on:
[
runs-on,
runner=4cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-web-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -104,47 +76,22 @@ jobs:
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-web-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache
type=registry,ref=onyxdotapp/onyx-web-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:web-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-backend-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-backend-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -165,47 +112,23 @@ jobs:
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-backend-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache
type=registry,ref=onyxdotapp/onyx-backend:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
build-model-server-image:
runs-on:
[
runs-on,
runner=1cpu-linux-arm64,
"run-id=${{ github.run_id }}-build-model-server-image",
"extras=ecr-cache",
]
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
timeout-minutes: 45
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
@@ -226,14 +149,9 @@ jobs:
tags: ${{ env.RUNS_ON_ECR_CACHE }}:playwright-test-model-server-${{ github.run_id }}
push: true
cache-from: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
type=registry,ref=onyxdotapp/onyx-model-server:latest
cache-to: |
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
cache-to: type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}
playwright-tests:
@@ -254,15 +172,16 @@ jobs:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
with:
node-version: 22
cache: "npm"
cache: 'npm'
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
@@ -471,6 +390,7 @@ jobs:
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
run: exit 1
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
# Chromatic may be reintroduced in the future for UI diff testing if needed.
@@ -488,12 +408,12 @@ jobs:
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
# uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
# with:
# fetch-depth: 0
# - name: Setup node
# uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
# uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # ratchet:actions/setup-node@v4
# with:
# node-version: 22

View File

@@ -9,14 +9,29 @@ on:
branches:
- main
- 'release/**'
push:
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
validate-requirements:
runs-on: ubuntu-slim
timeout-minutes: 45
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Setup uv
uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Validate requirements lock files
run: ./backend/scripts/compile_requirements.py --check
mypy-check:
# See https://runs-on.com/runners/linux/
# Note: Mypy seems quite optimized for x64 compared to arm64.
@@ -27,7 +42,7 @@ jobs:
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -40,10 +55,35 @@ jobs:
backend/requirements/model_server.txt
backend/requirements/ee.txt
- name: Generate OpenAPI schema and Python client
- name: Generate OpenAPI schema
shell: bash
working-directory: backend
env:
PYTHONPATH: "."
run: |
python scripts/onyx_openapi_schema.py --filename generated/openapi.json
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Generate OpenAPI Python client
shell: bash
run: |
ods openapi all
docker run --rm \
-v "${{ github.workspace }}/backend/generated:/local" \
openapitools/openapi-generator-cli generate \
-i /local/openapi.json \
-g python \
-o /local/onyx_openapi_client \
--package-name onyx_openapi_client \
--skip-validate-spec \
--openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
- name: Cache mypy cache
if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
@@ -61,8 +101,11 @@ jobs:
TERM: xterm-256color
run: mypy .
- name: Run MyPy (tools/)
env:
MYPY_FORCE_COLOR: 1
TERM: xterm-256color
run: mypy tools/
- name: Check import order with reorder-python-imports
working-directory: ./backend
run: |
find ./onyx -name "*.py" | xargs reorder-python-imports --py311-plus
- name: Check code formatting with Black
working-directory: ./backend
run: black --check .

View File

@@ -7,9 +7,6 @@ on:
merge_group:
pull_request:
branches: [main]
push:
tags:
- "v*.*.*"
schedule:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
@@ -133,13 +130,12 @@ jobs:
env:
PYTHONPATH: ./backend
DISABLE_TELEMETRY: "true"
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -161,20 +157,16 @@ jobs:
hubspot:
- 'backend/onyx/connectors/hubspot/**'
- 'backend/tests/daily/connectors/hubspot/**'
- 'uv.lock'
salesforce:
- 'backend/onyx/connectors/salesforce/**'
- 'backend/tests/daily/connectors/salesforce/**'
- 'uv.lock'
github:
- 'backend/onyx/connectors/github/**'
- 'backend/tests/daily/connectors/github/**'
- 'uv.lock'
file_processing:
- 'backend/onyx/file_processing/**'
- 'uv.lock'
- name: Run Tests (excluding HubSpot, Salesforce, GitHub, and Coda)
- name: Run Tests (excluding HubSpot, Salesforce, and GitHub)
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test \
@@ -187,8 +179,7 @@ jobs:
backend/tests/daily/connectors \
--ignore backend/tests/daily/connectors/hubspot \
--ignore backend/tests/daily/connectors/salesforce \
--ignore backend/tests/daily/connectors/github \
--ignore backend/tests/daily/connectors/coda
--ignore backend/tests/daily/connectors/github
- name: Run HubSpot Connector Tests
if: ${{ github.event_name == 'schedule' || steps.changes.outputs.hubspot == 'true' || steps.changes.outputs.file_processing == 'true' }}

View File

@@ -39,7 +39,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false
@@ -61,7 +61,7 @@ jobs:
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
cache: "pip"

View File

@@ -9,9 +9,6 @@ on:
branches:
- main
- 'release/**'
push:
tags:
- "v*.*.*"
permissions:
contents: read
@@ -26,13 +23,15 @@ jobs:
env:
PYTHONPATH: ./backend
REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }}
DISABLE_TELEMETRY: "true"
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
SF_SECURITY_TOKEN: ${{ secrets.SF_SECURITY_TOKEN }}
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
persist-credentials: false

View File

@@ -6,42 +6,32 @@ concurrency:
on:
merge_group:
pull_request: null
push:
branches:
- main
tags:
- "v*.*.*"
permissions:
contents: read
jobs:
quality-checks:
runs-on: ubuntu-latest
# See https://runs-on.com/runners/linux/
runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-quality-checks"]
timeout-minutes: 45
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # ratchet:actions/setup-python@v6
with:
python-version: "3.11"
- name: Setup Terraform
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # ratchet:hashicorp/setup-terraform@v3
- name: Setup node
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v6
with: # zizmor: ignore[cache-poisoning]
node-version: 22
cache: "npm"
cache-dependency-path: ./web/package-lock.json
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: j178/prek-action@91fd7d7cf70ae1dee9f4f44e7dfa5d1073fe6623 # ratchet:j178/prek-action@v1
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # ratchet:pre-commit/action@v3.0.1
env:
# uv-run is mypy's id and mypy is covered by the Python Checks which caches dependencies better.
SKIP: uv-run
with:
prek-version: '0.2.21'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }}
- name: Check Actions
uses: giner/check-actions@28d366c7cbbe235f9624a88aa31a628167eee28c # ratchet:giner/check-actions@v1.0.1
with:

View File

@@ -1,41 +0,0 @@
name: Release Devtools
on:
push:
tags:
- "ods/v*.*.*"
jobs:
pypi:
runs-on: ubuntu-latest
environment:
name: release-devtools
permissions:
id-token: write
timeout-minutes: 10
strategy:
matrix:
os-arch:
- { goos: "linux", goarch: "amd64" }
- { goos: "linux", goarch: "arm64" }
- { goos: "windows", goarch: "amd64" }
- { goos: "windows", goarch: "arm64" }
- { goos: "darwin", goarch: "amd64" }
- { goos: "darwin", goarch: "arm64" }
- { goos: "", goarch: "" }
steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- run: |
GOOS="${{ matrix.os-arch.goos }}" \
GOARCH="${{ matrix.os-arch.goarch }}" \
uv build --wheel
working-directory: tools/ods
- run: uv publish
working-directory: tools/ods

View File

@@ -14,7 +14,7 @@ jobs:
contents: read
steps:
- name: Checkout main Onyx repo
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false

View File

@@ -18,7 +18,7 @@ jobs:
# see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we
# implement here which needs an actual user's deploy key
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6
with:
ssh-key: "${{ secrets.DEPLOY_KEY }}"
persist-credentials: true

View File

@@ -17,18 +17,15 @@ jobs:
security-events: write # needed for SARIF uploads
steps:
- name: Checkout repository
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6.0.1
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # ratchet:actions/checkout@v6.0.0
with:
persist-credentials: false
- name: Install the latest version of uv
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # ratchet:astral-sh/setup-uv@v7.1.3
- name: Run zizmor
run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif
run: uvx zizmor==1.16.3 --format=sarif . > results.sarif
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

3
.gitignore vendored
View File

@@ -53,6 +53,3 @@ node_modules
# MCP configs
.playwright-mcp
# plans
plans/

View File

@@ -5,138 +5,73 @@ default_install_hook_types:
- post-rewrite
repos:
- repo: https://github.com/astral-sh/uv-pre-commit
# From: https://github.com/astral-sh/uv-pre-commit/pull/53/commits/d30b4298e4fb63ce8609e29acdbcf4c9018a483c
# This revision is from https://github.com/astral-sh/uv-pre-commit/pull/53
rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
hooks:
- id: uv-sync
args: ["--active", "--locked", "--all-extras"]
- id: uv-lock
files: ^pyproject\.toml$
- id: uv-export
name: uv-export default.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"backend",
"-o",
"backend/requirements/default.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export dev.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"dev",
"-o",
"backend/requirements/dev.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export ee.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"ee",
"-o",
"backend/requirements/ee.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-export
name: uv-export model_server.txt
args:
[
"--no-emit-project",
"--no-default-groups",
"--no-hashes",
"--extra",
"model_server",
"-o",
"backend/requirements/model_server.txt",
]
files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
- id: uv-run
name: Check lazy imports
args: ["--active", "--with=onyx-devtools", "ods", "check-lazy-imports"]
files: ^backend/(?!\.venv/).*\.py$
# NOTE: This takes ~6s on a single, large module which is prohibitively slow.
# - id: uv-run
# name: mypy
# args: ["--all-extras", "mypy"]
# pass_filenames: true
# files: ^backend/.*\.py$
name: mypy
args: ["mypy"]
pass_filenames: true
files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-yaml
files: ^.github/
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
rev: v1.7.8
hooks:
- id: actionlint
- repo: https://github.com/psf/black
rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
rev: 25.1.0
hooks:
- id: black
language_version: python3.11
- id: black
language_version: python3.11
# this is a fork which keeps compatibility with black
- repo: https://github.com/wimglenn/reorder-python-imports-black
rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d # frozen: v3.14.0
rev: v3.14.0
hooks:
- id: reorder-python-imports
args: ["--py311-plus", "--application-directories=backend/"]
# need to ignore alembic files, since reorder-python-imports gets confused
# and thinks that alembic is a local package since there is a folder
# in the backend directory called `alembic`
exclude: ^backend/alembic/
- id: reorder-python-imports
args: ['--py311-plus', '--application-directories=backend/']
# need to ignore alembic files, since reorder-python-imports gets confused
# and thinks that alembic is a local package since there is a folder
# in the backend directory called `alembic`
exclude: ^backend/alembic/
# These settings will remove unused imports with side effects
# Note: The repo currently does not and should not have imports with side effects
- repo: https://github.com/PyCQA/autoflake
rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1 # frozen: v2.3.1
rev: v2.3.1
hooks:
- id: autoflake
args:
[
"--remove-all-unused-imports",
"--remove-unused-variables",
"--in-place",
"--recursive",
]
- repo: https://github.com/golangci/golangci-lint
rev: 9f61b0f53f80672872fced07b6874397c3ed197b # frozen: v2.7.2
hooks:
- id: golangci-lint
entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"
args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47 # frozen: v0.11.4
rev: v0.11.4
hooks:
- id: ruff
- repo: https://github.com/pre-commit/mirrors-prettier
rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4 # frozen: v3.1.0
rev: v3.1.0
hooks:
- id: prettier
types_or: [html, css, javascript, ts, tsx]
language_version: system
- id: prettier
types_or: [html, css, javascript, ts, tsx]
language_version: system
- repo: https://github.com/sirwart/ripsecrets
rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86 # frozen: v0.1.11
rev: v0.1.11
hooks:
- id: ripsecrets
args:
- --additional-pattern
- ^sk-[A-Za-z0-9_\-]{20,}$
- --additional-pattern
- ^sk-[A-Za-z0-9_\-]{20,}$
- repo: local
hooks:
@@ -147,13 +82,8 @@ repos:
pass_filenames: false
files: \.tf$
# Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
# This is a preview package - if it breaks:
# 1. Try updating: cd web && npm update @typescript/native-preview
# 2. Or fallback to tsc: replace 'tsgo' with 'tsc' below
- id: typescript-check
name: TypeScript type check
entry: bash -c 'cd web && npx tsgo --noEmit --project tsconfig.types.json'
- id: check-lazy-imports
name: Check lazy imports
entry: python3 backend/scripts/check_lazy_imports.py
language: system
pass_filenames: false
files: ^web/.*\.(ts|tsx)$
files: ^backend/(?!\.venv/).*\.py$

View File

@@ -5,8 +5,11 @@
# For local dev, often user Authentication is not needed
AUTH_TYPE=disabled
# Skip warm up for dev
SKIP_WARM_UP=True
# Always keep these on for Dev
# Logs model prompts, reasoning, and answer to stdout
# Logs all model prompts to stdout
LOG_ONYX_MODEL_INTERACTIONS=True
# More verbose logging
LOG_LEVEL=debug
@@ -34,16 +37,31 @@ OPENAI_API_KEY=<REPLACE THIS>
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o
# For Onyx Slack Bot, overrides the UI values so no need to set this up via UI every time
# Only needed if using OnyxBot
#ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
#ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
# Python stuff
PYTHONPATH=../backend
PYTHONUNBUFFERED=1
# Internet Search
EXA_API_KEY=<REPLACE THIS>
# Enable the full set of Danswer Enterprise Edition features
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
# Agent Search configs # TODO: Remove give proper namings
AGENT_RETRIEVAL_STATS=False # Note: This setting will incur substantial re-ranking effort
AGENT_RERANKING_STATS=True
AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20
# S3 File Store Configuration (MinIO for local development)
S3_ENDPOINT_URL=http://localhost:9004
S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket

View File

@@ -133,6 +133,8 @@
},
"consoleTitle": "API Server Console"
},
// For the listener to access the Slack API,
// ONYX_BOT_SLACK_APP_TOKEN & ONYX_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
{
"name": "Slack Bot",
"consoleName": "Slack Bot",
@@ -508,6 +510,7 @@
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true,
"presentation": {
"group": "3"
}
@@ -553,10 +556,10 @@
"name": "Install Python Requirements",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeExecutable": "bash",
"runtimeArgs": [
"sync",
"--all-extras"
"-c",
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
@@ -569,14 +572,14 @@
"name": "Onyx OpenAPI Schema Generator",
"type": "debugpy",
"request": "launch",
"program": "backend/scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}",
"program": "scripts/onyx_openapi_schema.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"env": {
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "backend"
"PYTHONPATH": "."
},
"args": ["--filename", "backend/generated/openapi.json", "--generate-python-client"]
"args": ["--filename", "generated/openapi.json"]
},
{
// script to debug multi tenant db issues

View File

@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password

View File

@@ -71,12 +71,12 @@ If using a higher version, sometimes some libraries will not be available (i.e.
#### Backend: Python requirements
Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
Currently, we use pip and recommend creating a virtual environment.
For convenience here's a command for it:
```bash
uv venv .venv --python 3.11
python -m venv .venv
source .venv/bin/activate
```
@@ -95,15 +95,33 @@ If using PowerShell, the command slightly differs:
Install the required python dependencies:
```bash
uv sync --all-extras
pip install -r backend/requirements/combined.txt
```
Install Playwright for Python (headless browser required by the Web Connector):
or
```bash
uv run playwright install
pip install -r backend/requirements/default.txt
pip install -r backend/requirements/dev.txt
pip install -r backend/requirements/ee.txt
pip install -r backend/requirements/model_server.txt
```
Fix vscode/cursor auto-imports:
```bash
pip install -e .
```
Install Playwright for Python (headless browser required by the Web Connector)
In the activated Python virtualenv, install Playwright for Python by running:
```bash
playwright install
```
You may have to deactivate and reactivate your virtualenv for `playwright` to appear on your path.
#### Frontend: Node dependencies
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
@@ -112,7 +130,7 @@ to manage your Node installations. Once installed, you can run
```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```
```
Navigate to `onyx/web` and run:
@@ -126,15 +144,21 @@ npm i
For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
Then run:
With the virtual environment active, install the pre-commit library with:
```bash
uv run pre-commit install
pip install pre-commit
```
Then, from the `onyx/backend` directory, run:
```bash
pre-commit install
```
Additionally, we use `mypy` for static type checking.
Onyx is fully type-annotated, and we want to keep it that way!
To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
To run the mypy checks manually, run `python -m mypy .` from the `onyx/backend` directory.
### Web
@@ -161,7 +185,7 @@ You will need Docker installed to run these containers.
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio
docker compose up -d index relational_db cache minio
```
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)

View File

@@ -15,4 +15,3 @@ build/
dist/
.coverage
htmlcov/
model_server/legacy/

View File

@@ -13,10 +13,23 @@ RUN uv pip install --system --no-cache-dir --upgrade \
-r /tmp/requirements.txt && \
rm -rf ~/.cache/uv /tmp/*.txt
# Stage for downloading embedding models
# Stage for downloading tokenizers
FROM base AS tokenizers
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1');"
# Stage for downloading Onyx models
FROM base AS onyx-models
RUN python -c "from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
snapshot_download(repo_id='onyx-dot-app/information-content-model');"
# Stage for downloading embedding and reranking models
FROM base AS embedding-models
RUN python -c "from huggingface_hub import snapshot_download; \
snapshot_download('nomic-ai/nomic-embed-text-v1');"
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1');"
# Initialize SentenceTransformer to cache the custom architecture
RUN python -c "from sentence_transformers import SentenceTransformer; \
@@ -41,6 +54,8 @@ RUN groupadd -g 1001 onyx && \
# In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
# running Onyx, move the current contents of the cache folder to a temporary location to ensure
# it's preserved in order to combine with the user's cache contents
COPY --chown=onyx:onyx --from=tokenizers /app/.cache/huggingface /app/.cache/temp_huggingface
COPY --chown=onyx:onyx --from=onyx-models /app/.cache/huggingface /app/.cache/temp_huggingface
COPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface
WORKDIR /app

View File

@@ -7,12 +7,8 @@ Onyx migrations use a generic single-database configuration with an async dbapi.
## To generate new migrations:
From onyx/backend, run:
`alembic revision -m <DESCRIPTION_OF_MIGRATION>`
Note: you cannot use the `--autogenerate` flag as the automatic schema parsing does not work.
Manually populate the upgrade and downgrade in your new migration.
run from onyx/backend:
`alembic revision --autogenerate -m <DESCRIPTION_OF_MIGRATION>`
More info can be found here: https://alembic.sqlalchemy.org/en/latest/autogenerate.html

View File

@@ -39,9 +39,7 @@ config = context.config
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
# disable_existing_loggers=False prevents breaking pytest's caplog fixture
# See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
fileConfig(config.config_file_name, disable_existing_loggers=False)
fileConfig(config.config_file_name)
target_metadata = [Base.metadata, ResultModelBase.metadata]
@@ -462,49 +460,8 @@ def run_migrations_offline() -> None:
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
Supports pytest-alembic by checking for a pre-configured connection
in context.config.attributes["connection"]. If present, uses that
connection/engine directly instead of creating a new async engine.
"""
# Check if pytest-alembic is providing a connection/engine
connectable = context.config.attributes.get("connection", None)
if connectable is not None:
# pytest-alembic is providing an engine - use it directly
logger.info("run_migrations_online starting (pytest-alembic mode).")
# For pytest-alembic, we use the default schema (public)
schema_name = context.config.attributes.get(
"schema_name", POSTGRES_DEFAULT_SCHEMA
)
# pytest-alembic passes an Engine, we need to get a connection from it
with connectable.connect() as connection:
# Set search path for the schema
connection.execute(text(f'SET search_path TO "{schema_name}"'))
context.configure(
connection=connection,
target_metadata=target_metadata, # type: ignore
include_object=include_object,
version_table_schema=schema_name,
include_schemas=True,
compare_type=True,
compare_server_default=True,
script_location=config.get_main_option("script_location"),
)
with context.begin_transaction():
context.run_migrations()
# Commit the transaction to ensure changes are visible to next migration
connection.commit()
else:
# Normal operation - use async migrations
logger.info("run_migrations_online starting.")
asyncio.run(run_async_migrations())
logger.info("run_migrations_online starting.")
asyncio.run(run_async_migrations())
if context.is_offline_mode():

View File

@@ -1,29 +0,0 @@
"""add is_clarification to chat_message
Revision ID: 18b5b2524446
Revises: 87c52ec39f84
Create Date: 2025-01-16
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "18b5b2524446"
down_revision = "87c52ec39f84"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"chat_message",
sa.Column(
"is_clarification", sa.Boolean(), nullable=False, server_default="false"
),
)
def downgrade() -> None:
op.drop_column("chat_message", "is_clarification")

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "23957775e5f5"
down_revision = "bc9771dccadf"
branch_labels = None
depends_on = None
branch_labels = None # type: ignore
depends_on = None # type: ignore
def upgrade() -> None:

View File

@@ -1,27 +0,0 @@
"""add last refreshed at mcp server
Revision ID: 2a391f840e85
Revises: 4cebcbc9b2ae
Create Date: 2025-12-06 15:19:59.766066
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembi.
revision = "2a391f840e85"
down_revision = "4cebcbc9b2ae"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"mcp_server",
sa.Column("last_refreshed_at", sa.DateTime(timezone=True), nullable=True),
)
def downgrade() -> None:
op.drop_column("mcp_server", "last_refreshed_at")

View File

@@ -1,27 +0,0 @@
"""add tab_index to tool_call
Revision ID: 4cebcbc9b2ae
Revises: a1b2c3d4e5f6
Create Date: 2025-12-16
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "4cebcbc9b2ae"
down_revision = "a1b2c3d4e5f6"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.add_column(
"tool_call",
sa.Column("tab_index", sa.Integer(), nullable=False, server_default="0"),
)
def downgrade() -> None:
op.drop_column("tool_call", "tab_index")

View File

@@ -62,11 +62,6 @@ def upgrade() -> None:
)
"""
)
# Drop the temporary table to avoid conflicts if migration runs again
# (e.g., during upgrade -> downgrade -> upgrade cycles in tests)
op.execute("DROP TABLE IF EXISTS temp_connector_credential")
# If no exception was raised, alter the column
op.alter_column("credential", "source", nullable=True) # TODO modify
# # ### end Alembic commands ###

View File

@@ -1,55 +0,0 @@
"""update_default_persona_prompt
Revision ID: 5e6f7a8b9c0d
Revises: 4f8a2b3c1d9e
Create Date: 2025-11-30 12:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5e6f7a8b9c0d"
down_revision = "4f8a2b3c1d9e"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].{citation_reminder_or_empty}
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :system_prompt
WHERE id = :persona_id
"""
),
{"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# We don't revert the system prompt on downgrade since we don't know
# what the previous value was. The new prompt is a reasonable default.
pass

View File

@@ -1,27 +0,0 @@
"""Add display_name to model_configuration
Revision ID: 7bd55f264e1b
Revises: e8f0d2a38171
Create Date: 2025-12-04
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7bd55f264e1b"
down_revision = "e8f0d2a38171"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"model_configuration",
sa.Column("display_name", sa.String(), nullable=True),
)
def downgrade() -> None:
op.drop_column("model_configuration", "display_name")

View File

@@ -42,13 +42,13 @@ def upgrade() -> None:
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
server_default=sa.text("now()"), # type: ignore
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
server_default=sa.text("now()"), # type: ignore
nullable=False,
),
)
@@ -63,13 +63,13 @@ def upgrade() -> None:
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
server_default=sa.text("now()"), # type: ignore
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
server_default=sa.text("now()"), # type: ignore
nullable=False,
),
sa.ForeignKeyConstraint(

View File

@@ -1,55 +0,0 @@
"""update_default_system_prompt
Revision ID: 87c52ec39f84
Revises: 7bd55f264e1b
Create Date: 2025-12-05 15:54:06.002452
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "87c52ec39f84"
down_revision = "7bd55f264e1b"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
# ruff: noqa: E501, W605 start
DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :system_prompt
WHERE id = :persona_id
"""
),
{"system_prompt": DEFAULT_SYSTEM_PROMPT, "persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# We don't revert the system prompt on downgrade since we don't know
# what the previous value was. The new prompt is a reasonable default.
pass

View File

@@ -1,62 +0,0 @@
"""update_default_tool_descriptions
Revision ID: a01bf2971c5d
Revises: 87c52ec39f84
Create Date: 2025-12-16 15:21:25.656375
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a01bf2971c5d"
down_revision = "18b5b2524446"
branch_labels = None
depends_on = None
# new tool descriptions (12/2025)
TOOL_DESCRIPTIONS = {
"SearchTool": "The Search Action allows the agent to search through connected knowledge to help build an answer.",
"ImageGenerationTool": (
"The Image Generation Action allows the agent to use DALL-E 3 or GPT-IMAGE-1 to generate images. "
"The action will be used when the user asks the agent to generate an image."
),
"WebSearchTool": (
"The Web Search Action allows the agent "
"to perform internet searches for up-to-date information."
),
"KnowledgeGraphTool": (
"The Knowledge Graph Search Action allows the agent to search the "
"Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Agent, "
"and it requires the Knowledge Graph to be enabled."
),
"OktaProfileTool": (
"The Okta Profile Action allows the agent to fetch the current user's information from Okta. "
"This may include the user's name, email, phone number, address, and other details such as their "
"manager and direct reports."
),
}
def upgrade() -> None:
conn = op.get_bind()
conn.execute(sa.text("BEGIN"))
try:
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("COMMIT"))
except Exception as e:
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
pass

View File

@@ -1,49 +0,0 @@
"""add license table
Revision ID: a1b2c3d4e5f6
Revises: a01bf2971c5d
Create Date: 2025-12-04 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f6"
down_revision = "a01bf2971c5d"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"license",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("license_data", sa.Text(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
)
# Singleton pattern - only ever one row in this table
op.create_index(
"idx_license_singleton",
"license",
[sa.text("(true)")],
unique=True,
)
def downgrade() -> None:
op.drop_index("idx_license_singleton", table_name="license")
op.drop_table("license")

View File

@@ -1,27 +0,0 @@
"""Remove fast_default_model_name from llm_provider
Revision ID: a2b3c4d5e6f7
Revises: 2a391f840e85
Create Date: 2024-12-17
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a2b3c4d5e6f7"
down_revision = "2a391f840e85"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.drop_column("llm_provider", "fast_default_model_name")
def downgrade() -> None:
op.add_column(
"llm_provider",
sa.Column("fast_default_model_name", sa.String(), nullable=True),
)

View File

@@ -18,253 +18,412 @@ depends_on = None
def upgrade() -> None:
# 1. Drop old research/agent tables (CASCADE handles dependencies)
# Drop research agent tables (if they exist)
op.execute("DROP TABLE IF EXISTS research_agent_iteration_sub_step CASCADE")
op.execute("DROP TABLE IF EXISTS research_agent_iteration CASCADE")
# Drop agent sub query and sub question tables (if they exist)
op.execute("DROP TABLE IF EXISTS agent__sub_query__search_doc CASCADE")
op.execute("DROP TABLE IF EXISTS agent__sub_query CASCADE")
op.execute("DROP TABLE IF EXISTS agent__sub_question CASCADE")
# 2. ChatMessage table changes
# Rename columns and add FKs
op.alter_column(
"chat_message", "parent_message", new_column_name="parent_message_id"
)
op.create_foreign_key(
"fk_chat_message_parent_message_id",
"chat_message",
"chat_message",
["parent_message_id"],
["id"],
)
op.alter_column(
"chat_message",
"latest_child_message",
new_column_name="latest_child_message_id",
)
op.create_foreign_key(
"fk_chat_message_latest_child_message_id",
"chat_message",
"chat_message",
["latest_child_message_id"],
["id"],
# Update ChatMessage table
# Rename parent_message to parent_message_id and make it a foreign key (if not already done)
conn = op.get_bind()
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'chat_message' AND column_name = 'parent_message'
"""
)
)
if result.fetchone():
op.alter_column(
"chat_message", "parent_message", new_column_name="parent_message_id"
)
op.create_foreign_key(
"fk_chat_message_parent_message_id",
"chat_message",
"chat_message",
["parent_message_id"],
["id"],
)
# Add new column
op.add_column(
"chat_message", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
# Rename latest_child_message to latest_child_message_id and make it a foreign key (if not already done)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'chat_message' AND column_name = 'latest_child_message'
"""
)
)
if result.fetchone():
op.alter_column(
"chat_message",
"latest_child_message",
new_column_name="latest_child_message_id",
)
op.create_foreign_key(
"fk_chat_message_latest_child_message_id",
"chat_message",
"chat_message",
["latest_child_message_id"],
["id"],
)
# Drop old columns
op.drop_column("chat_message", "rephrased_query")
op.drop_column("chat_message", "alternate_assistant_id")
op.drop_column("chat_message", "overridden_model")
op.drop_column("chat_message", "is_agentic")
op.drop_column("chat_message", "refined_answer_improvement")
op.drop_column("chat_message", "research_type")
op.drop_column("chat_message", "research_plan")
op.drop_column("chat_message", "research_answer_purpose")
# Add reasoning_tokens column (if not exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'chat_message' AND column_name = 'reasoning_tokens'
"""
)
)
if not result.fetchone():
op.add_column(
"chat_message", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
)
# 3. ToolCall table changes
# Drop the unique constraint first
# Drop columns no longer needed (if they exist)
for col in [
"rephrased_query",
"alternate_assistant_id",
"overridden_model",
"is_agentic",
"refined_answer_improvement",
"research_type",
"research_plan",
"research_answer_purpose",
]:
result = conn.execute(
sa.text(
f"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'chat_message' AND column_name = '{col}'
"""
)
)
if result.fetchone():
op.drop_column("chat_message", col)
# Update ToolCall table
# Add chat_session_id column (if not exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'chat_session_id'
"""
)
)
if not result.fetchone():
op.add_column(
"tool_call",
sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=False),
)
op.create_foreign_key(
"fk_tool_call_chat_session_id",
"tool_call",
"chat_session",
["chat_session_id"],
["id"],
)
# Rename message_id to parent_chat_message_id and make nullable (if not already done)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'message_id'
"""
)
)
if result.fetchone():
op.alter_column(
"tool_call",
"message_id",
new_column_name="parent_chat_message_id",
nullable=True,
)
# Add parent_tool_call_id (if not exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'parent_tool_call_id'
"""
)
)
if not result.fetchone():
op.add_column(
"tool_call", sa.Column("parent_tool_call_id", sa.Integer(), nullable=True)
)
op.create_foreign_key(
"fk_tool_call_parent_tool_call_id",
"tool_call",
"tool_call",
["parent_tool_call_id"],
["id"],
)
op.drop_constraint("uq_tool_call_message_id", "tool_call", type_="unique")
# Delete orphaned tool_call rows (those without valid chat_message)
op.execute(
"DELETE FROM tool_call WHERE message_id NOT IN (SELECT id FROM chat_message)"
)
# Add chat_session_id as nullable first, populate, then make NOT NULL
op.add_column(
"tool_call",
sa.Column("chat_session_id", postgresql.UUID(as_uuid=True), nullable=True),
)
# Populate chat_session_id from the related chat_message
op.execute(
# Add turn_number, tool_id (if not exists)
for col_name in ["turn_number", "tool_id"]:
result = conn.execute(
sa.text(
f"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = '{col_name}'
"""
UPDATE tool_call
SET chat_session_id = chat_message.chat_session_id
FROM chat_message
WHERE tool_call.message_id = chat_message.id
)
)
if not result.fetchone():
op.add_column(
"tool_call",
sa.Column(col_name, sa.Integer(), nullable=False, server_default="0"),
)
# Add tool_call_id as String (if not exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'tool_call_id'
"""
)
)
if not result.fetchone():
op.add_column(
"tool_call",
sa.Column("tool_call_id", sa.String(), nullable=False, server_default=""),
)
# Now make it NOT NULL and add FK
op.alter_column("tool_call", "chat_session_id", nullable=False)
op.create_foreign_key(
"fk_tool_call_chat_session_id",
"tool_call",
"chat_session",
["chat_session_id"],
["id"],
ondelete="CASCADE",
# Add reasoning_tokens (if not exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'reasoning_tokens'
"""
)
)
if not result.fetchone():
op.add_column(
"tool_call", sa.Column("reasoning_tokens", sa.Text(), nullable=True)
)
# Rename message_id and make nullable, recreate FK with CASCADE
op.drop_constraint("tool_call_message_id_fkey", "tool_call", type_="foreignkey")
op.alter_column(
"tool_call",
"message_id",
new_column_name="parent_chat_message_id",
nullable=True,
)
op.create_foreign_key(
"fk_tool_call_parent_chat_message_id",
"tool_call",
"chat_message",
["parent_chat_message_id"],
["id"],
ondelete="CASCADE",
# Rename tool_arguments to tool_call_arguments (if not already done)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'tool_arguments'
"""
)
)
if result.fetchone():
op.alter_column(
"tool_call", "tool_arguments", new_column_name="tool_call_arguments"
)
# Add parent_tool_call_id with FK
op.add_column(
"tool_call", sa.Column("parent_tool_call_id", sa.Integer(), nullable=True)
# Rename tool_result to tool_call_response and change type from JSONB to Text (if not already done)
result = conn.execute(
sa.text(
"""
SELECT column_name, data_type FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'tool_result'
"""
)
)
op.create_foreign_key(
"fk_tool_call_parent_tool_call_id",
"tool_call",
"tool_call",
["parent_tool_call_id"],
["id"],
ondelete="CASCADE",
)
# Add other new columns
op.add_column(
"tool_call",
sa.Column("turn_number", sa.Integer(), nullable=False, server_default="0"),
)
op.add_column(
"tool_call",
sa.Column("tool_call_id", sa.String(), nullable=False, server_default=""),
)
op.add_column("tool_call", sa.Column("reasoning_tokens", sa.Text(), nullable=True))
op.add_column(
"tool_call",
sa.Column("tool_call_tokens", sa.Integer(), nullable=False, server_default="0"),
)
op.add_column(
"tool_call",
sa.Column("generated_images", postgresql.JSONB(), nullable=True),
)
# Rename columns
op.alter_column(
"tool_call", "tool_arguments", new_column_name="tool_call_arguments"
)
op.alter_column("tool_call", "tool_result", new_column_name="tool_call_response")
# Change tool_call_response type from JSONB to Text
op.execute(
tool_result_row = result.fetchone()
if tool_result_row:
op.alter_column(
"tool_call", "tool_result", new_column_name="tool_call_response"
)
# Change type from JSONB to Text
op.execute(
sa.text(
"""
ALTER TABLE tool_call
ALTER COLUMN tool_call_response TYPE TEXT
USING tool_call_response::text
"""
ALTER TABLE tool_call
ALTER COLUMN tool_call_response TYPE TEXT
USING tool_call_response::text
)
)
else:
# Check if tool_call_response already exists and is JSONB, then convert to Text
result = conn.execute(
sa.text(
"""
SELECT data_type FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'tool_call_response'
"""
)
)
tool_call_response_row = result.fetchone()
if tool_call_response_row and tool_call_response_row[0] == "jsonb":
op.execute(
sa.text(
"""
ALTER TABLE tool_call
ALTER COLUMN tool_call_response TYPE TEXT
USING tool_call_response::text
"""
)
)
# Add tool_call_tokens (if not exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'tool_call_tokens'
"""
)
)
if not result.fetchone():
op.add_column(
"tool_call",
sa.Column(
"tool_call_tokens", sa.Integer(), nullable=False, server_default="0"
),
)
# Drop old columns
op.drop_column("tool_call", "tool_name")
# 4. Create new association table
op.create_table(
"tool_call__search_doc",
sa.Column("tool_call_id", sa.Integer(), nullable=False),
sa.Column("search_doc_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(["tool_call_id"], ["tool_call.id"], ondelete="CASCADE"),
sa.ForeignKeyConstraint(
["search_doc_id"], ["search_doc.id"], ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("tool_call_id", "search_doc_id"),
# Add generated_images column for image generation tool replay (if not exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'generated_images'
"""
)
)
if not result.fetchone():
op.add_column(
"tool_call",
sa.Column("generated_images", postgresql.JSONB(), nullable=True),
)
# 5. Persona table change
op.add_column(
"persona",
sa.Column(
"replace_base_system_prompt",
sa.Boolean(),
nullable=False,
server_default="false",
),
# Drop tool_name column (if exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'tool_call' AND column_name = 'tool_name'
"""
)
)
if result.fetchone():
op.drop_column("tool_call", "tool_name")
# Create tool_call__search_doc association table (if not exists)
result = conn.execute(
sa.text(
"""
SELECT table_name FROM information_schema.tables
WHERE table_name = 'tool_call__search_doc'
"""
)
)
if not result.fetchone():
op.create_table(
"tool_call__search_doc",
sa.Column("tool_call_id", sa.Integer(), nullable=False),
sa.Column("search_doc_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["tool_call_id"], ["tool_call.id"], ondelete="CASCADE"
),
sa.ForeignKeyConstraint(
["search_doc_id"], ["search_doc.id"], ondelete="CASCADE"
),
sa.PrimaryKeyConstraint("tool_call_id", "search_doc_id"),
)
# Add replace_base_system_prompt to persona table (if not exists)
result = conn.execute(
sa.text(
"""
SELECT column_name FROM information_schema.columns
WHERE table_name = 'persona' AND column_name = 'replace_base_system_prompt'
"""
)
)
if not result.fetchone():
op.add_column(
"persona",
sa.Column(
"replace_base_system_prompt",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
def downgrade() -> None:
# Reverse persona changes
op.drop_column("persona", "replace_base_system_prompt")
# Drop new association table
op.drop_table("tool_call__search_doc")
# Drop tool_call__search_doc association table
op.execute("DROP TABLE IF EXISTS tool_call__search_doc CASCADE")
# Reverse ToolCall changes
op.add_column(
"tool_call",
sa.Column("tool_name", sa.String(), nullable=False, server_default=""),
)
# Change tool_call_response back to JSONB
op.add_column("tool_call", sa.Column("tool_name", sa.String(), nullable=False))
op.drop_column("tool_call", "tool_id")
op.drop_column("tool_call", "tool_call_tokens")
op.drop_column("tool_call", "generated_images")
# Change tool_call_response back to JSONB before renaming
op.execute(
"""
sa.text(
"""
ALTER TABLE tool_call
ALTER COLUMN tool_call_response TYPE JSONB
USING tool_call_response::jsonb
"""
)
)
op.alter_column("tool_call", "tool_call_response", new_column_name="tool_result")
op.alter_column(
"tool_call", "tool_call_arguments", new_column_name="tool_arguments"
)
op.drop_column("tool_call", "generated_images")
op.drop_column("tool_call", "tool_call_tokens")
op.drop_column("tool_call", "reasoning_tokens")
op.drop_column("tool_call", "tool_call_id")
op.drop_column("tool_call", "turn_number")
op.drop_constraint(
"fk_tool_call_parent_tool_call_id", "tool_call", type_="foreignkey"
)
op.drop_column("tool_call", "parent_tool_call_id")
op.drop_constraint(
"fk_tool_call_parent_chat_message_id", "tool_call", type_="foreignkey"
)
op.alter_column(
"tool_call",
"parent_chat_message_id",
new_column_name="message_id",
nullable=False,
)
op.create_foreign_key(
"tool_call_message_id_fkey",
"tool_call",
"chat_message",
["message_id"],
["id"],
)
op.drop_constraint("fk_tool_call_chat_session_id", "tool_call", type_="foreignkey")
op.drop_column("tool_call", "chat_session_id")
op.create_unique_constraint("uq_tool_call_message_id", "tool_call", ["message_id"])
# Reverse ChatMessage changes
# Note: research_answer_purpose and research_type were originally String columns,
# not Enum types (see migrations 5ae8240accb3 and f8a9b2c3d4e5)
op.add_column(
"chat_message",
sa.Column("research_answer_purpose", sa.String(), nullable=True),
sa.Column(
"research_answer_purpose",
sa.Enum("INTRO", "DEEP_DIVE", name="researchanswerpurpose"),
nullable=True,
),
)
op.add_column(
"chat_message", sa.Column("research_plan", postgresql.JSONB(), nullable=True)
)
op.add_column(
"chat_message",
sa.Column("research_type", sa.String(), nullable=True),
sa.Column(
"research_type",
sa.Enum("SIMPLE", "DEEP", name="researchtype"),
nullable=True,
),
)
op.add_column(
"chat_message",
@@ -280,20 +439,10 @@ def downgrade() -> None:
op.add_column(
"chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True)
)
# Recreate the FK constraint that was implicitly dropped when the column was dropped
op.create_foreign_key(
"fk_chat_message_persona",
"chat_message",
"persona",
["alternate_assistant_id"],
["id"],
)
op.add_column(
"chat_message", sa.Column("rephrased_query", sa.Text(), nullable=True)
)
op.drop_column("chat_message", "reasoning_tokens")
op.drop_constraint(
"fk_chat_message_latest_child_message_id", "chat_message", type_="foreignkey"
)
@@ -302,7 +451,6 @@ def downgrade() -> None:
"latest_child_message_id",
new_column_name="latest_child_message",
)
op.drop_constraint(
"fk_chat_message_parent_message_id", "chat_message", type_="foreignkey"
)
@@ -411,7 +559,6 @@ def downgrade() -> None:
sa.Column("queries", postgresql.JSONB(), nullable=True),
sa.Column("generated_images", postgresql.JSONB(), nullable=True),
sa.Column("additional_data", postgresql.JSONB(), nullable=True),
sa.Column("file_ids", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(
["primary_question_id", "iteration_nr"],
[

View File

@@ -1,46 +0,0 @@
"""Drop milestone table
Revision ID: b8c9d0e1f2a3
Revises: a2b3c4d5e6f7
Create Date: 2025-12-18
"""
from alembic import op
import sqlalchemy as sa
import fastapi_users_db_sqlalchemy
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "b8c9d0e1f2a3"
down_revision = "a2b3c4d5e6f7"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_table("milestone")
def downgrade() -> None:
op.create_table(
"milestone",
sa.Column("id", sa.UUID(), nullable=False),
sa.Column("tenant_id", sa.String(), nullable=True),
sa.Column(
"user_id",
fastapi_users_db_sqlalchemy.generics.GUID(),
nullable=True,
),
sa.Column("event_type", sa.String(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("event_tracker", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("event_type", name="uq_milestone_event_type"),
)

View File

@@ -1,52 +0,0 @@
"""add_deep_research_tool
Revision ID: c1d2e3f4a5b6
Revises: b8c9d0e1f2a3
Create Date: 2025-12-18 16:00:00.000000
"""
from alembic import op
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "c1d2e3f4a5b6"
down_revision = "b8c9d0e1f2a3"
branch_labels = None
depends_on = None
DEEP_RESEARCH_TOOL = {
"name": RESEARCH_AGENT_DB_NAME,
"display_name": "Research Agent",
"description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
"in_code_tool_id": "ResearchAgent",
}
def upgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
VALUES (:name, :display_name, :description, :in_code_tool_id, false)
"""
),
DEEP_RESEARCH_TOOL,
)
def downgrade() -> None:
conn = op.get_bind()
conn.execute(
sa.text(
"""
DELETE FROM tool
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{"in_code_tool_id": DEEP_RESEARCH_TOOL["in_code_tool_id"]},
)

View File

@@ -257,8 +257,8 @@ def _migrate_files_to_external_storage() -> None:
print(f"File {file_id} not found in PostgreSQL storage.")
continue
lobj_id = cast(int, file_record.lobj_oid)
file_metadata = cast(Any, file_record.file_metadata)
lobj_id = cast(int, file_record.lobj_oid) # type: ignore
file_metadata = cast(Any, file_record.file_metadata) # type: ignore
# Read file content from PostgreSQL
try:
@@ -280,7 +280,7 @@ def _migrate_files_to_external_storage() -> None:
else:
# Convert other types to dict if possible, otherwise None
try:
file_metadata = dict(file_record.file_metadata)
file_metadata = dict(file_record.file_metadata) # type: ignore
except (TypeError, ValueError):
file_metadata = None

View File

@@ -11,8 +11,8 @@ import sqlalchemy as sa
revision = "e209dc5a8156"
down_revision = "48d14957fe80"
branch_labels = None
depends_on = None
branch_labels = None # type: ignore
depends_on = None # type: ignore
def upgrade() -> None:

View File

@@ -1,115 +0,0 @@
"""add status to mcp server and make auth fields nullable
Revision ID: e8f0d2a38171
Revises: ed9e44312505
Create Date: 2025-11-28 11:15:37.667340
"""
from alembic import op
import sqlalchemy as sa
from onyx.db.enums import (
MCPTransport,
MCPAuthenticationType,
MCPAuthenticationPerformer,
MCPServerStatus,
)
# revision identifiers, used by Alembic.
revision = "e8f0d2a38171"
down_revision = "ed9e44312505"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Make auth fields nullable
op.alter_column(
"mcp_server",
"transport",
existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
nullable=True,
)
op.alter_column(
"mcp_server",
"auth_type",
existing_type=sa.Enum(
MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
),
nullable=True,
)
op.alter_column(
"mcp_server",
"auth_performer",
existing_type=sa.Enum(
MCPAuthenticationPerformer,
name="mcp_authentication_performer",
native_enum=False,
),
nullable=True,
)
# Add status column with default
op.add_column(
"mcp_server",
sa.Column(
"status",
sa.Enum(MCPServerStatus, name="mcp_server_status", native_enum=False),
nullable=False,
server_default="CREATED",
),
)
# For existing records, mark status as CONNECTED
bind = op.get_bind()
bind.execute(
sa.text(
"""
UPDATE mcp_server
SET status = 'CONNECTED'
WHERE status != 'CONNECTED'
and admin_connection_config_id IS NOT NULL
"""
)
)
def downgrade() -> None:
# Remove status column
op.drop_column("mcp_server", "status")
# Make auth fields non-nullable (set defaults first)
op.execute(
"UPDATE mcp_server SET transport = 'STREAMABLE_HTTP' WHERE transport IS NULL"
)
op.execute("UPDATE mcp_server SET auth_type = 'NONE' WHERE auth_type IS NULL")
op.execute(
"UPDATE mcp_server SET auth_performer = 'ADMIN' WHERE auth_performer IS NULL"
)
op.alter_column(
"mcp_server",
"transport",
existing_type=sa.Enum(MCPTransport, name="mcp_transport", native_enum=False),
nullable=False,
)
op.alter_column(
"mcp_server",
"auth_type",
existing_type=sa.Enum(
MCPAuthenticationType, name="mcp_authentication_type", native_enum=False
),
nullable=False,
)
op.alter_column(
"mcp_server",
"auth_performer",
existing_type=sa.Enum(
MCPAuthenticationPerformer,
name="mcp_authentication_performer",
native_enum=False,
),
nullable=False,
)

View File

@@ -1,34 +0,0 @@
"""Add icon_name field
Revision ID: ed9e44312505
Revises: 5e6f7a8b9c0d
Create Date: 2025-12-03 16:35:07.828393
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "ed9e44312505"
down_revision = "5e6f7a8b9c0d"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add icon_name column
op.add_column("persona", sa.Column("icon_name", sa.String(), nullable=True))
# Remove old icon columns
op.drop_column("persona", "icon_shape")
op.drop_column("persona", "icon_color")
def downgrade() -> None:
# Re-add old icon columns
op.add_column("persona", sa.Column("icon_color", sa.String(), nullable=True))
op.add_column("persona", sa.Column("icon_shape", sa.Integer(), nullable=True))
# Remove icon_name column
op.drop_column("persona", "icon_name")

View File

@@ -20,9 +20,7 @@ config = context.config
if config.config_file_name is not None and config.attributes.get(
"configure_logger", True
):
# disable_existing_loggers=False prevents breaking pytest's caplog fixture
# See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
fileConfig(config.config_file_name, disable_existing_loggers=False)
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
@@ -84,9 +82,9 @@ def run_migrations_offline() -> None:
def do_run_migrations(connection: Connection) -> None:
context.configure(
connection=connection,
target_metadata=target_metadata, # type: ignore[arg-type]
target_metadata=target_metadata, # type: ignore
include_object=include_object,
)
) # type: ignore
with context.begin_transaction():
context.run_migrations()
@@ -110,24 +108,9 @@ async def run_async_migrations() -> None:
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
"""Run migrations in 'online' mode."""
Supports pytest-alembic by checking for a pre-configured connection
in context.config.attributes["connection"]. If present, uses that
connection/engine directly instead of creating a new async engine.
"""
# Check if pytest-alembic is providing a connection/engine
connectable = context.config.attributes.get("connection", None)
if connectable is not None:
# pytest-alembic is providing an engine - use it directly
with connectable.connect() as connection:
do_run_migrations(connection)
# Commit to ensure changes are visible to next migration
connection.commit()
else:
# Normal operation - use async migrations
asyncio.run(run_async_migrations())
asyncio.run(run_async_migrations())
if context.is_offline_mode():

View File

@@ -1,15 +1,11 @@
group "default" {
targets = ["backend", "model-server", "web"]
targets = ["backend", "model-server"]
}
variable "BACKEND_REPOSITORY" {
default = "onyxdotapp/onyx-backend"
}
variable "WEB_SERVER_REPOSITORY" {
default = "onyxdotapp/onyx-web-server"
}
variable "MODEL_SERVER_REPOSITORY" {
default = "onyxdotapp/onyx-model-server"
}
@@ -23,7 +19,7 @@ variable "TAG" {
}
target "backend" {
context = "backend"
context = "."
dockerfile = "Dockerfile"
cache-from = ["type=registry,ref=${BACKEND_REPOSITORY}:latest"]
@@ -32,18 +28,8 @@ target "backend" {
tags = ["${BACKEND_REPOSITORY}:${TAG}"]
}
target "web" {
context = "web"
dockerfile = "Dockerfile"
cache-from = ["type=registry,ref=${WEB_SERVER_REPOSITORY}:latest"]
cache-to = ["type=inline"]
tags = ["${WEB_SERVER_REPOSITORY}:${TAG}"]
}
target "model-server" {
context = "backend"
context = "."
dockerfile = "Dockerfile.model_server"
@@ -54,7 +40,7 @@ target "model-server" {
}
target "integration" {
context = "backend"
context = "."
dockerfile = "tests/integration/Dockerfile"
// Provide the base image via build context from the backend target

View File

@@ -41,10 +41,6 @@ CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (
JIRA_PERMISSION_DOC_SYNC_FREQUENCY = int(
os.environ.get("JIRA_PERMISSION_DOC_SYNC_FREQUENCY") or 30 * 60
)
# In seconds, default is 30 minutes
JIRA_PERMISSION_GROUP_SYNC_FREQUENCY = int(
os.environ.get("JIRA_PERMISSION_GROUP_SYNC_FREQUENCY") or 30 * 60
)
#####

View File

@@ -118,6 +118,6 @@ def fetch_document_sets(
.all()
)
document_set_with_cc_pairs.append((document_set, cc_pairs))
document_set_with_cc_pairs.append((document_set, cc_pairs)) # type: ignore
return document_set_with_cc_pairs

View File

@@ -1,278 +0,0 @@
"""Database and cache operations for the license table."""
from datetime import datetime
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import Session
from ee.onyx.server.license.models import LicenseMetadata
from ee.onyx.server.license.models import LicensePayload
from ee.onyx.server.license.models import LicenseSource
from onyx.db.models import License
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
LICENSE_METADATA_KEY = "license:metadata"
LICENSE_CACHE_TTL_SECONDS = 86400 # 24 hours
# -----------------------------------------------------------------------------
# Database CRUD Operations
# -----------------------------------------------------------------------------
def get_license(db_session: Session) -> License | None:
"""
Get the current license (singleton pattern - only one row).
Args:
db_session: Database session
Returns:
License object if exists, None otherwise
"""
return db_session.execute(select(License)).scalars().first()
def upsert_license(db_session: Session, license_data: str) -> License:
"""
Insert or update the license (singleton pattern).
Args:
db_session: Database session
license_data: Base64-encoded signed license blob
Returns:
The created or updated License object
"""
existing = get_license(db_session)
if existing:
existing.license_data = license_data
db_session.commit()
db_session.refresh(existing)
logger.info("License updated")
return existing
new_license = License(license_data=license_data)
db_session.add(new_license)
db_session.commit()
db_session.refresh(new_license)
logger.info("License created")
return new_license
def delete_license(db_session: Session) -> bool:
"""
Delete the current license.
Args:
db_session: Database session
Returns:
True if deleted, False if no license existed
"""
existing = get_license(db_session)
if existing:
db_session.delete(existing)
db_session.commit()
logger.info("License deleted")
return True
return False
# -----------------------------------------------------------------------------
# Seat Counting
# -----------------------------------------------------------------------------
def get_used_seats(tenant_id: str | None = None) -> int:
"""
Get current seat usage.
For multi-tenant: counts users in UserTenantMapping for this tenant.
For self-hosted: counts all active users (includes both Onyx UI users
and Slack users who have been converted to Onyx users).
"""
if MULTI_TENANT:
from ee.onyx.server.tenants.user_mapping import get_tenant_count
return get_tenant_count(tenant_id or get_current_tenant_id())
else:
# Self-hosted: count all active users (Onyx + converted Slack users)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
with get_session_with_current_tenant() as db_session:
result = db_session.execute(
select(func.count()).select_from(User).where(User.is_active) # type: ignore
)
return result.scalar() or 0
# -----------------------------------------------------------------------------
# Redis Cache Operations
# -----------------------------------------------------------------------------
def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:
"""
Get license metadata from Redis cache.
Args:
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
LicenseMetadata if cached, None otherwise
"""
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_replica_client(tenant_id=tenant)
cached = redis_client.get(LICENSE_METADATA_KEY)
if cached:
try:
cached_str: str
if isinstance(cached, bytes):
cached_str = cached.decode("utf-8")
else:
cached_str = str(cached)
return LicenseMetadata.model_validate_json(cached_str)
except Exception as e:
logger.warning(f"Failed to parse cached license metadata: {e}")
return None
return None
def invalidate_license_cache(tenant_id: str | None = None) -> None:
"""
Invalidate the license metadata cache (not the license itself).
This deletes the cached LicenseMetadata from Redis. The actual license
in the database is not affected. Redis delete is idempotent - if the
key doesn't exist, this is a no-op.
Args:
tenant_id: Tenant ID (for multi-tenant deployments)
"""
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_client(tenant_id=tenant)
redis_client.delete(LICENSE_METADATA_KEY)
logger.info("License cache invalidated")
def update_license_cache(
payload: LicensePayload,
source: LicenseSource | None = None,
grace_period_end: datetime | None = None,
tenant_id: str | None = None,
) -> LicenseMetadata:
"""
Update the Redis cache with license metadata.
We cache all license statuses (ACTIVE, GRACE_PERIOD, GATED_ACCESS) because:
1. Frontend needs status to show appropriate UI/banners
2. Caching avoids repeated DB + crypto verification on every request
3. Status enforcement happens at the feature level, not here
Args:
payload: Verified license payload
source: How the license was obtained
grace_period_end: Optional grace period end time
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
The cached LicenseMetadata
"""
from ee.onyx.utils.license import get_license_status
tenant = tenant_id or get_current_tenant_id()
redis_client = get_redis_client(tenant_id=tenant)
used_seats = get_used_seats(tenant)
status = get_license_status(payload, grace_period_end)
metadata = LicenseMetadata(
tenant_id=payload.tenant_id,
organization_name=payload.organization_name,
seats=payload.seats,
used_seats=used_seats,
plan_type=payload.plan_type,
issued_at=payload.issued_at,
expires_at=payload.expires_at,
grace_period_end=grace_period_end,
status=status,
source=source,
stripe_subscription_id=payload.stripe_subscription_id,
)
redis_client.setex(
LICENSE_METADATA_KEY,
LICENSE_CACHE_TTL_SECONDS,
metadata.model_dump_json(),
)
logger.info(f"License cache updated: {metadata.seats} seats, status={status.value}")
return metadata
def refresh_license_cache(
db_session: Session,
tenant_id: str | None = None,
) -> LicenseMetadata | None:
"""
Refresh the license cache from the database.
Args:
db_session: Database session
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
LicenseMetadata if license exists, None otherwise
"""
from ee.onyx.utils.license import verify_license_signature
license_record = get_license(db_session)
if not license_record:
invalidate_license_cache(tenant_id)
return None
try:
payload = verify_license_signature(license_record.license_data)
return update_license_cache(
payload,
source=LicenseSource.AUTO_FETCH,
tenant_id=tenant_id,
)
except ValueError as e:
logger.error(f"Failed to verify license during cache refresh: {e}")
invalidate_license_cache(tenant_id)
return None
def get_license_metadata(
db_session: Session,
tenant_id: str | None = None,
) -> LicenseMetadata | None:
"""
Get license metadata, using cache if available.
Args:
db_session: Database session
tenant_id: Tenant ID (for multi-tenant deployments)
Returns:
LicenseMetadata if license exists, None otherwise
"""
# Try cache first
cached = get_cached_license_metadata(tenant_id)
if cached:
return cached
# Refresh from database
return refresh_license_cache(db_session, tenant_id)

View File

@@ -8,7 +8,6 @@ from sqlalchemy import func
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session
from ee.onyx.server.user_group.models import SetCuratorRequest
@@ -363,29 +362,14 @@ def _check_user_group_is_modifiable(user_group: UserGroup) -> None:
def _add_user__user_group_relationships__no_commit(
db_session: Session, user_group_id: int, user_ids: list[UUID]
) -> None:
"""NOTE: does not commit the transaction.
This function is idempotent - it will skip users who are already in the group
to avoid duplicate key violations during concurrent operations or re-syncs.
Uses ON CONFLICT DO NOTHING to keep inserts atomic under concurrency.
"""
if not user_ids:
return
insert_stmt = (
insert(User__UserGroup)
.values(
[
{"user_id": user_id, "user_group_id": user_group_id}
for user_id in user_ids
]
)
.on_conflict_do_nothing(
index_elements=[User__UserGroup.user_group_id, User__UserGroup.user_id]
)
)
db_session.execute(insert_stmt)
) -> list[User__UserGroup]:
"""NOTE: does not commit the transaction."""
relationships = [
User__UserGroup(user_id=user_id, user_group_id=user_group_id)
for user_id in user_ids
]
db_session.add_all(relationships)
return relationships
def _add_user_group__cc_pair_relationships__no_commit(

View File

@@ -3,15 +3,12 @@ from collections.abc import Generator
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from onyx.background.error_logging import emit_background_error
from onyx.configs.app_configs import CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC
from onyx.connectors.confluence.onyx_confluence import (
get_user_email_from_username__server,
)
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import ConnectorCredentialPair
from onyx.db.users import get_all_users
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -22,7 +19,7 @@ def _build_group_member_email_map(
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
for user in confluence_client.paginated_cql_user_retrieval():
logger.info(f"Processing groups for user: {user}")
logger.debug(f"Processing groups for user: {user}")
email = user.email
if not email:
@@ -34,8 +31,6 @@ def _build_group_member_email_map(
confluence_client=confluence_client,
user_name=user_name,
)
else:
logger.error(f"user result missing username field: {user}")
if not email:
# If we still don't have an email, skip this user
@@ -69,92 +64,6 @@ def _build_group_member_email_map(
return group_member_emails
def _build_group_member_email_map_from_onyx_users(
confluence_client: OnyxConfluence,
) -> dict[str, set[str]]:
"""Hacky, but it's the only way to do this as long as the
Confluence APIs are broken.
This is fixed in Confluence Data Center 10.1.0, so first choice
is to tell users to upgrade to 10.1.0.
https://jira.atlassian.com/browse/CONFSERVER-95999
"""
with get_session_with_current_tenant() as db_session:
# don't include external since they are handled by the "through confluence"
# user fetching mechanism
user_emails = [
user.email for user in get_all_users(db_session, include_external=False)
]
def _infer_username_from_email(email: str) -> str:
return email.split("@")[0]
group_member_emails: dict[str, set[str]] = {}
for email in user_emails:
logger.info(f"Processing groups for user with email: {email}")
try:
user_name = _infer_username_from_email(email)
response = confluence_client.get_user_details_by_username(user_name)
user_key = response.get("userKey")
if not user_key:
logger.error(f"User key not found for user with email {email}")
continue
all_users_groups: set[str] = set()
for group in confluence_client.paginated_groups_by_user_retrieval(user_key):
# group name uniqueness is enforced by Confluence, so we can use it as a group ID
group_id = group["name"]
group_member_emails.setdefault(group_id, set()).add(email)
all_users_groups.add(group_id)
if not all_users_groups:
msg = f"No groups found for user with email: {email}"
logger.error(msg)
else:
logger.info(
f"Found groups {all_users_groups} for user with email {email}"
)
except Exception:
logger.exception(f"Error getting user details for user with email {email}")
return group_member_emails
def _build_final_group_to_member_email_map(
confluence_client: OnyxConfluence,
cc_pair_id: int,
# if set, will infer confluence usernames from onyx users in addition to using the
# confluence users API. This is a hacky workaround for the fact that the Confluence
# users API is broken before Confluence Data Center 10.1.0.
use_onyx_users: bool = CONFLUENCE_USE_ONYX_USERS_FOR_GROUP_SYNC,
) -> dict[str, set[str]]:
group_to_member_email_map = _build_group_member_email_map(
confluence_client=confluence_client,
cc_pair_id=cc_pair_id,
)
group_to_member_email_map_from_onyx_users = (
(
_build_group_member_email_map_from_onyx_users(
confluence_client=confluence_client,
)
)
if use_onyx_users
else {}
)
all_group_ids = set(group_to_member_email_map.keys()) | set(
group_to_member_email_map_from_onyx_users.keys()
)
final_group_to_member_email_map = {}
for group_id in all_group_ids:
group_member_emails = group_to_member_email_map.get(
group_id, set()
) | group_to_member_email_map_from_onyx_users.get(group_id, set())
final_group_to_member_email_map[group_id] = group_member_emails
return final_group_to_member_email_map
def confluence_group_sync(
tenant_id: str,
cc_pair: ConnectorCredentialPair,
@@ -178,12 +87,13 @@ def confluence_group_sync(
confluence_client._probe_connection(**probe_kwargs)
confluence_client._initialize_connection(**final_kwargs)
group_to_member_email_map = _build_final_group_to_member_email_map(
confluence_client, cc_pair.id
group_member_email_map = _build_group_member_email_map(
confluence_client=confluence_client,
cc_pair_id=cc_pair.id,
)
all_found_emails = set()
for group_id, group_member_emails in group_to_member_email_map.items():
for group_id, group_member_emails in group_member_email_map.items():
yield (
ExternalUserGroup(
id=group_id,

View File

@@ -1,136 +0,0 @@
from collections.abc import Generator
from jira import JIRA
from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.connectors.jira.utils import build_jira_client
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger
logger = setup_logger()
def _get_jira_group_members_email(
jira_client: JIRA,
group_name: str,
) -> list[str]:
"""Get all member emails for a Jira group.
Filters out app accounts (bots, integrations) and only returns real user emails.
"""
emails: list[str] = []
try:
# group_members returns an OrderedDict of account_id -> member_info
members = jira_client.group_members(group=group_name)
if not members:
logger.warning(f"No members found for group {group_name}")
return emails
for account_id, member_info in members.items():
# member_info is a dict with keys like 'fullname', 'email', 'active'
email = member_info.get("email")
# Skip "hidden" emails - these are typically app accounts
if email and email != "hidden":
emails.append(email)
else:
# For cloud, we might need to fetch user details separately
try:
user = jira_client.user(id=account_id)
# Skip app accounts (bots, integrations, etc.)
if hasattr(user, "accountType") and user.accountType == "app":
logger.info(
f"Skipping app account {account_id} for group {group_name}"
)
continue
if hasattr(user, "emailAddress") and user.emailAddress:
emails.append(user.emailAddress)
else:
logger.warning(f"User {account_id} has no email address")
except Exception as e:
logger.warning(
f"Could not fetch email for user {account_id} in group {group_name}: {e}"
)
except Exception as e:
logger.error(f"Error fetching members for group {group_name}: {e}")
return emails
def _build_group_member_email_map(
jira_client: JIRA,
) -> dict[str, set[str]]:
"""Build a map of group names to member emails."""
group_member_emails: dict[str, set[str]] = {}
try:
# Get all groups from Jira - returns a list of group name strings
group_names = jira_client.groups()
if not group_names:
logger.warning("No groups found in Jira")
return group_member_emails
logger.info(f"Found {len(group_names)} groups in Jira")
for group_name in group_names:
if not group_name:
continue
member_emails = _get_jira_group_members_email(
jira_client=jira_client,
group_name=group_name,
)
if member_emails:
group_member_emails[group_name] = set(member_emails)
logger.debug(
f"Found {len(member_emails)} members for group {group_name}"
)
else:
logger.debug(f"No members found for group {group_name}")
except Exception as e:
logger.error(f"Error building group member email map: {e}")
return group_member_emails
def jira_group_sync(
tenant_id: str,
cc_pair: ConnectorCredentialPair,
) -> Generator[ExternalUserGroup, None, None]:
"""
Sync Jira groups and their members.
This function fetches all groups from Jira and yields ExternalUserGroup
objects containing the group ID and member emails.
"""
jira_base_url = cc_pair.connector.connector_specific_config.get("jira_base_url", "")
scoped_token = cc_pair.connector.connector_specific_config.get(
"scoped_token", False
)
if not jira_base_url:
raise ValueError("No jira_base_url found in connector config")
jira_client = build_jira_client(
credentials=cc_pair.credential.credential_json,
jira_base=jira_base_url,
scoped_token=scoped_token,
)
group_member_email_map = _build_group_member_email_map(jira_client=jira_client)
if not group_member_email_map:
raise ValueError(f"No groups with members found for cc_pair_id={cc_pair.id}")
for group_id, group_member_emails in group_member_email_map.items():
yield ExternalUserGroup(
id=group_id,
user_emails=list(group_member_emails),
)

View File

@@ -16,10 +16,6 @@ HolderMap = dict[str, list[Holder]]
logger = setup_logger()
def _get_role_id(holder: Holder) -> str | None:
return holder.get("value") or holder.get("parameter")
def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
"""
A "Holder" in JIRA is a person / entity who "holds" the corresponding permission.
@@ -114,137 +110,80 @@ def _get_user_emails(user_holders: list[Holder]) -> list[str]:
return emails
def _get_user_emails_and_groups_from_project_roles(
def _get_user_emails_from_project_roles(
jira_client: JIRA,
jira_project: str,
project_role_holders: list[Holder],
) -> tuple[list[str], list[str]]:
"""
Get user emails and group names from project roles.
Returns a tuple of (emails, group_names).
"""
# Get role IDs - Cloud uses "value", Data Center uses "parameter"
role_ids = []
for holder in project_role_holders:
role_id = _get_role_id(holder)
if role_id:
role_ids.append(role_id)
else:
logger.warning(f"No value or parameter in projectRole holder: {holder}")
) -> list[str]:
# NOTE (@raunakab) a `parallel_yield` may be helpful here...?
roles = [
jira_client.project_role(project=jira_project, id=role_id)
for role_id in role_ids
jira_client.project_role(project=jira_project, id=project_role_holder["value"])
for project_role_holder in project_role_holders
if "value" in project_role_holder
]
emails = []
groups = []
for role in roles:
if not hasattr(role, "actors"):
logger.warning(f"Project role {role} has no actors attribute")
continue
for actor in role.actors:
# Handle group actors
if hasattr(actor, "actorGroup"):
group_name = getattr(actor.actorGroup, "name", None) or getattr(
actor.actorGroup, "displayName", None
)
if group_name:
groups.append(group_name)
if not hasattr(actor, "actorUser") or not hasattr(
actor.actorUser, "accountId"
):
continue
# Handle user actors
if hasattr(actor, "actorUser"):
account_id = getattr(actor.actorUser, "accountId", None)
if not account_id:
logger.error(f"No accountId in actorUser: {actor.actorUser}")
continue
user = jira_client.user(id=account_id)
if not hasattr(user, "accountType") or user.accountType != "atlassian":
logger.info(
f"Skipping user {account_id} because it is not an atlassian user"
)
continue
if not hasattr(user, "emailAddress"):
msg = f"User's email address was not able to be retrieved; {actor.actorUser.accountId=}"
if hasattr(user, "displayName"):
msg += f" {actor.displayName=}"
logger.warning(msg)
continue
emails.append(user.emailAddress)
user = jira_client.user(id=actor.actorUser.accountId)
if not hasattr(user, "accountType") or user.accountType != "atlassian":
continue
logger.debug(f"Skipping actor type: {actor}")
if not hasattr(user, "emailAddress"):
msg = f"User's email address was not able to be retrieved; {actor.actorUser.accountId=}"
if hasattr(user, "displayName"):
msg += f" {actor.displayName=}"
logger.warn(msg)
continue
return emails, groups
emails.append(user.emailAddress)
return emails
def _build_external_access_from_holder_map(
jira_client: JIRA, jira_project: str, holder_map: HolderMap
) -> ExternalAccess:
"""
Build ExternalAccess from the holder map.
Holder types handled:
- "anyone": Public project, anyone can access
- "applicationRole": All users with a Jira license can access (treated as public)
- "user": Specific users with access
- "projectRole": Project roles containing users and/or groups
- "group": Groups directly assigned in the permission scheme
# Note:
If the `holder_map` contains an instance of "anyone", then this is a public JIRA project.
Otherwise, we fetch the "projectRole"s (i.e., the user-groups in JIRA speak), and the user emails.
"""
# Public access - anyone can view
if "anyone" in holder_map:
return ExternalAccess(
external_user_emails=set(), external_user_group_ids=set(), is_public=True
)
# applicationRole means all users with a Jira license can access - treat as public
if "applicationRole" in holder_map:
return ExternalAccess(
external_user_emails=set(), external_user_group_ids=set(), is_public=True
)
# Get emails from explicit user holders
user_emails = (
_get_user_emails(user_holders=holder_map["user"])
if "user" in holder_map
else []
)
# Get emails and groups from project roles
project_role_user_emails: list[str] = []
project_role_groups: list[str] = []
if "projectRole" in holder_map:
project_role_user_emails, project_role_groups = (
_get_user_emails_and_groups_from_project_roles(
jira_client=jira_client,
jira_project=jira_project,
project_role_holders=holder_map["projectRole"],
)
project_role_user_emails = (
_get_user_emails_from_project_roles(
jira_client=jira_client,
jira_project=jira_project,
project_role_holders=holder_map["projectRole"],
)
# Get groups directly assigned in permission scheme (common in Data Center)
# Format: {'type': 'group', 'parameter': 'group-name', 'expand': 'group'}
direct_groups: list[str] = []
if "group" in holder_map:
for group_holder in holder_map["group"]:
group_name = _get_role_id(group_holder)
if group_name:
direct_groups.append(group_name)
else:
logger.error(f"No parameter/value in group holder: {group_holder}")
if "projectRole" in holder_map
else []
)
external_user_emails = set(user_emails + project_role_user_emails)
external_user_group_ids = set(project_role_groups + direct_groups)
return ExternalAccess(
external_user_emails=external_user_emails,
external_user_group_ids=external_user_group_ids,
external_user_group_ids=set(),
is_public=False,
)
@@ -258,11 +197,9 @@ def get_project_permissions(
)
if not hasattr(project_permissions, "permissions"):
logger.error(f"Project {jira_project} has no permissions attribute")
return None
if not isinstance(project_permissions.permissions, list):
logger.error(f"Project {jira_project} permissions is not a list")
return None
holder_map = _build_holder_map(permissions=project_permissions.permissions)

View File

@@ -15,7 +15,6 @@ from ee.onyx.db.external_perm import ExternalUserGroup
from onyx.access.models import ExternalAccess
from onyx.access.utils import build_ext_group_name_for_onyx
from onyx.configs.constants import DocumentSource
from onyx.connectors.sharepoint.connector import SHARED_DOCUMENTS_MAP_REVERSE
from onyx.connectors.sharepoint.connector import sleep_and_retry
from onyx.utils.logger import setup_logger
@@ -512,8 +511,8 @@ def get_external_access_from_sharepoint(
f"Failed to get SharePoint list item ID for item {drive_item.id}"
)
if drive_name in SHARED_DOCUMENTS_MAP_REVERSE:
drive_name = SHARED_DOCUMENTS_MAP_REVERSE[drive_name]
if drive_name == "Shared Documents":
drive_name = "Documents"
item = client_context.web.lists.get_by_title(drive_name).items.get_by_id(
item_id

View File

@@ -11,7 +11,6 @@ from ee.onyx.configs.app_configs import GITHUB_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GITHUB_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import GOOGLE_DRIVE_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import JIRA_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import JIRA_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_DOC_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SHAREPOINT_PERMISSION_GROUP_SYNC_FREQUENCY
from ee.onyx.configs.app_configs import SLACK_PERMISSION_DOC_SYNC_FREQUENCY
@@ -24,7 +23,6 @@ from ee.onyx.external_permissions.gmail.doc_sync import gmail_doc_sync
from ee.onyx.external_permissions.google_drive.doc_sync import gdrive_doc_sync
from ee.onyx.external_permissions.google_drive.group_sync import gdrive_group_sync
from ee.onyx.external_permissions.jira.doc_sync import jira_doc_sync
from ee.onyx.external_permissions.jira.group_sync import jira_group_sync
from ee.onyx.external_permissions.perm_sync_types import CensoringFuncType
from ee.onyx.external_permissions.perm_sync_types import DocSyncFuncType
from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsFunction
@@ -112,11 +110,6 @@ _SOURCE_TO_SYNC_CONFIG: dict[DocumentSource, SyncConfig] = {
doc_sync_func=jira_doc_sync,
initial_index_should_sync=True,
),
group_sync_config=GroupSyncConfig(
group_sync_frequency=JIRA_PERMISSION_GROUP_SYNC_FREQUENCY,
group_sync_func=jira_group_sync,
group_sync_is_cc_pair_agnostic=True,
),
),
# Groups are not needed for Slack.
# All channel access is done at the individual user level.

View File

@@ -14,7 +14,6 @@ from ee.onyx.server.enterprise_settings.api import (
basic_router as enterprise_settings_router,
)
from ee.onyx.server.evals.api import router as evals_router
from ee.onyx.server.license.api import router as license_router
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
from ee.onyx.server.middleware.tenant_tracking import (
add_api_server_tenant_id_middleware,
@@ -140,8 +139,6 @@ def get_application() -> FastAPI:
)
include_router_with_global_prefix_prepended(application, enterprise_settings_router)
include_router_with_global_prefix_prepended(application, usage_export_router)
# License management
include_router_with_global_prefix_prepended(application, license_router)
if MULTI_TENANT:
# Tenant management

View File

@@ -1,246 +0,0 @@
"""License API endpoints."""
import requests
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import HTTPException
from fastapi import UploadFile
from sqlalchemy.orm import Session
from ee.onyx.auth.users import current_admin_user
from ee.onyx.db.license import delete_license as db_delete_license
from ee.onyx.db.license import get_license_metadata
from ee.onyx.db.license import invalidate_license_cache
from ee.onyx.db.license import refresh_license_cache
from ee.onyx.db.license import update_license_cache
from ee.onyx.db.license import upsert_license
from ee.onyx.server.license.models import LicenseResponse
from ee.onyx.server.license.models import LicenseSource
from ee.onyx.server.license.models import LicenseStatusResponse
from ee.onyx.server.license.models import LicenseUploadResponse
from ee.onyx.server.license.models import SeatUsageResponse
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.utils.license import verify_license_signature
from onyx.auth.users import User
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.db.engine.sql_engine import get_session
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter(prefix="/license")
@router.get("")
async def get_license_status(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> LicenseStatusResponse:
"""Get current license status and seat usage."""
metadata = get_license_metadata(db_session)
if not metadata:
return LicenseStatusResponse(has_license=False)
return LicenseStatusResponse(
has_license=True,
seats=metadata.seats,
used_seats=metadata.used_seats,
plan_type=metadata.plan_type,
issued_at=metadata.issued_at,
expires_at=metadata.expires_at,
grace_period_end=metadata.grace_period_end,
status=metadata.status,
source=metadata.source,
)
@router.get("/seats")
async def get_seat_usage(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> SeatUsageResponse:
"""Get detailed seat usage information."""
metadata = get_license_metadata(db_session)
if not metadata:
return SeatUsageResponse(
total_seats=0,
used_seats=0,
available_seats=0,
)
return SeatUsageResponse(
total_seats=metadata.seats,
used_seats=metadata.used_seats,
available_seats=max(0, metadata.seats - metadata.used_seats),
)
@router.post("/fetch")
async def fetch_license(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> LicenseResponse:
"""
Fetch license from control plane.
Used after Stripe checkout completion to retrieve the new license.
"""
tenant_id = get_current_tenant_id()
try:
token = generate_data_plane_token()
except ValueError as e:
logger.error(f"Failed to generate data plane token: {e}")
raise HTTPException(
status_code=500, detail="Authentication configuration error"
)
try:
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"{CONTROL_PLANE_API_BASE_URL}/license/{tenant_id}"
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
data = response.json()
if not isinstance(data, dict) or "license" not in data:
raise HTTPException(
status_code=502, detail="Invalid response from control plane"
)
license_data = data["license"]
if not license_data:
raise HTTPException(status_code=404, detail="No license found")
# Verify signature before persisting
payload = verify_license_signature(license_data)
# Verify the fetched license is for this tenant
if payload.tenant_id != tenant_id:
logger.error(
f"License tenant mismatch: expected {tenant_id}, got {payload.tenant_id}"
)
raise HTTPException(
status_code=400,
detail="License tenant ID mismatch - control plane returned wrong license",
)
# Persist to DB and update cache atomically
upsert_license(db_session, license_data)
try:
update_license_cache(payload, source=LicenseSource.AUTO_FETCH)
except Exception as cache_error:
# Log but don't fail - DB is source of truth, cache will refresh on next read
logger.warning(f"Failed to update license cache: {cache_error}")
return LicenseResponse(success=True, license=payload)
except requests.HTTPError as e:
status_code = e.response.status_code if e.response is not None else 502
logger.error(f"Control plane returned error: {status_code}")
raise HTTPException(
status_code=status_code,
detail="Failed to fetch license from control plane",
)
except ValueError as e:
logger.error(f"License verification failed: {type(e).__name__}")
raise HTTPException(status_code=400, detail=str(e))
except requests.RequestException:
logger.exception("Failed to fetch license from control plane")
raise HTTPException(
status_code=502, detail="Failed to connect to control plane"
)
@router.post("/upload")
async def upload_license(
license_file: UploadFile = File(...),
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> LicenseUploadResponse:
"""
Upload a license file manually.
Used for air-gapped deployments where control plane is not accessible.
"""
try:
content = await license_file.read()
license_data = content.decode("utf-8").strip()
except UnicodeDecodeError:
raise HTTPException(status_code=400, detail="Invalid license file format")
try:
payload = verify_license_signature(license_data)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
tenant_id = get_current_tenant_id()
if payload.tenant_id != tenant_id:
raise HTTPException(
status_code=400,
detail=f"License tenant ID mismatch. Expected {tenant_id}, got {payload.tenant_id}",
)
# Persist to DB and update cache
upsert_license(db_session, license_data)
try:
update_license_cache(payload, source=LicenseSource.MANUAL_UPLOAD)
except Exception as cache_error:
# Log but don't fail - DB is source of truth, cache will refresh on next read
logger.warning(f"Failed to update license cache: {cache_error}")
return LicenseUploadResponse(
success=True,
message=f"License uploaded successfully. {payload.seats} seats, expires {payload.expires_at.date()}",
)
@router.post("/refresh")
async def refresh_license_cache_endpoint(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> LicenseStatusResponse:
"""
Force refresh the license cache from the database.
Useful after manual database changes or to verify license validity.
"""
metadata = refresh_license_cache(db_session)
if not metadata:
return LicenseStatusResponse(has_license=False)
return LicenseStatusResponse(
has_license=True,
seats=metadata.seats,
used_seats=metadata.used_seats,
plan_type=metadata.plan_type,
issued_at=metadata.issued_at,
expires_at=metadata.expires_at,
grace_period_end=metadata.grace_period_end,
status=metadata.status,
source=metadata.source,
)
@router.delete("")
async def delete_license(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> dict[str, bool]:
"""
Delete the current license.
Admin only - removes license and invalidates cache.
"""
# Invalidate cache first - if DB delete fails, stale cache is worse than no cache
try:
invalidate_license_cache()
except Exception as cache_error:
logger.warning(f"Failed to invalidate license cache: {cache_error}")
deleted = db_delete_license(db_session)
return {"deleted": deleted}

View File

@@ -1,92 +0,0 @@
from datetime import datetime
from enum import Enum
from pydantic import BaseModel
from onyx.server.settings.models import ApplicationStatus
class PlanType(str, Enum):
MONTHLY = "monthly"
ANNUAL = "annual"
class LicenseSource(str, Enum):
AUTO_FETCH = "auto_fetch"
MANUAL_UPLOAD = "manual_upload"
class LicensePayload(BaseModel):
"""The payload portion of a signed license."""
version: str
tenant_id: str
organization_name: str | None = None
issued_at: datetime
expires_at: datetime
seats: int
plan_type: PlanType
billing_cycle: str | None = None
grace_period_days: int = 30
stripe_subscription_id: str | None = None
stripe_customer_id: str | None = None
class LicenseData(BaseModel):
"""Full signed license structure."""
payload: LicensePayload
signature: str
class LicenseMetadata(BaseModel):
"""Cached license metadata stored in Redis."""
tenant_id: str
organization_name: str | None = None
seats: int
used_seats: int
plan_type: PlanType
issued_at: datetime
expires_at: datetime
grace_period_end: datetime | None = None
status: ApplicationStatus
source: LicenseSource | None = None
stripe_subscription_id: str | None = None
class LicenseStatusResponse(BaseModel):
"""Response for license status API."""
has_license: bool
seats: int = 0
used_seats: int = 0
plan_type: PlanType | None = None
issued_at: datetime | None = None
expires_at: datetime | None = None
grace_period_end: datetime | None = None
status: ApplicationStatus | None = None
source: LicenseSource | None = None
class LicenseResponse(BaseModel):
"""Response after license fetch/upload."""
success: bool
message: str | None = None
license: LicensePayload | None = None
class LicenseUploadResponse(BaseModel):
"""Response after license upload."""
success: bool
message: str | None = None
class SeatUsageResponse(BaseModel):
"""Response for seat usage API."""
total_seats: int
used_seats: int
available_seats: int

View File

@@ -8,10 +8,12 @@ from ee.onyx.server.query_and_chat.models import (
BasicCreateChatMessageWithHistoryRequest,
)
from onyx.auth.users import current_user
from onyx.chat.chat_utils import combine_message_thread
from onyx.chat.chat_utils import create_chat_history_chain
from onyx.chat.models import ChatBasicResponse
from onyx.chat.process_message import gather_stream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.constants import MessageType
from onyx.context.search.models import OptionalSearchSetting
from onyx.context.search.models import RetrievalDetails
@@ -20,8 +22,9 @@ from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_or_create_root_message
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.llm.factory import get_llm_for_persona
from onyx.llm.factory import get_llms_for_persona
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.secondary_llm_flows.query_expansion import thread_based_query_rephrase
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.utils.logger import setup_logger
@@ -66,9 +69,9 @@ def handle_simplified_chat_message(
chat_session_id = chat_message_req.chat_session_id
try:
parent_message = create_chat_history_chain(
parent_message, _ = create_chat_history_chain(
chat_session_id=chat_session_id, db_session=db_session
)[-1]
)
except Exception:
parent_message = get_or_create_root_message(
chat_session_id=chat_session_id, db_session=db_session
@@ -100,6 +103,7 @@ def handle_simplified_chat_message(
chunks_below=0,
full_doc=chat_message_req.full_doc,
structured_response_format=chat_message_req.structured_response_format,
use_agentic_search=chat_message_req.use_agentic_search,
)
packets = stream_chat_message_objects(
@@ -157,13 +161,15 @@ def handle_send_message_simple_with_history(
persona_id=req.persona_id,
)
llm = get_llm_for_persona(persona=chat_session.persona, user=user)
llm, _ = get_llms_for_persona(persona=chat_session.persona, user=user)
llm_tokenizer = get_tokenizer(
model_name=llm.config.model_name,
provider_type=llm.config.model_provider,
)
max_history_tokens = int(llm.config.max_input_tokens * CHAT_TARGET_CHUNK_PERCENTAGE)
# Every chat Session begins with an empty root message
root_message = get_or_create_root_message(
chat_session_id=chat_session.id, db_session=db_session
@@ -182,6 +188,17 @@ def handle_send_message_simple_with_history(
)
db_session.commit()
history_str = combine_message_thread(
messages=msg_history,
max_tokens=max_history_tokens,
llm_tokenizer=llm_tokenizer,
)
rephrased_query = req.query_override or thread_based_query_rephrase(
user_query=query,
history_str=history_str,
)
if req.retrieval_options is None and req.search_doc_ids is None:
retrieval_options: RetrievalDetails | None = RetrievalDetails(
run_search=OptionalSearchSetting.ALWAYS,
@@ -199,11 +216,12 @@ def handle_send_message_simple_with_history(
retrieval_options=retrieval_options,
# Simple API does not support reranking, hide complexity from user
rerank_settings=None,
query_override=None,
query_override=rephrased_query,
chunks_above=0,
chunks_below=0,
full_doc=req.full_doc,
structured_response_format=req.structured_response_format,
use_agentic_search=req.use_agentic_search,
)
packets = stream_chat_message_objects(

View File

@@ -54,6 +54,9 @@ class BasicCreateChatMessageRequest(ChunkContext):
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
# If True, uses agentic search instead of basic search
use_agentic_search: bool = False
@model_validator(mode="after")
def validate_chat_session_or_persona(self) -> "BasicCreateChatMessageRequest":
if self.chat_session_id is None and self.persona_id is None:
@@ -73,6 +76,8 @@ class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
# only works if using an OpenAI model. See the following for more details:
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
# If True, uses agentic search instead of basic search
use_agentic_search: bool = False
class SimpleDoc(BaseModel):

View File

@@ -37,14 +37,17 @@ from onyx.db.models import AvailableTenant
from onyx.db.models import IndexModelStatus
from onyx.db.models import SearchSettings
from onyx.db.models import UserTenantMapping
from onyx.llm.constants import LlmProviderNames
from onyx.llm.llm_provider_options import ANTHROPIC_PROVIDER_NAME
from onyx.llm.llm_provider_options import ANTHROPIC_VISIBLE_MODEL_NAMES
from onyx.llm.llm_provider_options import get_anthropic_model_names
from onyx.llm.llm_provider_options import get_openai_model_names
from onyx.llm.llm_provider_options import OPEN_AI_MODEL_NAMES
from onyx.llm.llm_provider_options import OPEN_AI_VISIBLE_MODEL_NAMES
from onyx.llm.llm_provider_options import OPENAI_PROVIDER_NAME
from onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.setup import setup_onyx
from onyx.utils.telemetry import mt_cloud_telemetry
from onyx.utils.telemetry import create_milestone_and_report
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.configs import TENANT_ID_PREFIX
@@ -265,13 +268,14 @@ def configure_default_api_keys(db_session: Session) -> None:
if ANTHROPIC_DEFAULT_API_KEY:
anthropic_provider = LLMProviderUpsertRequest(
name="Anthropic",
provider=LlmProviderNames.ANTHROPIC,
provider=ANTHROPIC_PROVIDER_NAME,
api_key=ANTHROPIC_DEFAULT_API_KEY,
default_model_name="claude-3-7-sonnet-20250219",
fast_default_model_name="claude-3-5-sonnet-20241022",
model_configurations=[
ModelConfigurationUpsertRequest(
name=name,
is_visible=False,
is_visible=name in ANTHROPIC_VISIBLE_MODEL_NAMES,
max_input_tokens=None,
)
for name in get_anthropic_model_names()
@@ -291,16 +295,17 @@ def configure_default_api_keys(db_session: Session) -> None:
if OPENAI_DEFAULT_API_KEY:
openai_provider = LLMProviderUpsertRequest(
name="OpenAI",
provider=LlmProviderNames.OPENAI,
provider=OPENAI_PROVIDER_NAME,
api_key=OPENAI_DEFAULT_API_KEY,
default_model_name="gpt-4o",
fast_default_model_name="gpt-4o-mini",
model_configurations=[
ModelConfigurationUpsertRequest(
name=model_name,
is_visible=False,
is_visible=model_name in OPEN_AI_VISIBLE_MODEL_NAMES,
max_input_tokens=None,
)
for model_name in get_openai_model_names()
for model_name in OPEN_AI_MODEL_NAMES
],
api_key_changed=True,
)
@@ -559,11 +564,17 @@ async def assign_tenant_to_user(
try:
add_users_to_tenant([email], tenant_id)
mt_cloud_telemetry(
tenant_id=tenant_id,
distinct_id=email,
event=MilestoneRecordType.TENANT_CREATED,
)
# Create milestone record in the same transaction context as the tenant assignment
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
create_milestone_and_report(
user=None,
distinct_id=tenant_id,
event_type=MilestoneRecordType.TENANT_CREATED,
properties={
"email": email,
},
db_session=db_session,
)
except Exception:
logger.exception(f"Failed to assign tenant {tenant_id} to user {email}")
raise Exception("Failed to assign tenant to user")

View File

@@ -249,17 +249,6 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
)
raise
# Remove from invited users list since they've accepted
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
invited_users = get_invited_users()
if email in invited_users:
invited_users.remove(email)
write_invited_users(invited_users)
logger.info(f"Removed {email} from invited users list after acceptance")
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
def deny_user_invite(email: str, tenant_id: str) -> None:
"""

View File

@@ -1,126 +0,0 @@
"""RSA-4096 license signature verification utilities."""
import base64
import json
import os
from datetime import datetime
from datetime import timezone
from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey
from ee.onyx.server.license.models import LicenseData
from ee.onyx.server.license.models import LicensePayload
from onyx.server.settings.models import ApplicationStatus
from onyx.utils.logger import setup_logger
logger = setup_logger()
# RSA-4096 Public Key for license verification
# Load from environment variable - key is generated on the control plane
# In production, inject via Kubernetes secrets or secrets manager
LICENSE_PUBLIC_KEY_PEM = os.environ.get("LICENSE_PUBLIC_KEY_PEM", "")
def _get_public_key() -> RSAPublicKey:
"""Load the public key from environment variable."""
if not LICENSE_PUBLIC_KEY_PEM:
raise ValueError(
"LICENSE_PUBLIC_KEY_PEM environment variable not set. "
"License verification requires the control plane public key."
)
key = serialization.load_pem_public_key(LICENSE_PUBLIC_KEY_PEM.encode())
if not isinstance(key, RSAPublicKey):
raise ValueError("Expected RSA public key")
return key
def verify_license_signature(license_data: str) -> LicensePayload:
"""
Verify RSA-4096 signature and return payload if valid.
Args:
license_data: Base64-encoded JSON containing payload and signature
Returns:
LicensePayload if signature is valid
Raises:
ValueError: If license data is invalid or signature verification fails
"""
try:
# Decode the license data
decoded = json.loads(base64.b64decode(license_data))
license_obj = LicenseData(**decoded)
payload_json = json.dumps(
license_obj.payload.model_dump(mode="json"), sort_keys=True
)
signature_bytes = base64.b64decode(license_obj.signature)
# Verify signature using PSS padding (modern standard)
public_key = _get_public_key()
public_key.verify(
signature_bytes,
payload_json.encode(),
padding.PSS(
mgf=padding.MGF1(hashes.SHA256()),
salt_length=padding.PSS.MAX_LENGTH,
),
hashes.SHA256(),
)
return license_obj.payload
except InvalidSignature:
logger.error("License signature verification failed")
raise ValueError("Invalid license signature")
except json.JSONDecodeError:
logger.error("Failed to decode license JSON")
raise ValueError("Invalid license format: not valid JSON")
except (ValueError, KeyError, TypeError) as e:
logger.error(f"License data validation error: {type(e).__name__}")
raise ValueError(f"Invalid license format: {type(e).__name__}")
except Exception:
logger.exception("Unexpected error during license verification")
raise ValueError("License verification failed: unexpected error")
def get_license_status(
payload: LicensePayload,
grace_period_end: datetime | None = None,
) -> ApplicationStatus:
"""
Determine current license status based on expiry.
Args:
payload: The verified license payload
grace_period_end: Optional grace period end datetime
Returns:
ApplicationStatus indicating current license state
"""
now = datetime.now(timezone.utc)
# Check if grace period has expired
if grace_period_end and now > grace_period_end:
return ApplicationStatus.GATED_ACCESS
# Check if license has expired
if now > payload.expires_at:
if grace_period_end and now <= grace_period_end:
return ApplicationStatus.GRACE_PERIOD
return ApplicationStatus.GATED_ACCESS
# License is valid
return ApplicationStatus.ACTIVE
def is_license_valid(payload: LicensePayload) -> bool:
"""Check if a license is currently valid (not expired)."""
now = datetime.now(timezone.utc)
return now <= payload.expires_at

View File

@@ -1,4 +1,5 @@
MODEL_WARM_UP_STRING = "hi " * 512
INFORMATION_CONTENT_MODEL_WARM_UP_STRING = "hi " * 16
class GPUStatus:

View File

@@ -0,0 +1,562 @@
from typing import cast
from typing import Optional
from typing import TYPE_CHECKING
import numpy as np
import torch
import torch.nn.functional as F
from fastapi import APIRouter
from huggingface_hub import snapshot_download # type: ignore
from model_server.constants import INFORMATION_CONTENT_MODEL_WARM_UP_STRING
from model_server.constants import MODEL_WARM_UP_STRING
from model_server.onyx_torch_model import ConnectorClassifier
from model_server.onyx_torch_model import HybridClassifier
from model_server.utils import simple_log_function_time
from onyx.utils.logger import setup_logger
from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO
from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG
from shared_configs.configs import (
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH,
)
from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
from shared_configs.configs import (
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE,
)
from shared_configs.configs import INDEXING_ONLY
from shared_configs.configs import INFORMATION_CONTENT_MODEL_TAG
from shared_configs.configs import INFORMATION_CONTENT_MODEL_VERSION
from shared_configs.configs import INTENT_MODEL_TAG
from shared_configs.configs import INTENT_MODEL_VERSION
from shared_configs.model_server_models import ConnectorClassificationRequest
from shared_configs.model_server_models import ConnectorClassificationResponse
from shared_configs.model_server_models import ContentClassificationPrediction
from shared_configs.model_server_models import IntentRequest
from shared_configs.model_server_models import IntentResponse
if TYPE_CHECKING:
from setfit import SetFitModel # type: ignore
from transformers import PreTrainedTokenizer, BatchEncoding # type: ignore
logger = setup_logger()
router = APIRouter(prefix="/custom")
_CONNECTOR_CLASSIFIER_TOKENIZER: Optional["PreTrainedTokenizer"] = None
_CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None
_INTENT_TOKENIZER: Optional["PreTrainedTokenizer"] = None
_INTENT_MODEL: HybridClassifier | None = None
_INFORMATION_CONTENT_MODEL: Optional["SetFitModel"] = None
_INFORMATION_CONTENT_MODEL_PROMPT_PREFIX: str = "" # spec to model version!
def get_connector_classifier_tokenizer() -> "PreTrainedTokenizer":
global _CONNECTOR_CLASSIFIER_TOKENIZER
from transformers import AutoTokenizer, PreTrainedTokenizer
if _CONNECTOR_CLASSIFIER_TOKENIZER is None:
# The tokenizer details are not uploaded to the HF hub since it's just the
# unmodified distilbert tokenizer.
_CONNECTOR_CLASSIFIER_TOKENIZER = cast(
PreTrainedTokenizer,
AutoTokenizer.from_pretrained("distilbert-base-uncased"),
)
return _CONNECTOR_CLASSIFIER_TOKENIZER
def get_local_connector_classifier(
model_name_or_path: str = CONNECTOR_CLASSIFIER_MODEL_REPO,
tag: str = CONNECTOR_CLASSIFIER_MODEL_TAG,
) -> ConnectorClassifier:
global _CONNECTOR_CLASSIFIER_MODEL
if _CONNECTOR_CLASSIFIER_MODEL is None:
try:
# Calculate where the cache should be, then load from local if available
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=True
)
_CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(
local_path
)
except Exception as e:
logger.warning(f"Failed to load model directly: {e}")
try:
# Attempt to download the model snapshot
logger.info(f"Downloading model snapshot for {model_name_or_path}")
local_path = snapshot_download(repo_id=model_name_or_path, revision=tag)
_CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(
local_path
)
except Exception as e:
logger.error(
f"Failed to load model even after attempted snapshot download: {e}"
)
raise
return _CONNECTOR_CLASSIFIER_MODEL
def get_intent_model_tokenizer() -> "PreTrainedTokenizer":
from transformers import AutoTokenizer, PreTrainedTokenizer
global _INTENT_TOKENIZER
if _INTENT_TOKENIZER is None:
# The tokenizer details are not uploaded to the HF hub since it's just the
# unmodified distilbert tokenizer.
_INTENT_TOKENIZER = cast(
PreTrainedTokenizer,
AutoTokenizer.from_pretrained("distilbert-base-uncased"),
)
return _INTENT_TOKENIZER
def get_local_intent_model(
model_name_or_path: str = INTENT_MODEL_VERSION,
tag: str | None = INTENT_MODEL_TAG,
) -> HybridClassifier:
global _INTENT_MODEL
if _INTENT_MODEL is None:
try:
# Calculate where the cache should be, then load from local if available
logger.notice(f"Loading model from local cache: {model_name_or_path}")
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=True
)
_INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
logger.notice(f"Loaded model from local cache: {local_path}")
except Exception as e:
logger.warning(f"Failed to load model directly: {e}")
try:
# Attempt to download the model snapshot
logger.notice(f"Downloading model snapshot for {model_name_or_path}")
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=False
)
_INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
except Exception as e:
logger.error(
f"Failed to load model even after attempted snapshot download: {e}"
)
raise
return _INTENT_MODEL
def get_local_information_content_model(
model_name_or_path: str = INFORMATION_CONTENT_MODEL_VERSION,
tag: str | None = INFORMATION_CONTENT_MODEL_TAG,
) -> "SetFitModel":
from setfit import SetFitModel
global _INFORMATION_CONTENT_MODEL
if _INFORMATION_CONTENT_MODEL is None:
try:
# Calculate where the cache should be, then load from local if available
logger.notice(
f"Loading content information model from local cache: {model_name_or_path}"
)
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=True
)
_INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
logger.notice(
f"Loaded content information model from local cache: {local_path}"
)
except Exception as e:
logger.warning(f"Failed to load content information model directly: {e}")
try:
# Attempt to download the model snapshot
logger.notice(
f"Downloading content information model snapshot for {model_name_or_path}"
)
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=False
)
_INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
except Exception as e:
logger.error(
f"Failed to load content information model even after attempted snapshot download: {e}"
)
raise
return _INFORMATION_CONTENT_MODEL
def tokenize_connector_classification_query(
connectors: list[str],
query: str,
tokenizer: "PreTrainedTokenizer",
connector_token_end_id: int,
) -> tuple[torch.Tensor, torch.Tensor]:
"""
Tokenize the connectors & user query into one prompt for the forward pass of ConnectorClassifier models
The attention mask is just all 1s. The prompt is CLS + each connector name suffixed with the connector end
token and then the user query.
"""
input_ids = torch.tensor([tokenizer.cls_token_id], dtype=torch.long)
for connector in connectors:
connector_token_ids = tokenizer(
connector,
add_special_tokens=False,
return_tensors="pt",
)
input_ids = torch.cat(
(
input_ids,
connector_token_ids["input_ids"].squeeze(dim=0),
torch.tensor([connector_token_end_id], dtype=torch.long),
),
dim=-1,
)
query_token_ids = tokenizer(
query,
add_special_tokens=False,
return_tensors="pt",
)
input_ids = torch.cat(
(
input_ids,
query_token_ids["input_ids"].squeeze(dim=0),
torch.tensor([tokenizer.sep_token_id], dtype=torch.long),
),
dim=-1,
)
attention_mask = torch.ones(input_ids.numel(), dtype=torch.long)
return input_ids.unsqueeze(0), attention_mask.unsqueeze(0)
def warm_up_connector_classifier_model() -> None:
logger.info(
f"Warming up connector_classifier model {CONNECTOR_CLASSIFIER_MODEL_TAG}"
)
connector_classifier_tokenizer = get_connector_classifier_tokenizer()
connector_classifier = get_local_connector_classifier()
input_ids, attention_mask = tokenize_connector_classification_query(
["GitHub"],
"onyx classifier query google doc",
connector_classifier_tokenizer,
connector_classifier.connector_end_token_id,
)
input_ids = input_ids.to(connector_classifier.device)
attention_mask = attention_mask.to(connector_classifier.device)
connector_classifier(input_ids, attention_mask)
def warm_up_intent_model() -> None:
logger.notice(f"Warming up Intent Model: {INTENT_MODEL_VERSION}")
intent_tokenizer = get_intent_model_tokenizer()
tokens = intent_tokenizer(
MODEL_WARM_UP_STRING, return_tensors="pt", truncation=True, padding=True
)
intent_model = get_local_intent_model()
device = intent_model.device
intent_model(
query_ids=tokens["input_ids"].to(device),
query_mask=tokens["attention_mask"].to(device),
)
def warm_up_information_content_model() -> None:
logger.notice("Warming up Content Model") # TODO: add version if needed
information_content_model = get_local_information_content_model()
information_content_model(INFORMATION_CONTENT_MODEL_WARM_UP_STRING)
@simple_log_function_time()
def run_inference(tokens: "BatchEncoding") -> tuple[list[float], list[float]]:
intent_model = get_local_intent_model()
device = intent_model.device
outputs = intent_model(
query_ids=tokens["input_ids"].to(device),
query_mask=tokens["attention_mask"].to(device),
)
token_logits = outputs["token_logits"]
intent_logits = outputs["intent_logits"]
# Move tensors to CPU before applying softmax and converting to numpy
intent_probabilities = F.softmax(intent_logits.cpu(), dim=-1).numpy()[0]
token_probabilities = F.softmax(token_logits.cpu(), dim=-1).numpy()[0]
# Extract the probabilities for the positive class (index 1) for each token
token_positive_probs = token_probabilities[:, 1].tolist()
return intent_probabilities.tolist(), token_positive_probs
@simple_log_function_time()
def run_content_classification_inference(
text_inputs: list[str],
) -> list[ContentClassificationPrediction]:
"""
Assign a score to the segments in question. The model stored in get_local_information_content_model()
creates the 'model score' based on its training, and the scores are then converted to a 0.0-1.0 scale.
In the code outside of the model/inference model servers that score will be converted into the actual
boost factor.
"""
def _prob_to_score(prob: float) -> float:
"""
Conversion of base score to 0.0 - 1.0 score. Note that the min/max values depend on the model!
"""
_MIN_BASE_SCORE = 0.25
_MAX_BASE_SCORE = 0.75
if prob < _MIN_BASE_SCORE:
raw_score = 0.0
elif prob < _MAX_BASE_SCORE:
raw_score = (prob - _MIN_BASE_SCORE) / (_MAX_BASE_SCORE - _MIN_BASE_SCORE)
else:
raw_score = 1.0
return (
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
+ (
INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
- INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
)
* raw_score
)
_BATCH_SIZE = 32
content_model = get_local_information_content_model()
# Process inputs in batches
all_output_classes: list[int] = []
all_base_output_probabilities: list[float] = []
for i in range(0, len(text_inputs), _BATCH_SIZE):
batch = text_inputs[i : i + _BATCH_SIZE]
batch_with_prefix = []
batch_indices = []
# Pre-allocate results for this batch
batch_output_classes: list[np.ndarray] = [np.array(1)] * len(batch)
batch_probabilities: list[np.ndarray] = [np.array(1.0)] * len(batch)
# Pre-process batch to handle long input exceptions
for j, text in enumerate(batch):
if len(text) == 0:
# if no input, treat as non-informative from the model's perspective
batch_output_classes[j] = np.array(0)
batch_probabilities[j] = np.array(0.0)
logger.warning("Input for Content Information Model is empty")
elif (
len(text.split())
<= INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH
):
# if input is short, use the model
batch_with_prefix.append(
_INFORMATION_CONTENT_MODEL_PROMPT_PREFIX + text
)
batch_indices.append(j)
else:
# if longer than cutoff, treat as informative (stay with default), but issue warning
logger.warning("Input for Content Information Model too long")
if batch_with_prefix: # Only run model if we have valid inputs
# Get predictions for the batch
model_output_classes = content_model(batch_with_prefix)
model_output_probabilities = content_model.predict_proba(batch_with_prefix)
# Place results in the correct positions
for idx, batch_idx in enumerate(batch_indices):
batch_output_classes[batch_idx] = model_output_classes[idx].numpy()
batch_probabilities[batch_idx] = model_output_probabilities[idx][
1
].numpy() # x[1] is prob of the positive class
all_output_classes.extend([int(x) for x in batch_output_classes])
all_base_output_probabilities.extend([float(x) for x in batch_probabilities])
logits = [
np.log(p / (1 - p)) if p != 0.0 and p != 1.0 else (100 if p == 1.0 else -100)
for p in all_base_output_probabilities
]
scaled_logits = [
logit / INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE
for logit in logits
]
output_probabilities_with_temp = [
np.exp(scaled_logit) / (1 + np.exp(scaled_logit))
for scaled_logit in scaled_logits
]
prediction_scores = [
_prob_to_score(p_temp) for p_temp in output_probabilities_with_temp
]
content_classification_predictions = [
ContentClassificationPrediction(
predicted_label=predicted_label, content_boost_factor=output_score
)
for predicted_label, output_score in zip(all_output_classes, prediction_scores)
]
return content_classification_predictions
def map_keywords(
input_ids: torch.Tensor, tokenizer: "PreTrainedTokenizer", is_keyword: list[bool]
) -> list[str]:
tokens = tokenizer.convert_ids_to_tokens(input_ids) # type: ignore
if not len(tokens) == len(is_keyword):
raise ValueError("Length of tokens and keyword predictions must match")
if input_ids[0] == tokenizer.cls_token_id:
tokens = tokens[1:]
is_keyword = is_keyword[1:]
if input_ids[-1] == tokenizer.sep_token_id:
tokens = tokens[:-1]
is_keyword = is_keyword[:-1]
unk_token = tokenizer.unk_token
if unk_token in tokens:
raise ValueError("Unknown token detected in the input")
keywords = []
current_keyword = ""
for ind, token in enumerate(tokens):
if is_keyword[ind]:
if token.startswith("##"):
current_keyword += token[2:]
else:
if current_keyword:
keywords.append(current_keyword)
current_keyword = token
else:
# If mispredicted a later token of a keyword, add it to the current keyword
# to complete it
if current_keyword:
if len(current_keyword) > 2 and current_keyword.startswith("##"):
current_keyword = current_keyword[2:]
else:
keywords.append(current_keyword)
current_keyword = ""
if current_keyword:
keywords.append(current_keyword)
return keywords
def clean_keywords(keywords: list[str]) -> list[str]:
cleaned_words = []
for word in keywords:
word = word[:-2] if word.endswith("'s") else word
word = word.replace("/", " ")
word = word.replace("'", "").replace('"', "")
cleaned_words.extend([w for w in word.strip().split() if w and not w.isspace()])
return cleaned_words
def run_connector_classification(req: ConnectorClassificationRequest) -> list[str]:
tokenizer = get_connector_classifier_tokenizer()
model = get_local_connector_classifier()
connector_names = req.available_connectors
input_ids, attention_mask = tokenize_connector_classification_query(
connector_names,
req.query,
tokenizer,
model.connector_end_token_id,
)
input_ids = input_ids.to(model.device)
attention_mask = attention_mask.to(model.device)
global_confidence, classifier_confidence = model(input_ids, attention_mask)
if global_confidence.item() < 0.5:
return []
passed_connectors = []
for i, connector_name in enumerate(connector_names):
if classifier_confidence.view(-1)[i].item() > 0.5:
passed_connectors.append(connector_name)
return passed_connectors
def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
tokenizer = get_intent_model_tokenizer()
model_input = tokenizer(
intent_req.query, return_tensors="pt", truncation=False, padding=False
)
if len(model_input.input_ids[0]) > 512:
# If the user text is too long, assume it is semantic and keep all words
return True, intent_req.query.split()
intent_probs, token_probs = run_inference(model_input)
is_keyword_sequence = intent_probs[0] >= intent_req.keyword_percent_threshold
keyword_preds = [
token_prob >= intent_req.keyword_percent_threshold for token_prob in token_probs
]
try:
keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
except Exception as e:
logger.warning(
f"Failed to extract keywords for query: {intent_req.query} due to {e}"
)
# Fallback to keeping all words
keywords = intent_req.query.split()
cleaned_keywords = clean_keywords(keywords)
return is_keyword_sequence, cleaned_keywords
@router.post("/connector-classification")
async def process_connector_classification_request(
classification_request: ConnectorClassificationRequest,
) -> ConnectorClassificationResponse:
if INDEXING_ONLY:
raise RuntimeError(
"Indexing model server should not call connector classification endpoint"
)
if len(classification_request.available_connectors) == 0:
return ConnectorClassificationResponse(connectors=[])
connectors = run_connector_classification(classification_request)
return ConnectorClassificationResponse(connectors=connectors)
@router.post("/query-analysis")
async def process_analysis_request(
intent_request: IntentRequest,
) -> IntentResponse:
if INDEXING_ONLY:
raise RuntimeError("Indexing model server should not call intent endpoint")
is_keyword, keywords = run_analysis(intent_request)
return IntentResponse(is_keyword=is_keyword, keywords=keywords)
@router.post("/content-classification")
async def process_content_classification_request(
content_classification_requests: list[str],
) -> list[ContentClassificationPrediction]:
return run_content_classification_inference(content_classification_requests)

View File

@@ -1,6 +1,7 @@
import asyncio
import time
from typing import Any
from typing import Optional
from typing import TYPE_CHECKING
from fastapi import APIRouter
@@ -9,13 +10,16 @@ from fastapi import Request
from model_server.utils import simple_log_function_time
from onyx.utils.logger import setup_logger
from shared_configs.configs import INDEXING_ONLY
from shared_configs.enums import EmbedTextType
from shared_configs.model_server_models import Embedding
from shared_configs.model_server_models import EmbedRequest
from shared_configs.model_server_models import EmbedResponse
from shared_configs.model_server_models import RerankRequest
from shared_configs.model_server_models import RerankResponse
if TYPE_CHECKING:
from sentence_transformers import SentenceTransformer
from sentence_transformers import CrossEncoder, SentenceTransformer
logger = setup_logger()
@@ -23,6 +27,11 @@ router = APIRouter(prefix="/encoder")
_GLOBAL_MODELS_DICT: dict[str, "SentenceTransformer"] = {}
_RERANK_MODEL: Optional["CrossEncoder"] = None
# If we are not only indexing, dont want retry very long
_RETRY_DELAY = 10 if INDEXING_ONLY else 0.1
_RETRY_TRIES = 10 if INDEXING_ONLY else 2
def get_embedding_model(
@@ -33,7 +42,7 @@ def get_embedding_model(
Loads or returns a cached SentenceTransformer, sets max_seq_length, pins device,
pre-warms rotary caches once, and wraps encode() with a lock to avoid cache races.
"""
from sentence_transformers import SentenceTransformer
from sentence_transformers import SentenceTransformer # type: ignore
def _prewarm_rope(st_model: "SentenceTransformer", target_len: int) -> None:
"""
@@ -78,6 +87,19 @@ def get_embedding_model(
return _GLOBAL_MODELS_DICT[model_name]
def get_local_reranking_model(
model_name: str,
) -> "CrossEncoder":
global _RERANK_MODEL
from sentence_transformers import CrossEncoder # type: ignore
if _RERANK_MODEL is None:
logger.notice(f"Loading {model_name}")
model = CrossEncoder(model_name)
_RERANK_MODEL = model
return _RERANK_MODEL
ENCODING_RETRIES = 3
ENCODING_RETRY_DELAY = 0.1
@@ -167,6 +189,16 @@ async def embed_text(
return embeddings
@simple_log_function_time()
async def local_rerank(query: str, docs: list[str], model_name: str) -> list[float]:
cross_encoder = get_local_reranking_model(model_name)
# Run CPU-bound reranking in a thread pool
return await asyncio.get_event_loop().run_in_executor(
None,
lambda: cross_encoder.predict([(query, doc) for doc in docs]).tolist(), # type: ignore
)
@router.post("/bi-encoder-embed")
async def route_bi_encoder_embed(
request: Request,
@@ -222,3 +254,39 @@ async def process_embed_request(
raise HTTPException(
status_code=500, detail=f"Error during embedding process: {e}"
)
@router.post("/cross-encoder-scores")
async def process_rerank_request(rerank_request: RerankRequest) -> RerankResponse:
"""Cross encoders can be purely black box from the app perspective"""
# Only local models should use this endpoint - API providers should make direct API calls
if rerank_request.provider_type is not None:
raise ValueError(
f"Model server reranking endpoint should only be used for local models. "
f"API provider '{rerank_request.provider_type}' should make direct API calls instead."
)
if INDEXING_ONLY:
raise RuntimeError("Indexing model server should not call intent endpoint")
if not rerank_request.documents or not rerank_request.query:
raise HTTPException(
status_code=400, detail="Missing documents or query for reranking"
)
if not all(rerank_request.documents):
raise ValueError("Empty documents cannot be reranked.")
try:
# At this point, provider_type is None, so handle local reranking
sim_scores = await local_rerank(
query=rerank_request.query,
docs=rerank_request.documents,
model_name=rerank_request.model_name,
)
return RerankResponse(scores=sim_scores)
except Exception as e:
logger.exception(f"Error during reranking process:\n{str(e)}")
raise HTTPException(
status_code=500, detail="Failed to run Cross-Encoder reranking"
)

View File

@@ -1,5 +0,0 @@
This directory contains code that was useful and may become useful again in the future.
We stopped using rerankers because the state of the art rerankers are not significantly better than the biencoders and much worse than LLMs which are also capable of acting on a small set of documents for filtering, reranking, etc.
We stopped using the internal query classifier as that's now offloaded to the LLM which does query expansion so we know ahead of time if it's a keyword or semantic query.

View File

@@ -1,573 +0,0 @@
# from typing import cast
# from typing import Optional
# from typing import TYPE_CHECKING
# import numpy as np
# import torch
# import torch.nn.functional as F
# from fastapi import APIRouter
# from huggingface_hub import snapshot_download
# from pydantic import BaseModel
# from model_server.constants import MODEL_WARM_UP_STRING
# from model_server.legacy.onyx_torch_model import ConnectorClassifier
# from model_server.legacy.onyx_torch_model import HybridClassifier
# from model_server.utils import simple_log_function_time
# from onyx.utils.logger import setup_logger
# from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO
# from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG
# from shared_configs.configs import INDEXING_ONLY
# from shared_configs.configs import INTENT_MODEL_TAG
# from shared_configs.configs import INTENT_MODEL_VERSION
# from shared_configs.model_server_models import IntentRequest
# from shared_configs.model_server_models import IntentResponse
# if TYPE_CHECKING:
# from setfit import SetFitModel # type: ignore[import-untyped]
# from transformers import PreTrainedTokenizer, BatchEncoding
# INFORMATION_CONTENT_MODEL_WARM_UP_STRING = "hi" * 50
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX = 1.0
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN = 0.7
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE = 4.0
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH = 10
# INFORMATION_CONTENT_MODEL_VERSION = "onyx-dot-app/information-content-model"
# INFORMATION_CONTENT_MODEL_TAG: str | None = None
# class ConnectorClassificationRequest(BaseModel):
# available_connectors: list[str]
# query: str
# class ConnectorClassificationResponse(BaseModel):
# connectors: list[str]
# class ContentClassificationPrediction(BaseModel):
# predicted_label: int
# content_boost_factor: float
# logger = setup_logger()
# router = APIRouter(prefix="/custom")
# _CONNECTOR_CLASSIFIER_TOKENIZER: Optional["PreTrainedTokenizer"] = None
# _CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None
# _INTENT_TOKENIZER: Optional["PreTrainedTokenizer"] = None
# _INTENT_MODEL: HybridClassifier | None = None
# _INFORMATION_CONTENT_MODEL: Optional["SetFitModel"] = None
# _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX: str = "" # spec to model version!
# def get_connector_classifier_tokenizer() -> "PreTrainedTokenizer":
# global _CONNECTOR_CLASSIFIER_TOKENIZER
# from transformers import AutoTokenizer, PreTrainedTokenizer
# if _CONNECTOR_CLASSIFIER_TOKENIZER is None:
# # The tokenizer details are not uploaded to the HF hub since it's just the
# # unmodified distilbert tokenizer.
# _CONNECTOR_CLASSIFIER_TOKENIZER = cast(
# PreTrainedTokenizer,
# AutoTokenizer.from_pretrained("distilbert-base-uncased"),
# )
# return _CONNECTOR_CLASSIFIER_TOKENIZER
# def get_local_connector_classifier(
# model_name_or_path: str = CONNECTOR_CLASSIFIER_MODEL_REPO,
# tag: str = CONNECTOR_CLASSIFIER_MODEL_TAG,
# ) -> ConnectorClassifier:
# global _CONNECTOR_CLASSIFIER_MODEL
# if _CONNECTOR_CLASSIFIER_MODEL is None:
# try:
# # Calculate where the cache should be, then load from local if available
# local_path = snapshot_download(
# repo_id=model_name_or_path, revision=tag, local_files_only=True
# )
# _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(
# local_path
# )
# except Exception as e:
# logger.warning(f"Failed to load model directly: {e}")
# try:
# # Attempt to download the model snapshot
# logger.info(f"Downloading model snapshot for {model_name_or_path}")
# local_path = snapshot_download(repo_id=model_name_or_path, revision=tag)
# _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(
# local_path
# )
# except Exception as e:
# logger.error(
# f"Failed to load model even after attempted snapshot download: {e}"
# )
# raise
# return _CONNECTOR_CLASSIFIER_MODEL
# def get_intent_model_tokenizer() -> "PreTrainedTokenizer":
# from transformers import AutoTokenizer, PreTrainedTokenizer
# global _INTENT_TOKENIZER
# if _INTENT_TOKENIZER is None:
# # The tokenizer details are not uploaded to the HF hub since it's just the
# # unmodified distilbert tokenizer.
# _INTENT_TOKENIZER = cast(
# PreTrainedTokenizer,
# AutoTokenizer.from_pretrained("distilbert-base-uncased"),
# )
# return _INTENT_TOKENIZER
# def get_local_intent_model(
# model_name_or_path: str = INTENT_MODEL_VERSION,
# tag: str | None = INTENT_MODEL_TAG,
# ) -> HybridClassifier:
# global _INTENT_MODEL
# if _INTENT_MODEL is None:
# try:
# # Calculate where the cache should be, then load from local if available
# logger.notice(f"Loading model from local cache: {model_name_or_path}")
# local_path = snapshot_download(
# repo_id=model_name_or_path, revision=tag, local_files_only=True
# )
# _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
# logger.notice(f"Loaded model from local cache: {local_path}")
# except Exception as e:
# logger.warning(f"Failed to load model directly: {e}")
# try:
# # Attempt to download the model snapshot
# logger.notice(f"Downloading model snapshot for {model_name_or_path}")
# local_path = snapshot_download(
# repo_id=model_name_or_path, revision=tag, local_files_only=False
# )
# _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
# except Exception as e:
# logger.error(
# f"Failed to load model even after attempted snapshot download: {e}"
# )
# raise
# return _INTENT_MODEL
# def get_local_information_content_model(
# model_name_or_path: str = INFORMATION_CONTENT_MODEL_VERSION,
# tag: str | None = INFORMATION_CONTENT_MODEL_TAG,
# ) -> "SetFitModel":
# from setfit import SetFitModel
# global _INFORMATION_CONTENT_MODEL
# if _INFORMATION_CONTENT_MODEL is None:
# try:
# # Calculate where the cache should be, then load from local if available
# logger.notice(
# f"Loading content information model from local cache: {model_name_or_path}"
# )
# local_path = snapshot_download(
# repo_id=model_name_or_path, revision=tag, local_files_only=True
# )
# _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
# logger.notice(
# f"Loaded content information model from local cache: {local_path}"
# )
# except Exception as e:
# logger.warning(f"Failed to load content information model directly: {e}")
# try:
# # Attempt to download the model snapshot
# logger.notice(
# f"Downloading content information model snapshot for {model_name_or_path}"
# )
# local_path = snapshot_download(
# repo_id=model_name_or_path, revision=tag, local_files_only=False
# )
# _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
# except Exception as e:
# logger.error(
# f"Failed to load content information model even after attempted snapshot download: {e}"
# )
# raise
# return _INFORMATION_CONTENT_MODEL
# def tokenize_connector_classification_query(
# connectors: list[str],
# query: str,
# tokenizer: "PreTrainedTokenizer",
# connector_token_end_id: int,
# ) -> tuple[torch.Tensor, torch.Tensor]:
# """
# Tokenize the connectors & user query into one prompt for the forward pass of ConnectorClassifier models
# The attention mask is just all 1s. The prompt is CLS + each connector name suffixed with the connector end
# token and then the user query.
# """
# input_ids = torch.tensor([tokenizer.cls_token_id], dtype=torch.long)
# for connector in connectors:
# connector_token_ids = tokenizer(
# connector,
# add_special_tokens=False,
# return_tensors="pt",
# )
# input_ids = torch.cat(
# (
# input_ids,
# connector_token_ids["input_ids"].squeeze(dim=0),
# torch.tensor([connector_token_end_id], dtype=torch.long),
# ),
# dim=-1,
# )
# query_token_ids = tokenizer(
# query,
# add_special_tokens=False,
# return_tensors="pt",
# )
# input_ids = torch.cat(
# (
# input_ids,
# query_token_ids["input_ids"].squeeze(dim=0),
# torch.tensor([tokenizer.sep_token_id], dtype=torch.long),
# ),
# dim=-1,
# )
# attention_mask = torch.ones(input_ids.numel(), dtype=torch.long)
# return input_ids.unsqueeze(0), attention_mask.unsqueeze(0)
# def warm_up_connector_classifier_model() -> None:
# logger.info(
# f"Warming up connector_classifier model {CONNECTOR_CLASSIFIER_MODEL_TAG}"
# )
# connector_classifier_tokenizer = get_connector_classifier_tokenizer()
# connector_classifier = get_local_connector_classifier()
# input_ids, attention_mask = tokenize_connector_classification_query(
# ["GitHub"],
# "onyx classifier query google doc",
# connector_classifier_tokenizer,
# connector_classifier.connector_end_token_id,
# )
# input_ids = input_ids.to(connector_classifier.device)
# attention_mask = attention_mask.to(connector_classifier.device)
# connector_classifier(input_ids, attention_mask)
# def warm_up_intent_model() -> None:
# logger.notice(f"Warming up Intent Model: {INTENT_MODEL_VERSION}")
# intent_tokenizer = get_intent_model_tokenizer()
# tokens = intent_tokenizer(
# MODEL_WARM_UP_STRING, return_tensors="pt", truncation=True, padding=True
# )
# intent_model = get_local_intent_model()
# device = intent_model.device
# intent_model(
# query_ids=tokens["input_ids"].to(device),
# query_mask=tokens["attention_mask"].to(device),
# )
# def warm_up_information_content_model() -> None:
# logger.notice("Warming up Content Model") # TODO: add version if needed
# information_content_model = get_local_information_content_model()
# information_content_model(INFORMATION_CONTENT_MODEL_WARM_UP_STRING)
# @simple_log_function_time()
# def run_inference(tokens: "BatchEncoding") -> tuple[list[float], list[float]]:
# intent_model = get_local_intent_model()
# device = intent_model.device
# outputs = intent_model(
# query_ids=tokens["input_ids"].to(device),
# query_mask=tokens["attention_mask"].to(device),
# )
# token_logits = outputs["token_logits"]
# intent_logits = outputs["intent_logits"]
# # Move tensors to CPU before applying softmax and converting to numpy
# intent_probabilities = F.softmax(intent_logits.cpu(), dim=-1).numpy()[0]
# token_probabilities = F.softmax(token_logits.cpu(), dim=-1).numpy()[0]
# # Extract the probabilities for the positive class (index 1) for each token
# token_positive_probs = token_probabilities[:, 1].tolist()
# return intent_probabilities.tolist(), token_positive_probs
# @simple_log_function_time()
# def run_content_classification_inference(
# text_inputs: list[str],
# ) -> list[ContentClassificationPrediction]:
# """
# Assign a score to the segments in question. The model stored in get_local_information_content_model()
# creates the 'model score' based on its training, and the scores are then converted to a 0.0-1.0 scale.
# In the code outside of the model/inference model servers that score will be converted into the actual
# boost factor.
# """
# def _prob_to_score(prob: float) -> float:
# """
# Conversion of base score to 0.0 - 1.0 score. Note that the min/max values depend on the model!
# """
# _MIN_BASE_SCORE = 0.25
# _MAX_BASE_SCORE = 0.75
# if prob < _MIN_BASE_SCORE:
# raw_score = 0.0
# elif prob < _MAX_BASE_SCORE:
# raw_score = (prob - _MIN_BASE_SCORE) / (_MAX_BASE_SCORE - _MIN_BASE_SCORE)
# else:
# raw_score = 1.0
# return (
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
# + (
# INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
# - INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
# )
# * raw_score
# )
# _BATCH_SIZE = 32
# content_model = get_local_information_content_model()
# # Process inputs in batches
# all_output_classes: list[int] = []
# all_base_output_probabilities: list[float] = []
# for i in range(0, len(text_inputs), _BATCH_SIZE):
# batch = text_inputs[i : i + _BATCH_SIZE]
# batch_with_prefix = []
# batch_indices = []
# # Pre-allocate results for this batch
# batch_output_classes: list[np.ndarray] = [np.array(1)] * len(batch)
# batch_probabilities: list[np.ndarray] = [np.array(1.0)] * len(batch)
# # Pre-process batch to handle long input exceptions
# for j, text in enumerate(batch):
# if len(text) == 0:
# # if no input, treat as non-informative from the model's perspective
# batch_output_classes[j] = np.array(0)
# batch_probabilities[j] = np.array(0.0)
# logger.warning("Input for Content Information Model is empty")
# elif (
# len(text.split())
# <= INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH
# ):
# # if input is short, use the model
# batch_with_prefix.append(
# _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX + text
# )
# batch_indices.append(j)
# else:
# # if longer than cutoff, treat as informative (stay with default), but issue warning
# logger.warning("Input for Content Information Model too long")
# if batch_with_prefix: # Only run model if we have valid inputs
# # Get predictions for the batch
# model_output_classes = content_model(batch_with_prefix)
# model_output_probabilities = content_model.predict_proba(batch_with_prefix)
# # Place results in the correct positions
# for idx, batch_idx in enumerate(batch_indices):
# batch_output_classes[batch_idx] = model_output_classes[idx].numpy()
# batch_probabilities[batch_idx] = model_output_probabilities[idx][
# 1
# ].numpy() # x[1] is prob of the positive class
# all_output_classes.extend([int(x) for x in batch_output_classes])
# all_base_output_probabilities.extend([float(x) for x in batch_probabilities])
# logits = [
# np.log(p / (1 - p)) if p != 0.0 and p != 1.0 else (100 if p == 1.0 else -100)
# for p in all_base_output_probabilities
# ]
# scaled_logits = [
# logit / INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE
# for logit in logits
# ]
# output_probabilities_with_temp = [
# np.exp(scaled_logit) / (1 + np.exp(scaled_logit))
# for scaled_logit in scaled_logits
# ]
# prediction_scores = [
# _prob_to_score(p_temp) for p_temp in output_probabilities_with_temp
# ]
# content_classification_predictions = [
# ContentClassificationPrediction(
# predicted_label=predicted_label, content_boost_factor=output_score
# )
# for predicted_label, output_score in zip(all_output_classes, prediction_scores)
# ]
# return content_classification_predictions
# def map_keywords(
# input_ids: torch.Tensor, tokenizer: "PreTrainedTokenizer", is_keyword: list[bool]
# ) -> list[str]:
# tokens = tokenizer.convert_ids_to_tokens(input_ids) # type: ignore
# if not len(tokens) == len(is_keyword):
# raise ValueError("Length of tokens and keyword predictions must match")
# if input_ids[0] == tokenizer.cls_token_id:
# tokens = tokens[1:]
# is_keyword = is_keyword[1:]
# if input_ids[-1] == tokenizer.sep_token_id:
# tokens = tokens[:-1]
# is_keyword = is_keyword[:-1]
# unk_token = tokenizer.unk_token
# if unk_token in tokens:
# raise ValueError("Unknown token detected in the input")
# keywords = []
# current_keyword = ""
# for ind, token in enumerate(tokens):
# if is_keyword[ind]:
# if token.startswith("##"):
# current_keyword += token[2:]
# else:
# if current_keyword:
# keywords.append(current_keyword)
# current_keyword = token
# else:
# # If mispredicted a later token of a keyword, add it to the current keyword
# # to complete it
# if current_keyword:
# if len(current_keyword) > 2 and current_keyword.startswith("##"):
# current_keyword = current_keyword[2:]
# else:
# keywords.append(current_keyword)
# current_keyword = ""
# if current_keyword:
# keywords.append(current_keyword)
# return keywords
# def clean_keywords(keywords: list[str]) -> list[str]:
# cleaned_words = []
# for word in keywords:
# word = word[:-2] if word.endswith("'s") else word
# word = word.replace("/", " ")
# word = word.replace("'", "").replace('"', "")
# cleaned_words.extend([w for w in word.strip().split() if w and not w.isspace()])
# return cleaned_words
# def run_connector_classification(req: ConnectorClassificationRequest) -> list[str]:
# tokenizer = get_connector_classifier_tokenizer()
# model = get_local_connector_classifier()
# connector_names = req.available_connectors
# input_ids, attention_mask = tokenize_connector_classification_query(
# connector_names,
# req.query,
# tokenizer,
# model.connector_end_token_id,
# )
# input_ids = input_ids.to(model.device)
# attention_mask = attention_mask.to(model.device)
# global_confidence, classifier_confidence = model(input_ids, attention_mask)
# if global_confidence.item() < 0.5:
# return []
# passed_connectors = []
# for i, connector_name in enumerate(connector_names):
# if classifier_confidence.view(-1)[i].item() > 0.5:
# passed_connectors.append(connector_name)
# return passed_connectors
# def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
# tokenizer = get_intent_model_tokenizer()
# model_input = tokenizer(
# intent_req.query, return_tensors="pt", truncation=False, padding=False
# )
# if len(model_input.input_ids[0]) > 512:
# # If the user text is too long, assume it is semantic and keep all words
# return True, intent_req.query.split()
# intent_probs, token_probs = run_inference(model_input)
# is_keyword_sequence = intent_probs[0] >= intent_req.keyword_percent_threshold
# keyword_preds = [
# token_prob >= intent_req.keyword_percent_threshold for token_prob in token_probs
# ]
# try:
# keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
# except Exception as e:
# logger.warning(
# f"Failed to extract keywords for query: {intent_req.query} due to {e}"
# )
# # Fallback to keeping all words
# keywords = intent_req.query.split()
# cleaned_keywords = clean_keywords(keywords)
# return is_keyword_sequence, cleaned_keywords
# @router.post("/connector-classification")
# async def process_connector_classification_request(
# classification_request: ConnectorClassificationRequest,
# ) -> ConnectorClassificationResponse:
# if INDEXING_ONLY:
# raise RuntimeError(
# "Indexing model server should not call connector classification endpoint"
# )
# if len(classification_request.available_connectors) == 0:
# return ConnectorClassificationResponse(connectors=[])
# connectors = run_connector_classification(classification_request)
# return ConnectorClassificationResponse(connectors=connectors)
# @router.post("/query-analysis")
# async def process_analysis_request(
# intent_request: IntentRequest,
# ) -> IntentResponse:
# if INDEXING_ONLY:
# raise RuntimeError("Indexing model server should not call intent endpoint")
# is_keyword, keywords = run_analysis(intent_request)
# return IntentResponse(is_keyword=is_keyword, keywords=keywords)
# @router.post("/content-classification")
# async def process_content_classification_request(
# content_classification_requests: list[str],
# ) -> list[ContentClassificationPrediction]:
# return run_content_classification_inference(content_classification_requests)

View File

@@ -1,154 +0,0 @@
# import json
# import os
# from typing import cast
# from typing import TYPE_CHECKING
# import torch
# import torch.nn as nn
# if TYPE_CHECKING:
# from transformers import DistilBertConfig
# class HybridClassifier(nn.Module):
# def __init__(self) -> None:
# from transformers import DistilBertConfig, DistilBertModel
# super().__init__()
# config = DistilBertConfig()
# self.distilbert = DistilBertModel(config)
# config = self.distilbert.config # type: ignore
# # Keyword tokenwise binary classification layer
# self.keyword_classifier = nn.Linear(config.dim, 2)
# # Intent Classifier layers
# self.pre_classifier = nn.Linear(config.dim, config.dim)
# self.intent_classifier = nn.Linear(config.dim, 2)
# self.device = torch.device("cpu")
# def forward(
# self,
# query_ids: torch.Tensor,
# query_mask: torch.Tensor,
# ) -> dict[str, torch.Tensor]:
# outputs = self.distilbert(input_ids=query_ids, attention_mask=query_mask)
# sequence_output = outputs.last_hidden_state
# # Intent classification on the CLS token
# cls_token_state = sequence_output[:, 0, :]
# pre_classifier_out = self.pre_classifier(cls_token_state)
# intent_logits = self.intent_classifier(pre_classifier_out)
# # Keyword classification on all tokens
# token_logits = self.keyword_classifier(sequence_output)
# return {"intent_logits": intent_logits, "token_logits": token_logits}
# @classmethod
# def from_pretrained(cls, load_directory: str) -> "HybridClassifier":
# model_path = os.path.join(load_directory, "pytorch_model.bin")
# config_path = os.path.join(load_directory, "config.json")
# with open(config_path, "r") as f:
# config = json.load(f)
# model = cls(**config)
# if torch.backends.mps.is_available():
# # Apple silicon GPU
# device = torch.device("mps")
# elif torch.cuda.is_available():
# device = torch.device("cuda")
# else:
# device = torch.device("cpu")
# model.load_state_dict(torch.load(model_path, map_location=device))
# model = model.to(device)
# model.device = device
# model.eval()
# # Eval doesn't set requires_grad to False, do it manually to save memory and have faster inference
# for param in model.parameters():
# param.requires_grad = False
# return model
# class ConnectorClassifier(nn.Module):
# def __init__(self, config: "DistilBertConfig") -> None:
# from transformers import DistilBertTokenizer, DistilBertModel
# super().__init__()
# self.config = config
# self.distilbert = DistilBertModel(config)
# config = self.distilbert.config # type: ignore
# self.connector_global_classifier = nn.Linear(config.dim, 1)
# self.connector_match_classifier = nn.Linear(config.dim, 1)
# self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
# # Token indicating end of connector name, and on which classifier is used
# self.connector_end_token_id = self.tokenizer.get_vocab()[
# self.config.connector_end_token
# ]
# self.device = torch.device("cpu")
# def forward(
# self,
# input_ids: torch.Tensor,
# attention_mask: torch.Tensor,
# ) -> tuple[torch.Tensor, torch.Tensor]:
# hidden_states = self.distilbert(
# input_ids=input_ids, attention_mask=attention_mask
# ).last_hidden_state
# cls_hidden_states = hidden_states[
# :, 0, :
# ] # Take leap of faith that first token is always [CLS]
# global_logits = self.connector_global_classifier(cls_hidden_states).view(-1)
# global_confidence = torch.sigmoid(global_logits).view(-1)
# connector_end_position_ids = input_ids == self.connector_end_token_id
# connector_end_hidden_states = hidden_states[connector_end_position_ids]
# classifier_output = self.connector_match_classifier(connector_end_hidden_states)
# classifier_confidence = torch.nn.functional.sigmoid(classifier_output).view(-1)
# return global_confidence, classifier_confidence
# @classmethod
# def from_pretrained(cls, repo_dir: str) -> "ConnectorClassifier":
# from transformers import DistilBertConfig
# config = cast(
# DistilBertConfig,
# DistilBertConfig.from_pretrained(os.path.join(repo_dir, "config.json")),
# )
# device = (
# torch.device("cuda")
# if torch.cuda.is_available()
# else (
# torch.device("mps")
# if torch.backends.mps.is_available()
# else torch.device("cpu")
# )
# )
# state_dict = torch.load(
# os.path.join(repo_dir, "pytorch_model.pt"),
# map_location=device,
# weights_only=True,
# )
# model = cls(config)
# model.load_state_dict(state_dict)
# model.to(device)
# model.device = device
# model.eval()
# for param in model.parameters():
# param.requires_grad = False
# return model

View File

@@ -1,80 +0,0 @@
# import asyncio
# from typing import Optional
# from typing import TYPE_CHECKING
# from fastapi import APIRouter
# from fastapi import HTTPException
# from model_server.utils import simple_log_function_time
# from onyx.utils.logger import setup_logger
# from shared_configs.configs import INDEXING_ONLY
# from shared_configs.model_server_models import RerankRequest
# from shared_configs.model_server_models import RerankResponse
# if TYPE_CHECKING:
# from sentence_transformers import CrossEncoder
# logger = setup_logger()
# router = APIRouter(prefix="/encoder")
# _RERANK_MODEL: Optional["CrossEncoder"] = None
# def get_local_reranking_model(
# model_name: str,
# ) -> "CrossEncoder":
# global _RERANK_MODEL
# from sentence_transformers import CrossEncoder
# if _RERANK_MODEL is None:
# logger.notice(f"Loading {model_name}")
# model = CrossEncoder(model_name)
# _RERANK_MODEL = model
# return _RERANK_MODEL
# @simple_log_function_time()
# async def local_rerank(query: str, docs: list[str], model_name: str) -> list[float]:
# cross_encoder = get_local_reranking_model(model_name)
# # Run CPU-bound reranking in a thread pool
# return await asyncio.get_event_loop().run_in_executor(
# None,
# lambda: cross_encoder.predict([(query, doc) for doc in docs]).tolist(),
# )
# @router.post("/cross-encoder-scores")
# async def process_rerank_request(rerank_request: RerankRequest) -> RerankResponse:
# """Cross encoders can be purely black box from the app perspective"""
# # Only local models should use this endpoint - API providers should make direct API calls
# if rerank_request.provider_type is not None:
# raise ValueError(
# f"Model server reranking endpoint should only be used for local models. "
# f"API provider '{rerank_request.provider_type}' should make direct API calls instead."
# )
# if INDEXING_ONLY:
# raise RuntimeError("Indexing model server should not call reranking endpoint")
# if not rerank_request.documents or not rerank_request.query:
# raise HTTPException(
# status_code=400, detail="Missing documents or query for reranking"
# )
# if not all(rerank_request.documents):
# raise ValueError("Empty documents cannot be reranked.")
# try:
# # At this point, provider_type is None, so handle local reranking
# sim_scores = await local_rerank(
# query=rerank_request.query,
# docs=rerank_request.documents,
# model_name=rerank_request.model_name,
# )
# return RerankResponse(scores=sim_scores)
# except Exception as e:
# logger.exception(f"Error during reranking process:\n{str(e)}")
# raise HTTPException(
# status_code=500, detail="Failed to run Cross-Encoder reranking"
# )

View File

@@ -12,8 +12,11 @@ from fastapi import FastAPI
from prometheus_fastapi_instrumentator import Instrumentator
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.starlette import StarletteIntegration
from transformers import logging as transformer_logging
from transformers import logging as transformer_logging # type:ignore
from model_server.custom_models import router as custom_models_router
from model_server.custom_models import warm_up_information_content_model
from model_server.custom_models import warm_up_intent_model
from model_server.encoders import router as encoders_router
from model_server.management_endpoints import router as management_router
from model_server.utils import get_gpu_type
@@ -27,6 +30,7 @@ from shared_configs.configs import MIN_THREADS_ML_MODELS
from shared_configs.configs import MODEL_SERVER_ALLOWED_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.configs import SENTRY_DSN
from shared_configs.configs import SKIP_WARM_UP
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
@@ -88,6 +92,18 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads()))
logger.notice(f"Torch Threads: {torch.get_num_threads()}")
if not SKIP_WARM_UP:
if not INDEXING_ONLY:
logger.notice("Warming up intent model for inference model server")
warm_up_intent_model()
else:
logger.notice(
"Warming up content information model for indexing model server"
)
warm_up_information_content_model()
else:
logger.notice("Skipping model warmup due to SKIP_WARM_UP=true")
yield
@@ -107,6 +123,7 @@ def get_model_app() -> FastAPI:
application.include_router(management_router)
application.include_router(encoders_router)
application.include_router(custom_models_router)
request_id_prefix = "INF"
if INDEXING_ONLY:

View File

@@ -0,0 +1,154 @@
import json
import os
from typing import cast
from typing import TYPE_CHECKING
import torch
import torch.nn as nn
if TYPE_CHECKING:
from transformers import DistilBertConfig # type: ignore
class HybridClassifier(nn.Module):
def __init__(self) -> None:
from transformers import DistilBertConfig, DistilBertModel
super().__init__()
config = DistilBertConfig()
self.distilbert = DistilBertModel(config)
config = self.distilbert.config # type: ignore
# Keyword tokenwise binary classification layer
self.keyword_classifier = nn.Linear(config.dim, 2)
# Intent Classifier layers
self.pre_classifier = nn.Linear(config.dim, config.dim)
self.intent_classifier = nn.Linear(config.dim, 2)
self.device = torch.device("cpu")
def forward(
self,
query_ids: torch.Tensor,
query_mask: torch.Tensor,
) -> dict[str, torch.Tensor]:
outputs = self.distilbert(input_ids=query_ids, attention_mask=query_mask) # type: ignore
sequence_output = outputs.last_hidden_state
# Intent classification on the CLS token
cls_token_state = sequence_output[:, 0, :]
pre_classifier_out = self.pre_classifier(cls_token_state)
intent_logits = self.intent_classifier(pre_classifier_out)
# Keyword classification on all tokens
token_logits = self.keyword_classifier(sequence_output)
return {"intent_logits": intent_logits, "token_logits": token_logits}
@classmethod
def from_pretrained(cls, load_directory: str) -> "HybridClassifier":
model_path = os.path.join(load_directory, "pytorch_model.bin")
config_path = os.path.join(load_directory, "config.json")
with open(config_path, "r") as f:
config = json.load(f)
model = cls(**config)
if torch.backends.mps.is_available():
# Apple silicon GPU
device = torch.device("mps")
elif torch.cuda.is_available():
device = torch.device("cuda")
else:
device = torch.device("cpu")
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)
model.device = device
model.eval()
# Eval doesn't set requires_grad to False, do it manually to save memory and have faster inference
for param in model.parameters():
param.requires_grad = False
return model
class ConnectorClassifier(nn.Module):
def __init__(self, config: "DistilBertConfig") -> None:
from transformers import DistilBertTokenizer, DistilBertModel
super().__init__()
self.config = config
self.distilbert = DistilBertModel(config)
config = self.distilbert.config # type: ignore
self.connector_global_classifier = nn.Linear(config.dim, 1)
self.connector_match_classifier = nn.Linear(config.dim, 1)
self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
# Token indicating end of connector name, and on which classifier is used
self.connector_end_token_id = self.tokenizer.get_vocab()[
self.config.connector_end_token
]
self.device = torch.device("cpu")
def forward(
self,
input_ids: torch.Tensor,
attention_mask: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]:
hidden_states = self.distilbert( # type: ignore
input_ids=input_ids, attention_mask=attention_mask
).last_hidden_state
cls_hidden_states = hidden_states[
:, 0, :
] # Take leap of faith that first token is always [CLS]
global_logits = self.connector_global_classifier(cls_hidden_states).view(-1)
global_confidence = torch.sigmoid(global_logits).view(-1)
connector_end_position_ids = input_ids == self.connector_end_token_id
connector_end_hidden_states = hidden_states[connector_end_position_ids]
classifier_output = self.connector_match_classifier(connector_end_hidden_states)
classifier_confidence = torch.nn.functional.sigmoid(classifier_output).view(-1)
return global_confidence, classifier_confidence
@classmethod
def from_pretrained(cls, repo_dir: str) -> "ConnectorClassifier":
from transformers import DistilBertConfig
config = cast(
DistilBertConfig,
DistilBertConfig.from_pretrained(os.path.join(repo_dir, "config.json")),
)
device = (
torch.device("cuda")
if torch.cuda.is_available()
else (
torch.device("mps")
if torch.backends.mps.is_available()
else torch.device("cpu")
)
)
state_dict = torch.load(
os.path.join(repo_dir, "pytorch_model.pt"),
map_location=device,
weights_only=True,
)
model = cls(config)
model.load_state_dict(state_dict)
model.to(device)
model.device = device
model.eval()
for param in model.parameters():
param.requires_grad = False
return model

View File

@@ -43,7 +43,7 @@ def get_access_for_document(
versioned_get_access_for_document_fn = fetch_versioned_implementation(
"onyx.access.access", "_get_access_for_document"
)
return versioned_get_access_for_document_fn(document_id, db_session)
return versioned_get_access_for_document_fn(document_id, db_session) # type: ignore
def get_null_document_access() -> DocumentAccess:
@@ -93,7 +93,9 @@ def get_access_for_documents(
versioned_get_access_for_documents_fn = fetch_versioned_implementation(
"onyx.access.access", "_get_access_for_documents"
)
return versioned_get_access_for_documents_fn(document_ids, db_session)
return versioned_get_access_for_documents_fn(
document_ids, db_session
) # type: ignore
def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
@@ -111,7 +113,7 @@ def get_acl_for_user(user: User | None, db_session: Session | None = None) -> se
versioned_acl_for_user_fn = fetch_versioned_implementation(
"onyx.access.access", "_get_acl_for_user"
)
return versioned_acl_for_user_fn(user, db_session)
return versioned_acl_for_user_fn(user, db_session) # type: ignore
def source_should_fetch_permissions_during_indexing(source: DocumentSource) -> bool:

View File

@@ -0,0 +1,73 @@
import json
from collections.abc import Sequence
from typing import cast
from langchain_core.messages import AIMessage
from langchain_core.messages import BaseMessage
from langchain_core.messages import FunctionMessage
from onyx.llm.message_types import AssistantMessage
from onyx.llm.message_types import ChatCompletionMessage
from onyx.llm.message_types import FunctionCall
from onyx.llm.message_types import SystemMessage
from onyx.llm.message_types import ToolCall
from onyx.llm.message_types import ToolMessage
from onyx.llm.message_types import UserMessageWithText
HUMAN = "human"
SYSTEM = "system"
AI = "ai"
FUNCTION = "function"
def base_messages_to_chat_completion_msgs(
msgs: Sequence[BaseMessage],
) -> list[ChatCompletionMessage]:
return [_base_message_to_chat_completion_msg(msg) for msg in msgs]
def _base_message_to_chat_completion_msg(
msg: BaseMessage,
) -> ChatCompletionMessage:
if msg.type == HUMAN:
content = msg.content if isinstance(msg.content, str) else str(msg.content)
user_msg: UserMessageWithText = {"role": "user", "content": content}
return user_msg
if msg.type == SYSTEM:
content = msg.content if isinstance(msg.content, str) else str(msg.content)
system_msg: SystemMessage = {"role": "system", "content": content}
return system_msg
if msg.type == AI:
content = msg.content if isinstance(msg.content, str) else str(msg.content)
assistant_msg: AssistantMessage = {
"role": "assistant",
"content": content,
}
if isinstance(msg, AIMessage) and msg.tool_calls:
assistant_msg["tool_calls"] = [
ToolCall(
id=tool_call.get("id") or "",
type="function",
function=FunctionCall(
name=tool_call["name"],
arguments=json.dumps(tool_call["args"]),
),
)
for tool_call in msg.tool_calls
]
return assistant_msg
if msg.type == FUNCTION:
function_message = cast(FunctionMessage, msg)
content = (
function_message.content
if isinstance(function_message.content, str)
else str(function_message.content)
)
tool_msg: ToolMessage = {
"role": "tool",
"content": content,
"tool_call_id": function_message.name or "",
}
return tool_msg
raise ValueError(f"Unexpected message type: {msg.type}")

View File

@@ -0,0 +1,47 @@
from typing import Any
from typing import Literal
from typing import TypeAlias
from pydantic import BaseModel
from onyx.llm.model_response import ModelResponseStream
class ToolCallStreamItem(BaseModel):
call_id: str | None = None
id: str | None = None
name: str | None = None
arguments: str | None = None
type: Literal["function_call"] = "function_call"
index: int | None = None
class ToolCallOutputStreamItem(BaseModel):
call_id: str | None = None
output: Any
type: Literal["function_call_output"] = "function_call_output"
RunItemStreamEventDetails: TypeAlias = ToolCallStreamItem | ToolCallOutputStreamItem
class RunItemStreamEvent(BaseModel):
type: Literal[
"message_start",
"message_done",
"reasoning_start",
"reasoning_done",
"tool_call",
"tool_call_output",
]
details: RunItemStreamEventDetails | None = None
StreamEvent: TypeAlias = ModelResponseStream | RunItemStreamEvent

View File

@@ -0,0 +1,309 @@
# import json
# from collections.abc import Callable
# from collections.abc import Iterator
# from collections.abc import Sequence
# from dataclasses import dataclass
# from typing import Any
# from onyx.agents.agent_framework.models import RunItemStreamEvent
# from onyx.agents.agent_framework.models import StreamEvent
# from onyx.agents.agent_framework.models import ToolCallStreamItem
# from onyx.llm.interfaces import LanguageModelInput
# from onyx.llm.interfaces import LLM
# from onyx.llm.interfaces import ToolChoiceOptions
# from onyx.llm.message_types import ChatCompletionMessage
# from onyx.llm.message_types import ToolCall
# from onyx.llm.model_response import ModelResponseStream
# from onyx.tools.tool import Tool
# from onyx.tracing.framework.create import agent_span
# from onyx.tracing.framework.create import generation_span
# @dataclass
# class QueryResult:
# stream: Iterator[StreamEvent]
# new_messages_stateful: list[ChatCompletionMessage]
# def _serialize_tool_output(output: Any) -> str:
# if isinstance(output, str):
# return output
# try:
# return json.dumps(output)
# except TypeError:
# return str(output)
# def _parse_tool_calls_from_message_content(
# content: str,
# ) -> list[dict[str, Any]]:
# """Parse JSON content that represents tool call instructions."""
# try:
# parsed_content = json.loads(content)
# except json.JSONDecodeError:
# return []
# if isinstance(parsed_content, dict):
# candidates = [parsed_content]
# elif isinstance(parsed_content, list):
# candidates = [item for item in parsed_content if isinstance(item, dict)]
# else:
# return []
# tool_calls: list[dict[str, Any]] = []
# for candidate in candidates:
# name = candidate.get("name")
# arguments = candidate.get("arguments")
# if not isinstance(name, str) or arguments is None:
# continue
# if not isinstance(arguments, dict):
# continue
# call_id = candidate.get("id")
# arguments_str = json.dumps(arguments)
# tool_calls.append(
# {
# "id": call_id,
# "name": name,
# "arguments": arguments_str,
# }
# )
# return tool_calls
# def _try_convert_content_to_tool_calls_for_non_tool_calling_llms(
# tool_calls_in_progress: dict[int, dict[str, Any]],
# content_parts: list[str],
# structured_response_format: dict | None,
# next_synthetic_tool_call_id: Callable[[], str],
# ) -> None:
# """Populate tool_calls_in_progress when a non-tool-calling LLM returns JSON content describing tool calls."""
# if tool_calls_in_progress or not content_parts or structured_response_format:
# return
# tool_calls_from_content = _parse_tool_calls_from_message_content(
# "".join(content_parts)
# )
# if not tool_calls_from_content:
# return
# content_parts.clear()
# for index, tool_call_data in enumerate(tool_calls_from_content):
# call_id = tool_call_data["id"] or next_synthetic_tool_call_id()
# tool_calls_in_progress[index] = {
# "id": call_id,
# "name": tool_call_data["name"],
# "arguments": tool_call_data["arguments"],
# }
# def _update_tool_call_with_delta(
# tool_calls_in_progress: dict[int, dict[str, Any]],
# tool_call_delta: Any,
# ) -> None:
# index = tool_call_delta.index
# if index not in tool_calls_in_progress:
# tool_calls_in_progress[index] = {
# "id": None,
# "name": None,
# "arguments": "",
# }
# if tool_call_delta.id:
# tool_calls_in_progress[index]["id"] = tool_call_delta.id
# if tool_call_delta.function:
# if tool_call_delta.function.name:
# tool_calls_in_progress[index]["name"] = tool_call_delta.function.name
# if tool_call_delta.function.arguments:
# tool_calls_in_progress[index][
# "arguments"
# ] += tool_call_delta.function.arguments
# def query(
# llm_with_default_settings: LLM,
# messages: LanguageModelInput,
# tools: Sequence[Tool],
# context: Any,
# tool_choice: ToolChoiceOptions | None = None,
# structured_response_format: dict | None = None,
# ) -> QueryResult:
# tool_definitions = [tool.tool_definition() for tool in tools]
# tools_by_name = {tool.name: tool for tool in tools}
# new_messages_stateful: list[ChatCompletionMessage] = []
# current_span = agent_span(
# name="agent_framework_query",
# output_type="dict" if structured_response_format else "str",
# )
# current_span.start(mark_as_current=True)
# current_span.span_data.tools = [t.name for t in tools]
# def stream_generator() -> Iterator[StreamEvent]:
# message_started = False
# reasoning_started = False
# tool_calls_in_progress: dict[int, dict[str, Any]] = {}
# content_parts: list[str] = []
# synthetic_tool_call_counter = 0
# def _next_synthetic_tool_call_id() -> str:
# nonlocal synthetic_tool_call_counter
# call_id = f"synthetic_tool_call_{synthetic_tool_call_counter}"
# synthetic_tool_call_counter += 1
# return call_id
# with generation_span( # type: ignore[misc]
# model=llm_with_default_settings.config.model_name,
# model_config={
# "base_url": str(llm_with_default_settings.config.api_base or ""),
# "model_impl": "litellm",
# },
# ) as span_generation:
# # Only set input if messages is a sequence (not a string)
# # ChatCompletionMessage TypedDicts are compatible with Mapping[str, Any] at runtime
# if isinstance(messages, Sequence) and not isinstance(messages, str):
# # Convert ChatCompletionMessage sequence to Sequence[Mapping[str, Any]]
# span_generation.span_data.input = [dict(msg) for msg in messages] # type: ignore[assignment]
# for chunk in llm_with_default_settings.stream(
# prompt=messages,
# tools=tool_definitions,
# tool_choice=tool_choice,
# structured_response_format=structured_response_format,
# ):
# assert isinstance(chunk, ModelResponseStream)
# usage = getattr(chunk, "usage", None)
# if usage:
# span_generation.span_data.usage = {
# "input_tokens": usage.prompt_tokens,
# "output_tokens": usage.completion_tokens,
# "cache_read_input_tokens": usage.cache_read_input_tokens,
# "cache_creation_input_tokens": usage.cache_creation_input_tokens,
# }
# delta = chunk.choice.delta
# finish_reason = chunk.choice.finish_reason
# if delta.reasoning_content:
# if not reasoning_started:
# yield RunItemStreamEvent(type="reasoning_start")
# reasoning_started = True
# if delta.content:
# if reasoning_started:
# yield RunItemStreamEvent(type="reasoning_done")
# reasoning_started = False
# content_parts.append(delta.content)
# if not message_started:
# yield RunItemStreamEvent(type="message_start")
# message_started = True
# if delta.tool_calls:
# if reasoning_started:
# yield RunItemStreamEvent(type="reasoning_done")
# reasoning_started = False
# if message_started:
# yield RunItemStreamEvent(type="message_done")
# message_started = False
# for tool_call_delta in delta.tool_calls:
# _update_tool_call_with_delta(
# tool_calls_in_progress, tool_call_delta
# )
# yield chunk
# if not finish_reason:
# continue
# if reasoning_started:
# yield RunItemStreamEvent(type="reasoning_done")
# reasoning_started = False
# if message_started:
# yield RunItemStreamEvent(type="message_done")
# message_started = False
# if tool_choice != "none":
# _try_convert_content_to_tool_calls_for_non_tool_calling_llms(
# tool_calls_in_progress,
# content_parts,
# structured_response_format,
# _next_synthetic_tool_call_id,
# )
# if content_parts:
# new_messages_stateful.append(
# {
# "role": "assistant",
# "content": "".join(content_parts),
# }
# )
# span_generation.span_data.output = new_messages_stateful
# # Execute tool calls outside of the stream loop and generation_span
# if tool_calls_in_progress:
# sorted_tool_calls = sorted(tool_calls_in_progress.items())
# # Build tool calls for the message and execute tools
# assistant_tool_calls: list[ToolCall] = []
# for _, tool_call_data in sorted_tool_calls:
# call_id = tool_call_data["id"]
# name = tool_call_data["name"]
# arguments_str = tool_call_data["arguments"]
# if call_id is None or name is None:
# continue
# assistant_tool_calls.append(
# {
# "id": call_id,
# "type": "function",
# "function": {
# "name": name,
# "arguments": arguments_str,
# },
# }
# )
# yield RunItemStreamEvent(
# type="tool_call",
# details=ToolCallStreamItem(
# call_id=call_id,
# name=name,
# arguments=arguments_str,
# ),
# )
# if name in tools_by_name:
# tools_by_name[name]
# json.loads(arguments_str)
# run_context = RunContextWrapper(context=context)
# TODO: Instead of executing sequentially, execute in parallel
# In practice, it's not a must right now since we don't use parallel
# tool calls, so kicking the can down the road for now.
# TODO broken for now, no need for a run_v2
# output = tool.run_v2(run_context, **arguments)
# yield RunItemStreamEvent(
# type="tool_call_output",
# details=ToolCallOutputStreamItem(
# call_id=call_id,
# output=output,
# ),
# )

View File

@@ -0,0 +1,167 @@
from collections.abc import Sequence
from langchain.schema.messages import BaseMessage
from onyx.agents.agent_sdk.message_types import AgentSDKMessage
from onyx.agents.agent_sdk.message_types import AssistantMessageWithContent
from onyx.agents.agent_sdk.message_types import ImageContent
from onyx.agents.agent_sdk.message_types import InputTextContent
from onyx.agents.agent_sdk.message_types import SystemMessage
from onyx.agents.agent_sdk.message_types import UserMessage
# TODO: Currently, we only support native API input for images. For other
# files, we process the content and share it as text in the message. In
# the future, we might support native file uploads for other types of files.
def base_messages_to_agent_sdk_msgs(
msgs: Sequence[BaseMessage],
is_responses_api: bool,
) -> list[AgentSDKMessage]:
return [_base_message_to_agent_sdk_msg(msg, is_responses_api) for msg in msgs]
def _base_message_to_agent_sdk_msg(
msg: BaseMessage, is_responses_api: bool
) -> AgentSDKMessage:
message_type_to_agent_sdk_role = {
"human": "user",
"system": "system",
"ai": "assistant",
}
role = message_type_to_agent_sdk_role[msg.type]
# Convert content to Agent SDK format
content = msg.content
if isinstance(content, str):
# For system/user/assistant messages, use InputTextContent
if role in ("system", "user"):
input_text_content: list[InputTextContent | ImageContent] = [
InputTextContent(type="input_text", text=content)
]
if role == "system":
# SystemMessage only accepts InputTextContent
system_msg: SystemMessage = {
"role": "system",
"content": [InputTextContent(type="input_text", text=content)],
}
return system_msg
else: # user
user_msg: UserMessage = {
"role": "user",
"content": input_text_content,
}
return user_msg
else: # assistant
assistant_msg: AssistantMessageWithContent
if is_responses_api:
from onyx.agents.agent_sdk.message_types import OutputTextContent
assistant_msg = {
"role": "assistant",
"content": [OutputTextContent(type="output_text", text=content)],
}
else:
assistant_msg = {
"role": "assistant",
"content": [InputTextContent(type="input_text", text=content)],
}
return assistant_msg
elif isinstance(content, list):
# For lists, we need to process based on the role
if role == "assistant":
# For responses API, use OutputTextContent; otherwise use InputTextContent
assistant_content: list[InputTextContent | OutputTextContent] = []
if is_responses_api:
from onyx.agents.agent_sdk.message_types import OutputTextContent
for item in content:
if isinstance(item, str):
assistant_content.append(
OutputTextContent(type="output_text", text=item)
)
elif isinstance(item, dict) and item.get("type") == "text":
assistant_content.append(
OutputTextContent(
type="output_text", text=item.get("text", "")
)
)
else:
raise ValueError(
f"Unexpected item type for assistant message: {type(item)}. Item: {item}"
)
else:
for item in content:
if isinstance(item, str):
assistant_content.append(
InputTextContent(type="input_text", text=item)
)
elif isinstance(item, dict) and item.get("type") == "text":
assistant_content.append(
InputTextContent(
type="input_text", text=item.get("text", "")
)
)
else:
raise ValueError(
f"Unexpected item type for assistant message: {type(item)}. Item: {item}"
)
assistant_msg_list: AssistantMessageWithContent = {
"role": "assistant",
"content": assistant_content,
}
return assistant_msg_list
else: # system or user - use InputTextContent
input_content: list[InputTextContent | ImageContent] = []
for item in content:
if isinstance(item, str):
input_content.append(InputTextContent(type="input_text", text=item))
elif isinstance(item, dict):
item_type = item.get("type")
if item_type == "text":
input_content.append(
InputTextContent(
type="input_text", text=item.get("text", "")
)
)
elif item_type == "image_url":
# Convert image_url to input_image format
image_url = item.get("image_url", {})
if isinstance(image_url, dict):
url = image_url.get("url", "")
else:
url = image_url
input_content.append(
ImageContent(
type="input_image", image_url=url, detail="auto"
)
)
else:
raise ValueError(f"Unexpected item type: {item_type}")
else:
raise ValueError(
f"Unexpected item type: {type(item)}. Item: {item}"
)
if role == "system":
# SystemMessage only accepts InputTextContent (no images)
text_only_content = [
c for c in input_content if c["type"] == "input_text"
]
system_msg_list: SystemMessage = {
"role": "system",
"content": text_only_content, # type: ignore[typeddict-item]
}
return system_msg_list
else: # user
user_msg_list: UserMessage = {
"role": "user",
"content": input_content,
}
return user_msg_list
else:
raise ValueError(
f"Unexpected content type: {type(content)}. Content: {content}"
)

View File

@@ -0,0 +1,125 @@
"""Strongly typed message structures for Agent SDK messages."""
from typing import Literal
from typing import NotRequired
from typing_extensions import TypedDict
class InputTextContent(TypedDict):
type: Literal["input_text"]
text: str
class OutputTextContent(TypedDict):
type: Literal["output_text"]
text: str
TextContent = InputTextContent | OutputTextContent
class ImageContent(TypedDict):
type: Literal["input_image"]
image_url: str
detail: str
# Tool call structures
class ToolCallFunction(TypedDict):
name: str
arguments: str
class ToolCall(TypedDict):
id: str
type: Literal["function"]
function: ToolCallFunction
# Message types
class SystemMessage(TypedDict):
role: Literal["system"]
content: list[InputTextContent] # System messages use input text
class UserMessage(TypedDict):
role: Literal["user"]
content: list[
InputTextContent | ImageContent
] # User messages use input text or images
class AssistantMessageWithContent(TypedDict):
role: Literal["assistant"]
content: list[
InputTextContent | OutputTextContent
] # Assistant messages use output_text for responses API compatibility
class AssistantMessageWithToolCalls(TypedDict):
role: Literal["assistant"]
tool_calls: list[ToolCall]
class AssistantMessageDuringAgentRun(TypedDict):
role: Literal["assistant"]
id: str
content: (
list[InputTextContent | OutputTextContent] | list[ToolCall]
) # Assistant runtime messages receive output_text from agents SDK for responses API compatibility
status: Literal["completed", "failed", "in_progress"]
type: Literal["message"]
class ToolMessage(TypedDict):
role: Literal["tool"]
content: str
tool_call_id: str
class FunctionCallMessage(TypedDict):
"""Agent SDK function call message format."""
type: Literal["function_call"]
id: NotRequired[str]
call_id: str
name: str
arguments: str
class FunctionCallOutputMessage(TypedDict):
"""Agent SDK function call output message format."""
type: Literal["function_call_output"]
call_id: str
output: str
class SummaryText(TypedDict):
"""Summary text item in reasoning messages."""
text: str
type: Literal["summary_text"]
class ReasoningMessage(TypedDict):
"""Agent SDK reasoning message format."""
id: str
type: Literal["reasoning"]
summary: list[SummaryText]
# Union type for all Agent SDK messages
AgentSDKMessage = (
SystemMessage
| UserMessage
| AssistantMessageWithContent
| AssistantMessageWithToolCalls
| AssistantMessageDuringAgentRun
| ToolMessage
| FunctionCallMessage
| FunctionCallOutputMessage
| ReasoningMessage
)

View File

@@ -0,0 +1,36 @@
from typing import Any
from agents.models.openai_responses import Converter as OpenAIResponsesConverter
# TODO: I am very sad that I have to monkey patch this :(
# Basically, OpenAI agents sdk doesn't convert the tool choice correctly
# when they have a built-in tool in their framework, like they do for web_search
# and image_generation.
# Going to open up a thread with OpenAI agents team to see what they recommend
# or what we can fix.
# A discussion is warranted, but we likely want to just write our own LitellmModel for
# the OpenAI agents SDK since they probably don't really care about Litellm and will
# prioritize functionality for their own models.
def monkey_patch_convert_tool_choice_to_ignore_openai_hosted_web_search() -> None:
if (
getattr(OpenAIResponsesConverter.convert_tool_choice, "__name__", "")
== "_patched_convert_tool_choice"
):
return
orig_func = OpenAIResponsesConverter.convert_tool_choice.__func__ # type: ignore[attr-defined]
def _patched_convert_tool_choice(cls: type, tool_choice: Any) -> Any:
# Handle OpenAI hosted tools that we have custom implementations for
# Without this patch, the library uses special formatting that breaks our custom tools
# See: https://platform.openai.com/docs/api-reference/responses/create#responses_create-tool_choice-hosted_tool-type
if tool_choice == "web_search":
return {"type": "function", "name": "web_search"}
if tool_choice == "image_generation":
return {"type": "function", "name": "image_generation"}
return orig_func(cls, tool_choice)
OpenAIResponsesConverter.convert_tool_choice = classmethod( # type: ignore[method-assign, assignment]
_patched_convert_tool_choice
)

View File

@@ -0,0 +1,178 @@
import asyncio
import queue
import threading
from collections.abc import Iterator
from collections.abc import Sequence
from typing import Generic
from typing import Optional
from typing import TypeVar
from agents import Agent
from agents import RunResultStreaming
from agents import TContext
from agents.run import Runner
from onyx.agents.agent_sdk.message_types import AgentSDKMessage
from onyx.utils.threadpool_concurrency import run_in_background
T = TypeVar("T")
class SyncAgentStream(Generic[T]):
"""
Convert an async streamed run into a sync iterator with cooperative cancellation.
Runs the Agent in a background thread.
Usage:
adapter = SyncStreamAdapter(
agent=agent,
input=input,
context=context,
max_turns=100,
queue_maxsize=0, # optional backpressure
)
for ev in adapter: # sync iteration
...
# or cancel from elsewhere:
adapter.cancel()
"""
_SENTINEL = object()
def __init__(
self,
*,
agent: Agent,
input: Sequence[AgentSDKMessage],
context: TContext | None = None,
max_turns: int = 100,
queue_maxsize: int = 0,
) -> None:
self._agent = agent
self._input = input
self._context = context
self._max_turns = max_turns
self._q: "queue.Queue[object]" = queue.Queue(maxsize=queue_maxsize)
self._loop: Optional[asyncio.AbstractEventLoop] = None
self._thread: Optional[threading.Thread] = None
self.streamed: RunResultStreaming | None = None
self._exc: Optional[BaseException] = None
self._cancel_requested = threading.Event()
self._started = threading.Event()
self._done = threading.Event()
self._start_thread()
# ---------- public sync API ----------
def __iter__(self) -> Iterator[T]:
try:
while True:
item = self._q.get()
if item is self._SENTINEL:
# If the consumer thread raised, surface it now
if self._exc is not None:
raise self._exc
# Normal completion
return
yield item # type: ignore[misc,return-value]
finally:
# Ensure we fully clean up whether we exited due to exception,
# StopIteration, or external cancel.
self.close()
def cancel(self) -> bool:
"""
Cooperatively cancel the underlying streamed run and shut down.
Safe to call multiple times and from any thread.
"""
self._cancel_requested.set()
loop = self._loop
streamed = self.streamed
if loop is not None and streamed is not None and not self._done.is_set():
loop.call_soon_threadsafe(streamed.cancel)
return True
return False
def close(self, *, wait: bool = True) -> None:
"""Idempotent shutdown."""
self.cancel()
# ask the loop to stop if it's still running
loop = self._loop
if loop is not None and loop.is_running():
try:
loop.call_soon_threadsafe(loop.stop)
except Exception:
pass
# join the thread
if wait and self._thread is not None and self._thread.is_alive():
self._thread.join(timeout=5.0)
# ---------- internals ----------
def _start_thread(self) -> None:
t = run_in_background(self._thread_main)
self._thread = t
# Optionally wait until the loop/worker is started so .cancel() is safe soon after init
self._started.wait(timeout=1.0)
def _thread_main(self) -> None:
loop = asyncio.new_event_loop()
self._loop = loop
asyncio.set_event_loop(loop)
async def worker() -> None:
try:
# Start the streamed run inside the loop thread
self.streamed = Runner.run_streamed(
self._agent,
self._input, # type: ignore[arg-type]
context=self._context,
max_turns=self._max_turns,
)
# If cancel was requested before we created _streamed, honor it now
if self._cancel_requested.is_set():
await self.streamed.cancel() # type: ignore[func-returns-value]
# Consume async events and forward into the thread-safe queue
async for ev in self.streamed.stream_events():
# Early exit if a late cancel arrives
if self._cancel_requested.is_set():
# Try to cancel gracefully; don't break until cancel takes effect
try:
await self.streamed.cancel() # type: ignore[func-returns-value]
except Exception:
pass
break
# This put() may block if queue_maxsize > 0 (backpressure)
self._q.put(ev)
except BaseException as e:
# Save exception to surface on the sync iterator side
self._exc = e
finally:
# Signal end-of-stream
self._q.put(self._SENTINEL)
self._done.set()
# Mark started and run the worker to completion
self._started.set()
try:
loop.run_until_complete(worker())
finally:
try:
# Drain pending tasks/callbacks safely
pending = asyncio.all_tasks(loop=loop)
for task in pending:
task.cancel()
if pending:
loop.run_until_complete(
asyncio.gather(*pending, return_exceptions=True)
)
except Exception:
pass
finally:
loop.close()
self._loop = None

View File

@@ -0,0 +1,21 @@
# from operator import add
# from typing import Annotated
# from pydantic import BaseModel
# class CoreState(BaseModel):
# """
# This is the core state that is shared across all subgraphs.
# """
# log_messages: Annotated[list[str], add] = []
# current_step_nr: int = 1
# class SubgraphCoreState(BaseModel):
# """
# This is the core state that is shared across all subgraphs.
# """
# log_messages: Annotated[list[str], add] = []

View File

@@ -0,0 +1,62 @@
# from collections.abc import Hashable
# from typing import cast
# from langchain_core.runnables.config import RunnableConfig
# from langgraph.types import Send
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectInformationInput
# from onyx.agents.agent_search.dc_search_analysis.states import (
# ObjectResearchInformationUpdate,
# )
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectSourceInput
# from onyx.agents.agent_search.dc_search_analysis.states import (
# SearchSourcesObjectsUpdate,
# )
# from onyx.agents.agent_search.models import GraphConfig
# def parallel_object_source_research_edge(
# state: SearchSourcesObjectsUpdate, config: RunnableConfig
# ) -> list[Send | Hashable]:
# """
# LangGraph edge to parallelize the research for an individual object and source
# """
# search_objects = state.analysis_objects
# search_sources = state.analysis_sources
# object_source_combinations = [
# (object, source) for object in search_objects for source in search_sources
# ]
# return [
# Send(
# "research_object_source",
# ObjectSourceInput(
# object_source_combination=object_source_combination,
# log_messages=[],
# ),
# )
# for object_source_combination in object_source_combinations
# ]
# def parallel_object_research_consolidation_edge(
# state: ObjectResearchInformationUpdate, config: RunnableConfig
# ) -> list[Send | Hashable]:
# """
# LangGraph edge to parallelize the research for an individual object and source
# """
# cast(GraphConfig, config["metadata"]["config"])
# object_research_information_results = state.object_research_information_results
# return [
# Send(
# "consolidate_object_research",
# ObjectInformationInput(
# object_information=object_information,
# log_messages=[],
# ),
# )
# for object_information in object_research_information_results
# ]

View File

@@ -0,0 +1,103 @@
# from langgraph.graph import END
# from langgraph.graph import START
# from langgraph.graph import StateGraph
# from onyx.agents.agent_search.dc_search_analysis.edges import (
# parallel_object_research_consolidation_edge,
# )
# from onyx.agents.agent_search.dc_search_analysis.edges import (
# parallel_object_source_research_edge,
# )
# from onyx.agents.agent_search.dc_search_analysis.nodes.a1_search_objects import (
# search_objects,
# )
# from onyx.agents.agent_search.dc_search_analysis.nodes.a2_research_object_source import (
# research_object_source,
# )
# from onyx.agents.agent_search.dc_search_analysis.nodes.a3_structure_research_by_object import (
# structure_research_by_object,
# )
# from onyx.agents.agent_search.dc_search_analysis.nodes.a4_consolidate_object_research import (
# consolidate_object_research,
# )
# from onyx.agents.agent_search.dc_search_analysis.nodes.a5_consolidate_research import (
# consolidate_research,
# )
# from onyx.agents.agent_search.dc_search_analysis.states import MainInput
# from onyx.agents.agent_search.dc_search_analysis.states import MainState
# from onyx.utils.logger import setup_logger
# logger = setup_logger()
# test_mode = False
# def divide_and_conquer_graph_builder(test_mode: bool = False) -> StateGraph:
# """
# LangGraph graph builder for the knowledge graph search process.
# """
# graph = StateGraph(
# state_schema=MainState,
# input=MainInput,
# )
# ### Add nodes ###
# graph.add_node(
# "search_objects",
# search_objects,
# )
# graph.add_node(
# "structure_research_by_source",
# structure_research_by_object,
# )
# graph.add_node(
# "research_object_source",
# research_object_source,
# )
# graph.add_node(
# "consolidate_object_research",
# consolidate_object_research,
# )
# graph.add_node(
# "consolidate_research",
# consolidate_research,
# )
# ### Add edges ###
# graph.add_edge(start_key=START, end_key="search_objects")
# graph.add_conditional_edges(
# source="search_objects",
# path=parallel_object_source_research_edge,
# path_map=["research_object_source"],
# )
# graph.add_edge(
# start_key="research_object_source",
# end_key="structure_research_by_source",
# )
# graph.add_conditional_edges(
# source="structure_research_by_source",
# path=parallel_object_research_consolidation_edge,
# path_map=["consolidate_object_research"],
# )
# graph.add_edge(
# start_key="consolidate_object_research",
# end_key="consolidate_research",
# )
# graph.add_edge(
# start_key="consolidate_research",
# end_key=END,
# )
# return graph

View File

@@ -0,0 +1,146 @@
# from typing import cast
# from langchain_core.messages import HumanMessage
# from langchain_core.runnables import RunnableConfig
# from langgraph.types import StreamWriter
# from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
# from onyx.agents.agent_search.dc_search_analysis.ops import research
# from onyx.agents.agent_search.dc_search_analysis.states import MainState
# from onyx.agents.agent_search.dc_search_analysis.states import (
# SearchSourcesObjectsUpdate,
# )
# from onyx.agents.agent_search.models import GraphConfig
# from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
# trim_prompt_piece,
# )
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_NO_BASE_DATA_EXTRACTION_PROMPT
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_SEPARATOR
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_WITH_BASE_DATA_EXTRACTION_PROMPT
# from onyx.secondary_llm_flows.source_filter import strings_to_document_sources
# from onyx.utils.logger import setup_logger
# from onyx.utils.threadpool_concurrency import run_with_timeout
# logger = setup_logger()
# def search_objects(
# state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
# ) -> SearchSourcesObjectsUpdate:
# """
# LangGraph node to start the agentic search process.
# """
# graph_config = cast(GraphConfig, config["metadata"]["config"])
# question = graph_config.inputs.prompt_builder.raw_user_query
# search_tool = graph_config.tooling.search_tool
# if search_tool is None or graph_config.inputs.persona is None:
# raise ValueError("Search tool and persona must be provided for DivCon search")
# try:
# instructions = graph_config.inputs.persona.system_prompt or ""
# agent_1_instructions = extract_section(
# instructions, "Agent Step 1:", "Agent Step 2:"
# )
# if agent_1_instructions is None:
# raise ValueError("Agent 1 instructions not found")
# agent_1_base_data = extract_section(instructions, "|Start Data|", "|End Data|")
# agent_1_task = extract_section(
# agent_1_instructions, "Task:", "Independent Research Sources:"
# )
# if agent_1_task is None:
# raise ValueError("Agent 1 task not found")
# agent_1_independent_sources_str = extract_section(
# agent_1_instructions, "Independent Research Sources:", "Output Objective:"
# )
# if agent_1_independent_sources_str is None:
# raise ValueError("Agent 1 Independent Research Sources not found")
# document_sources = strings_to_document_sources(
# [
# x.strip().lower()
# for x in agent_1_independent_sources_str.split(DC_OBJECT_SEPARATOR)
# ]
# )
# agent_1_output_objective = extract_section(
# agent_1_instructions, "Output Objective:"
# )
# if agent_1_output_objective is None:
# raise ValueError("Agent 1 output objective not found")
# except Exception as e:
# raise ValueError(
# f"Agent 1 instructions not found or not formatted correctly: {e}"
# )
# # Extract objects
# if agent_1_base_data is None:
# # Retrieve chunks for objects
# retrieved_docs = research(question, search_tool)[:10]
# document_texts_list = []
# for doc_num, doc in enumerate(retrieved_docs):
# chunk_text = "Document " + str(doc_num) + ":\n" + doc.content
# document_texts_list.append(chunk_text)
# document_texts = "\n\n".join(document_texts_list)
# dc_object_extraction_prompt = DC_OBJECT_NO_BASE_DATA_EXTRACTION_PROMPT.format(
# question=question,
# task=agent_1_task,
# document_text=document_texts,
# objects_of_interest=agent_1_output_objective,
# )
# else:
# dc_object_extraction_prompt = DC_OBJECT_WITH_BASE_DATA_EXTRACTION_PROMPT.format(
# question=question,
# task=agent_1_task,
# base_data=agent_1_base_data,
# objects_of_interest=agent_1_output_objective,
# )
# msg = [
# HumanMessage(
# content=trim_prompt_piece(
# config=graph_config.tooling.primary_llm.config,
# prompt_piece=dc_object_extraction_prompt,
# reserved_str="",
# ),
# )
# ]
# primary_llm = graph_config.tooling.primary_llm
# # Grader
# try:
# llm_response = run_with_timeout(
# 30,
# primary_llm.invoke_langchain,
# prompt=msg,
# timeout_override=30,
# max_tokens=300,
# )
# cleaned_response = (
# str(llm_response.content)
# .replace("```json\n", "")
# .replace("\n```", "")
# .replace("\n", "")
# )
# cleaned_response = cleaned_response.split("OBJECTS:")[1]
# object_list = [x.strip() for x in cleaned_response.split(";")]
# except Exception as e:
# raise ValueError(f"Error in search_objects: {e}")
# return SearchSourcesObjectsUpdate(
# analysis_objects=object_list,
# analysis_sources=document_sources,
# log_messages=["Agent 1 Task done"],
# )

View File

@@ -0,0 +1,180 @@
# from datetime import datetime
# from datetime import timedelta
# from datetime import timezone
# from typing import cast
# from langchain_core.messages import HumanMessage
# from langchain_core.runnables import RunnableConfig
# from langgraph.types import StreamWriter
# from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
# from onyx.agents.agent_search.dc_search_analysis.ops import research
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectSourceInput
# from onyx.agents.agent_search.dc_search_analysis.states import (
# ObjectSourceResearchUpdate,
# )
# from onyx.agents.agent_search.models import GraphConfig
# from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
# trim_prompt_piece,
# )
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_SOURCE_RESEARCH_PROMPT
# from onyx.utils.logger import setup_logger
# from onyx.utils.threadpool_concurrency import run_with_timeout
# logger = setup_logger()
# def research_object_source(
# state: ObjectSourceInput,
# config: RunnableConfig,
# writer: StreamWriter = lambda _: None,
# ) -> ObjectSourceResearchUpdate:
# """
# LangGraph node to start the agentic search process.
# """
# datetime.now()
# graph_config = cast(GraphConfig, config["metadata"]["config"])
# search_tool = graph_config.tooling.search_tool
# question = graph_config.inputs.prompt_builder.raw_user_query
# object, document_source = state.object_source_combination
# if search_tool is None or graph_config.inputs.persona is None:
# raise ValueError("Search tool and persona must be provided for DivCon search")
# try:
# instructions = graph_config.inputs.persona.system_prompt or ""
# agent_2_instructions = extract_section(
# instructions, "Agent Step 2:", "Agent Step 3:"
# )
# if agent_2_instructions is None:
# raise ValueError("Agent 2 instructions not found")
# agent_2_task = extract_section(
# agent_2_instructions, "Task:", "Independent Research Sources:"
# )
# if agent_2_task is None:
# raise ValueError("Agent 2 task not found")
# agent_2_time_cutoff = extract_section(
# agent_2_instructions, "Time Cutoff:", "Research Topics:"
# )
# agent_2_research_topics = extract_section(
# agent_2_instructions, "Research Topics:", "Output Objective"
# )
# agent_2_output_objective = extract_section(
# agent_2_instructions, "Output Objective:"
# )
# if agent_2_output_objective is None:
# raise ValueError("Agent 2 output objective not found")
# except Exception:
# raise ValueError(
# "Agent 1 instructions not found or not formatted correctly: {e}"
# )
# # Populate prompt
# # Retrieve chunks for objects
# if agent_2_time_cutoff is not None and agent_2_time_cutoff.strip() != "":
# if agent_2_time_cutoff.strip().endswith("d"):
# try:
# days = int(agent_2_time_cutoff.strip()[:-1])
# agent_2_source_start_time = datetime.now(timezone.utc) - timedelta(
# days=days
# )
# except ValueError:
# raise ValueError(
# f"Invalid time cutoff format: {agent_2_time_cutoff}. Expected format: '<number>d'"
# )
# else:
# raise ValueError(
# f"Invalid time cutoff format: {agent_2_time_cutoff}. Expected format: '<number>d'"
# )
# else:
# agent_2_source_start_time = None
# document_sources = [document_source] if document_source else None
# if len(question.strip()) > 0:
# research_area = f"{question} for {object}"
# elif agent_2_research_topics and len(agent_2_research_topics.strip()) > 0:
# research_area = f"{agent_2_research_topics} for {object}"
# else:
# research_area = object
# retrieved_docs = research(
# question=research_area,
# search_tool=search_tool,
# document_sources=document_sources,
# time_cutoff=agent_2_source_start_time,
# )
# # Generate document text
# document_texts_list = []
# for doc_num, doc in enumerate(retrieved_docs):
# chunk_text = "Document " + str(doc_num) + ":\n" + doc.content
# document_texts_list.append(chunk_text)
# document_texts = "\n\n".join(document_texts_list)
# # Built prompt
# today = datetime.now().strftime("%A, %Y-%m-%d")
# dc_object_source_research_prompt = (
# DC_OBJECT_SOURCE_RESEARCH_PROMPT.format(
# today=today,
# question=question,
# task=agent_2_task,
# document_text=document_texts,
# format=agent_2_output_objective,
# )
# .replace("---object---", object)
# .replace("---source---", document_source.value)
# )
# # Run LLM
# msg = [
# HumanMessage(
# content=trim_prompt_piece(
# config=graph_config.tooling.primary_llm.config,
# prompt_piece=dc_object_source_research_prompt,
# reserved_str="",
# ),
# )
# ]
# primary_llm = graph_config.tooling.primary_llm
# # Grader
# try:
# llm_response = run_with_timeout(
# 30,
# primary_llm.invoke_langchain,
# prompt=msg,
# timeout_override=30,
# max_tokens=300,
# )
# cleaned_response = str(llm_response.content).replace("```json\n", "")
# cleaned_response = cleaned_response.split("RESEARCH RESULTS:")[1]
# object_research_results = {
# "object": object,
# "source": document_source.value,
# "research_result": cleaned_response,
# }
# except Exception as e:
# raise ValueError(f"Error in research_object_source: {e}")
# logger.debug("DivCon Step A2 - Object Source Research - completed for an object")
# return ObjectSourceResearchUpdate(
# object_source_research_results=[object_research_results],
# log_messages=["Agent Step 2 done for one object"],
# )

View File

@@ -0,0 +1,48 @@
# from collections import defaultdict
# from typing import Dict
# from typing import List
# from langchain_core.runnables import RunnableConfig
# from langgraph.types import StreamWriter
# from onyx.agents.agent_search.dc_search_analysis.states import MainState
# from onyx.agents.agent_search.dc_search_analysis.states import (
# ObjectResearchInformationUpdate,
# )
# from onyx.utils.logger import setup_logger
# logger = setup_logger()
# def structure_research_by_object(
# state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
# ) -> ObjectResearchInformationUpdate:
# """
# LangGraph node to start the agentic search process.
# """
# object_source_research_results = state.object_source_research_results
# object_research_information_results: List[Dict[str, str]] = []
# object_research_information_results_list: Dict[str, List[str]] = defaultdict(list)
# for object_source_research in object_source_research_results:
# object = object_source_research["object"]
# source = object_source_research["source"]
# research_result = object_source_research["research_result"]
# object_research_information_results_list[object].append(
# f"Source: {source}\n{research_result}"
# )
# for object, information in object_research_information_results_list.items():
# object_research_information_results.append(
# {"object": object, "information": "\n".join(information)}
# )
# logger.debug("DivCon Step A3 - Object Research Information Structuring - completed")
# return ObjectResearchInformationUpdate(
# object_research_information_results=object_research_information_results,
# log_messages=["A3 - Object Research Information structured"],
# )

View File

@@ -0,0 +1,103 @@
# from typing import cast
# from langchain_core.messages import HumanMessage
# from langchain_core.runnables import RunnableConfig
# from langgraph.types import StreamWriter
# from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectInformationInput
# from onyx.agents.agent_search.dc_search_analysis.states import ObjectResearchUpdate
# from onyx.agents.agent_search.models import GraphConfig
# from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
# trim_prompt_piece,
# )
# from onyx.prompts.agents.dc_prompts import DC_OBJECT_CONSOLIDATION_PROMPT
# from onyx.utils.logger import setup_logger
# from onyx.utils.threadpool_concurrency import run_with_timeout
# logger = setup_logger()
# def consolidate_object_research(
# state: ObjectInformationInput,
# config: RunnableConfig,
# writer: StreamWriter = lambda _: None,
# ) -> ObjectResearchUpdate:
# """
# LangGraph node to start the agentic search process.
# """
# graph_config = cast(GraphConfig, config["metadata"]["config"])
# search_tool = graph_config.tooling.search_tool
# question = graph_config.inputs.prompt_builder.raw_user_query
# if search_tool is None or graph_config.inputs.persona is None:
# raise ValueError("Search tool and persona must be provided for DivCon search")
# instructions = graph_config.inputs.persona.system_prompt or ""
# agent_4_instructions = extract_section(
# instructions, "Agent Step 4:", "Agent Step 5:"
# )
# if agent_4_instructions is None:
# raise ValueError("Agent 4 instructions not found")
# agent_4_output_objective = extract_section(
# agent_4_instructions, "Output Objective:"
# )
# if agent_4_output_objective is None:
# raise ValueError("Agent 4 output objective not found")
# object_information = state.object_information
# object = object_information["object"]
# information = object_information["information"]
# # Create a prompt for the object consolidation
# dc_object_consolidation_prompt = DC_OBJECT_CONSOLIDATION_PROMPT.format(
# question=question,
# object=object,
# information=information,
# format=agent_4_output_objective,
# )
# # Run LLM
# msg = [
# HumanMessage(
# content=trim_prompt_piece(
# config=graph_config.tooling.primary_llm.config,
# prompt_piece=dc_object_consolidation_prompt,
# reserved_str="",
# ),
# )
# ]
# primary_llm = graph_config.tooling.primary_llm
# # Grader
# try:
# llm_response = run_with_timeout(
# 30,
# primary_llm.invoke_langchain,
# prompt=msg,
# timeout_override=30,
# max_tokens=300,
# )
# cleaned_response = str(llm_response.content).replace("```json\n", "")
# consolidated_information = cleaned_response.split("INFORMATION:")[1]
# except Exception as e:
# raise ValueError(f"Error in consolidate_object_research: {e}")
# object_research_results = {
# "object": object,
# "research_result": consolidated_information,
# }
# logger.debug(
# "DivCon Step A4 - Object Research Consolidation - completed for an object"
# )
# return ObjectResearchUpdate(
# object_research_results=[object_research_results],
# log_messages=["Agent Source Consilidation done"],
# )

View File

@@ -0,0 +1,127 @@
# from typing import cast
# from langchain_core.messages import HumanMessage
# from langchain_core.runnables import RunnableConfig
# from langgraph.types import StreamWriter
# from onyx.agents.agent_search.dc_search_analysis.ops import extract_section
# from onyx.agents.agent_search.dc_search_analysis.states import MainState
# from onyx.agents.agent_search.dc_search_analysis.states import ResearchUpdate
# from onyx.agents.agent_search.models import GraphConfig
# from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
# trim_prompt_piece,
# )
# from onyx.agents.agent_search.shared_graph_utils.llm import stream_llm_answer
# from onyx.prompts.agents.dc_prompts import DC_FORMATTING_NO_BASE_DATA_PROMPT
# from onyx.prompts.agents.dc_prompts import DC_FORMATTING_WITH_BASE_DATA_PROMPT
# from onyx.utils.logger import setup_logger
# from onyx.utils.threadpool_concurrency import run_with_timeout
# logger = setup_logger()
# def consolidate_research(
# state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
# ) -> ResearchUpdate:
# """
# LangGraph node to start the agentic search process.
# """
# graph_config = cast(GraphConfig, config["metadata"]["config"])
# search_tool = graph_config.tooling.search_tool
# if search_tool is None or graph_config.inputs.persona is None:
# raise ValueError("Search tool and persona must be provided for DivCon search")
# # Populate prompt
# instructions = graph_config.inputs.persona.system_prompt or ""
# try:
# agent_5_instructions = extract_section(
# instructions, "Agent Step 5:", "Agent End"
# )
# if agent_5_instructions is None:
# raise ValueError("Agent 5 instructions not found")
# agent_5_base_data = extract_section(instructions, "|Start Data|", "|End Data|")
# agent_5_task = extract_section(
# agent_5_instructions, "Task:", "Independent Research Sources:"
# )
# if agent_5_task is None:
# raise ValueError("Agent 5 task not found")
# agent_5_output_objective = extract_section(
# agent_5_instructions, "Output Objective:"
# )
# if agent_5_output_objective is None:
# raise ValueError("Agent 5 output objective not found")
# except ValueError as e:
# raise ValueError(
# f"Instructions for Agent Step 5 were not properly formatted: {e}"
# )
# research_result_list = []
# if agent_5_task.strip() == "*concatenate*":
# object_research_results = state.object_research_results
# for object_research_result in object_research_results:
# object = object_research_result["object"]
# research_result = object_research_result["research_result"]
# research_result_list.append(f"Object: {object}\n\n{research_result}")
# research_results = "\n\n".join(research_result_list)
# else:
# raise NotImplementedError("Only '*concatenate*' is currently supported")
# # Create a prompt for the object consolidation
# if agent_5_base_data is None:
# dc_formatting_prompt = DC_FORMATTING_NO_BASE_DATA_PROMPT.format(
# text=research_results,
# format=agent_5_output_objective,
# )
# else:
# dc_formatting_prompt = DC_FORMATTING_WITH_BASE_DATA_PROMPT.format(
# base_data=agent_5_base_data,
# text=research_results,
# format=agent_5_output_objective,
# )
# # Run LLM
# msg = [
# HumanMessage(
# content=trim_prompt_piece(
# config=graph_config.tooling.primary_llm.config,
# prompt_piece=dc_formatting_prompt,
# reserved_str="",
# ),
# )
# ]
# try:
# _ = run_with_timeout(
# 60,
# lambda: stream_llm_answer(
# llm=graph_config.tooling.primary_llm,
# prompt=msg,
# event_name="initial_agent_answer",
# writer=writer,
# agent_answer_level=0,
# agent_answer_question_num=0,
# agent_answer_type="agent_level_answer",
# timeout_override=30,
# max_tokens=None,
# ),
# )
# except Exception as e:
# raise ValueError(f"Error in consolidate_research: {e}")
# logger.debug("DivCon Step A5 - Final Generation - completed")
# return ResearchUpdate(
# research_results=research_results,
# log_messages=["Agent Source Consilidation done"],
# )

View File

@@ -0,0 +1,50 @@
# from datetime import datetime
# from typing import cast
# from onyx.chat.models import LlmDoc
# from onyx.configs.constants import DocumentSource
# from onyx.tools.models import SearchToolOverrideKwargs
# from onyx.tools.tool_implementations.search.search_tool import (
# FINAL_CONTEXT_DOCUMENTS_ID,
# )
# from onyx.tools.tool_implementations.search.search_tool import SearchTool
# def research(
# question: str,
# search_tool: SearchTool,
# document_sources: list[DocumentSource] | None = None,
# time_cutoff: datetime | None = None,
# ) -> list[LlmDoc]:
# # new db session to avoid concurrency issues
# retrieved_docs: list[LlmDoc] = []
# for tool_response in search_tool.run(
# query=question,
# override_kwargs=SearchToolOverrideKwargs(original_query=question),
# ):
# # get retrieved docs to send to the rest of the graph
# if tool_response.id == FINAL_CONTEXT_DOCUMENTS_ID:
# retrieved_docs = cast(list[LlmDoc], tool_response.response)[:10]
# break
# return retrieved_docs
# def extract_section(
# text: str, start_marker: str, end_marker: str | None = None
# ) -> str | None:
# """Extract text between markers, returning None if markers not found"""
# parts = text.split(start_marker)
# if len(parts) == 1:
# return None
# after_start = parts[1].strip()
# if not end_marker:
# return after_start
# extract = after_start.split(end_marker)[0]
# return extract.strip()

View File

@@ -0,0 +1,72 @@
# from operator import add
# from typing import Annotated
# from typing import Dict
# from typing import TypedDict
# from pydantic import BaseModel
# from onyx.agents.agent_search.core_state import CoreState
# from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
# from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
# from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
# from onyx.configs.constants import DocumentSource
# ### States ###
# class LoggerUpdate(BaseModel):
# log_messages: Annotated[list[str], add] = []
# class SearchSourcesObjectsUpdate(LoggerUpdate):
# analysis_objects: list[str] = []
# analysis_sources: list[DocumentSource] = []
# class ObjectSourceInput(LoggerUpdate):
# object_source_combination: tuple[str, DocumentSource]
# class ObjectSourceResearchUpdate(LoggerUpdate):
# object_source_research_results: Annotated[list[Dict[str, str]], add] = []
# class ObjectInformationInput(LoggerUpdate):
# object_information: Dict[str, str]
# class ObjectResearchInformationUpdate(LoggerUpdate):
# object_research_information_results: Annotated[list[Dict[str, str]], add] = []
# class ObjectResearchUpdate(LoggerUpdate):
# object_research_results: Annotated[list[Dict[str, str]], add] = []
# class ResearchUpdate(LoggerUpdate):
# research_results: str | None = None
# ## Graph Input State
# class MainInput(CoreState):
# pass
# ## Graph State
# class MainState(
# # This includes the core state
# MainInput,
# ToolChoiceInput,
# ToolCallUpdate,
# ToolChoiceUpdate,
# SearchSourcesObjectsUpdate,
# ObjectSourceResearchUpdate,
# ObjectResearchInformationUpdate,
# ObjectResearchUpdate,
# ResearchUpdate,
# ):
# pass
# ## Graph Output State - presently not used
# class MainOutput(TypedDict):
# log_messages: list[str]

View File

@@ -0,0 +1,36 @@
# from pydantic import BaseModel
# class RefinementSubQuestion(BaseModel):
# sub_question: str
# sub_question_id: str
# verified: bool
# answered: bool
# answer: str
# class AgentTimings(BaseModel):
# base_duration_s: float | None
# refined_duration_s: float | None
# full_duration_s: float | None
# class AgentBaseMetrics(BaseModel):
# num_verified_documents_total: int | None
# num_verified_documents_core: int | None
# verified_avg_score_core: float | None
# num_verified_documents_base: int | float | None
# verified_avg_score_base: float | None = None
# base_doc_boost_factor: float | None = None
# support_boost_factor: float | None = None
# duration_s: float | None = None
# class AgentRefinedMetrics(BaseModel):
# refined_doc_boost_factor: float | None = None
# refined_question_boost_factor: float | None = None
# duration_s: float | None = None
# class AgentAdditionalMetrics(BaseModel):
# pass

Some files were not shown because too many files have changed in this diff Show More