Compare commits

..

7 Commits

Author SHA1 Message Date
Nik
8bb13ec110 fix(devtools): disable post-checkout hook during cherry-pick branch switches
SKIP=uv-sync env var doesn't reliably prevent uv-sync from running
(pre-commit fires the hook twice and the first invocation ignores SKIP).
Instead, temporarily rename .git/hooks/post-checkout before branch
switches and restore it when ods exits. This guarantees no post-checkout
hooks fire during ods-initiated branch switches.
2026-02-26 09:22:07 -08:00
Nik
8066d3477b fix(devtools): use SKIP=uv-sync to prevent binary overwrite during cherry-pick
The stash/restore approach cannot work: uv-sync rebuilds the ods binary
from the target branch's source during post-checkout, and the restored
binary gets overwritten again. Instead, set SKIP=uv-sync in the process
environment so pre-commit skips uv-sync during ods-initiated branch
switches. The env var only affects child processes of ods and does not
persist after ods exits.
2026-02-25 20:57:46 -08:00
Nik
97fcebbe50 fix(devtools): replace SKIP=uv-sync with stash/restore binary approach
Let uv-sync run normally (keeping deps correct for pre-commit hooks)
but restore the ods binary afterward. Key fixes over prior attempt:

- Persist OdsBinaryPath in CherryPickState so --continue can restore
- Remove target file before copying to avoid ETXTBSY on macOS
- Use os.Executable() directly (no EvalSymlinks indirection)
- Log at Info level for visibility into stash/restore operations
- Verify restore succeeded by checking file size
2026-02-25 20:49:20 -08:00
Nik
f8da89c1af fix(devtools): use SKIP=uv-sync to prevent binary overwrite during cherry-pick
Previous approaches (re-exec, RestoreOdsBinary) failed because the old
binary on release branches doesn't have the new code. Instead, set
SKIP=uv-sync in the process environment so pre-commit's post-checkout
hook skips the uv-sync hook entirely during ods-initiated branch
switches. This prevents the problem at the source.
2026-02-25 20:34:57 -08:00
Nik
661ffae6c6 fix(devtools): restore ods binary after branch switches instead of re-exec
The re-exec approach fails because the old binary on release branches
doesn't have the re-exec code. Instead, after every git switch/checkout
in the cherry-pick flow, copy the stashed binary back to .venv/bin/ods
so that uv-sync's overwrite is immediately undone. This way the user
always gets the new binary when running ods cp --continue.
2026-02-25 20:20:39 -08:00
Nik
a46f697033 fix(devtools): move re-exec to main() before Cobra parses subcommands
The old ods binary on release branches doesn't know about `cp` at all,
so Cobra rejects the command before reaching runCherryPickContinue().
Moving ReExecFromStashedBinary() to main() ensures the stashed binary
takes over before any argument parsing happens.
2026-02-25 20:16:14 -08:00
Nik
d9f817e40d fix(devtools): persist ods binary across branch switches during cherry-pick
When `ods cp` switches to a release branch, the uv-sync post-checkout
hook rebuilds onyx-devtools from that branch's source, overwriting
.venv/bin/ods with an older version that may lack `cp` and `--continue`.

Fix: stash the running binary to .git/ods-bin at the start of a
cherry-pick, and re-exec from it on --continue if the installed
binary has been overwritten. Cleaned up when the operation completes.
2026-02-25 20:05:32 -08:00
111 changed files with 1061 additions and 5120 deletions

View File

@@ -9,8 +9,7 @@ inputs:
required: true
provider-api-key:
description: "API key for NIGHTLY_LLM_API_KEY"
required: false
default: ""
required: true
strict:
description: "String true/false for NIGHTLY_LLM_STRICT"
required: true
@@ -18,14 +17,6 @@ inputs:
description: "Optional NIGHTLY_LLM_API_BASE"
required: false
default: ""
api-version:
description: "Optional NIGHTLY_LLM_API_VERSION"
required: false
default: ""
deployment-name:
description: "Optional NIGHTLY_LLM_DEPLOYMENT_NAME"
required: false
default: ""
custom-config-json:
description: "Optional NIGHTLY_LLM_CUSTOM_CONFIG_JSON"
required: false
@@ -68,7 +59,6 @@ runs:
DISABLE_TELEMETRY=true
INTEGRATION_TESTS_MODE=true
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
AWS_REGION_NAME=us-west-2
ONYX_BACKEND_IMAGE=${ECR_CACHE}:nightly-llm-it-backend-${RUN_ID}
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:nightly-llm-it-model-server-${RUN_ID}
EOF2
@@ -92,8 +82,6 @@ runs:
NIGHTLY_LLM_PROVIDER: ${{ inputs.provider }}
NIGHTLY_LLM_API_KEY: ${{ inputs.provider-api-key }}
NIGHTLY_LLM_API_BASE: ${{ inputs.api-base }}
NIGHTLY_LLM_API_VERSION: ${{ inputs.api-version }}
NIGHTLY_LLM_DEPLOYMENT_NAME: ${{ inputs.deployment-name }}
NIGHTLY_LLM_CUSTOM_CONFIG_JSON: ${{ inputs.custom-config-json }}
NIGHTLY_LLM_STRICT: ${{ inputs.strict }}
RUNS_ON_ECR_CACHE: ${{ inputs.runs-on-ecr-cache }}
@@ -103,6 +91,11 @@ runs:
max_attempts: 2
retry_wait_seconds: 10
command: |
if [ -z "${MODELS}" ]; then
echo "Input 'models' must be non-empty for provider '${NIGHTLY_LLM_PROVIDER}'."
exit 1
fi
docker run --rm --network onyx_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
@@ -117,13 +110,10 @@ runs:
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e TEST_WEB_HOSTNAME=test-runner \
-e AWS_REGION_NAME=us-west-2 \
-e NIGHTLY_LLM_PROVIDER="${NIGHTLY_LLM_PROVIDER}" \
-e NIGHTLY_LLM_MODELS="${MODELS}" \
-e NIGHTLY_LLM_API_KEY="${NIGHTLY_LLM_API_KEY}" \
-e NIGHTLY_LLM_API_BASE="${NIGHTLY_LLM_API_BASE}" \
-e NIGHTLY_LLM_API_VERSION="${NIGHTLY_LLM_API_VERSION}" \
-e NIGHTLY_LLM_DEPLOYMENT_NAME="${NIGHTLY_LLM_DEPLOYMENT_NAME}" \
-e NIGHTLY_LLM_CUSTOM_CONFIG_JSON="${NIGHTLY_LLM_CUSTOM_CONFIG_JSON}" \
-e NIGHTLY_LLM_STRICT="${NIGHTLY_LLM_STRICT}" \
${RUNS_ON_ECR_CACHE}:nightly-llm-it-${RUN_ID} \

View File

@@ -0,0 +1,44 @@
name: Nightly LLM Provider Chat Tests (OpenAI)
concurrency:
group: Nightly-LLM-Provider-Chat-OpenAI-${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: true
on:
schedule:
# Runs daily at 10:30 UTC (2:30 AM PST / 3:30 AM PDT)
- cron: "30 10 * * *"
workflow_dispatch:
permissions:
contents: read
jobs:
openai-provider-chat-test:
uses: ./.github/workflows/reusable-nightly-llm-provider-chat.yml
with:
provider: openai
models: ${{ vars.NIGHTLY_LLM_OPENAI_MODELS }}
strict: true
secrets:
provider_api_key: ${{ secrets.OPENAI_API_KEY }}
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
notify-slack-on-failure:
needs: [openai-provider-chat-test]
if: failure() && github.event_name == 'schedule'
runs-on: ubuntu-slim
timeout-minutes: 5
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.SLACK_WEBHOOK }}
failed-jobs: openai-provider-chat-test
title: "🚨 Scheduled OpenAI Provider Chat Tests failed!"
ref-name: ${{ github.ref_name }}

View File

@@ -1,56 +0,0 @@
name: Nightly LLM Provider Chat Tests
concurrency:
group: Nightly-LLM-Provider-Chat-${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: true
on:
schedule:
# Runs daily at 10:30 UTC (2:30 AM PST / 3:30 AM PDT)
- cron: "30 10 * * *"
workflow_dispatch:
permissions:
contents: read
jobs:
provider-chat-test:
uses: ./.github/workflows/reusable-nightly-llm-provider-chat.yml
with:
openai_models: ${{ vars.NIGHTLY_LLM_OPENAI_MODELS }}
anthropic_models: ${{ vars.NIGHTLY_LLM_ANTHROPIC_MODELS }}
bedrock_models: ${{ vars.NIGHTLY_LLM_BEDROCK_MODELS }}
vertex_ai_models: ${{ vars.NIGHTLY_LLM_VERTEX_AI_MODELS }}
azure_models: ${{ vars.NIGHTLY_LLM_AZURE_MODELS }}
azure_api_base: ${{ vars.NIGHTLY_LLM_AZURE_API_BASE }}
ollama_models: ${{ vars.NIGHTLY_LLM_OLLAMA_MODELS }}
openrouter_models: ${{ vars.NIGHTLY_LLM_OPENROUTER_MODELS }}
strict: true
secrets:
openai_api_key: ${{ secrets.OPENAI_API_KEY }}
anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
bedrock_api_key: ${{ secrets.BEDROCK_API_KEY }}
vertex_ai_custom_config_json: ${{ secrets.NIGHTLY_LLM_VERTEX_AI_CUSTOM_CONFIG_JSON }}
azure_api_key: ${{ secrets.AZURE_API_KEY }}
ollama_api_key: ${{ secrets.OLLAMA_API_KEY }}
openrouter_api_key: ${{ secrets.OPENROUTER_API_KEY }}
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
notify-slack-on-failure:
needs: [provider-chat-test]
if: failure() && github.event_name == 'schedule'
runs-on: ubuntu-slim
timeout-minutes: 5
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
with:
persist-credentials: false
- name: Send Slack notification
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.SLACK_WEBHOOK }}
failed-jobs: provider-chat-test
title: "🚨 Scheduled LLM Provider Chat Tests failed!"
ref-name: ${{ github.ref_name }}

View File

@@ -89,10 +89,6 @@ env:
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ vars.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ vars.SHAREPOINT_SITE }}
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
# Github
ACCESS_TOKEN_GITHUB: ${{ secrets.ACCESS_TOKEN_GITHUB }}

View File

@@ -3,66 +3,33 @@ name: Reusable Nightly LLM Provider Chat Tests
on:
workflow_call:
inputs:
openai_models:
description: "Comma-separated models for openai"
required: false
default: ""
provider:
description: "Provider slug passed to NIGHTLY_LLM_PROVIDER (e.g. openai, anthropic)"
required: true
type: string
anthropic_models:
description: "Comma-separated models for anthropic"
required: false
default: ""
type: string
bedrock_models:
description: "Comma-separated models for bedrock"
required: false
default: ""
type: string
vertex_ai_models:
description: "Comma-separated models for vertex_ai"
required: false
default: ""
type: string
azure_models:
description: "Comma-separated models for azure"
required: false
default: ""
type: string
ollama_models:
description: "Comma-separated models for ollama_chat"
required: false
default: ""
type: string
openrouter_models:
description: "Comma-separated models for openrouter"
required: false
default: ""
type: string
azure_api_base:
description: "API base for azure provider"
required: false
default: ""
models:
description: "Comma-separated model list passed to NIGHTLY_LLM_MODELS"
required: true
type: string
strict:
description: "Default NIGHTLY_LLM_STRICT passed to tests"
description: "Pass-through value for NIGHTLY_LLM_STRICT"
required: false
default: true
type: boolean
api_base:
description: "Optional NIGHTLY_LLM_API_BASE override"
required: false
default: ""
type: string
custom_config_json:
description: "Optional NIGHTLY_LLM_CUSTOM_CONFIG_JSON override"
required: false
default: ""
type: string
secrets:
openai_api_key:
required: false
anthropic_api_key:
required: false
bedrock_api_key:
required: false
vertex_ai_custom_config_json:
required: false
azure_api_key:
required: false
ollama_api_key:
required: false
openrouter_api_key:
required: false
provider_api_key:
description: "Provider API key passed to NIGHTLY_LLM_API_KEY"
required: true
DOCKER_USERNAME:
required: true
DOCKER_TOKEN:
@@ -71,8 +38,29 @@ on:
permissions:
contents: read
env:
NIGHTLY_LLM_PROVIDER: ${{ inputs.provider }}
NIGHTLY_LLM_MODELS: ${{ inputs.models }}
NIGHTLY_LLM_API_KEY: ${{ secrets.provider_api_key }}
NIGHTLY_LLM_API_BASE: ${{ inputs.api_base }}
NIGHTLY_LLM_CUSTOM_CONFIG_JSON: ${{ inputs.custom_config_json }}
NIGHTLY_LLM_STRICT: ${{ inputs.strict && 'true' || 'false' }}
jobs:
validate-inputs:
# NOTE: Keep this cheap and fail before image builds if required inputs are missing.
runs-on: ubuntu-slim
timeout-minutes: 5
steps:
- name: Validate required nightly provider inputs
run: |
if [ -z "${NIGHTLY_LLM_MODELS}" ]; then
echo "Input 'models' must be non-empty for provider '${NIGHTLY_LLM_PROVIDER}'."
exit 1
fi
build-backend-image:
needs: [validate-inputs]
runs-on:
[
runs-on,
@@ -102,6 +90,7 @@ jobs:
docker-no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' && 'true' || 'false' }}
build-model-server-image:
needs: [validate-inputs]
runs-on:
[
runs-on,
@@ -130,6 +119,7 @@ jobs:
docker-token: ${{ secrets.DOCKER_TOKEN }}
build-integration-image:
needs: [validate-inputs]
runs-on:
[
runs-on,
@@ -159,75 +149,11 @@ jobs:
provider-chat-test:
needs:
[
build-backend-image,
build-model-server-image,
build-integration-image,
]
strategy:
fail-fast: false
matrix:
include:
- provider: openai
models: ${{ inputs.openai_models }}
api_key_secret: openai_api_key
custom_config_secret: ""
api_base: ""
api_version: ""
deployment_name: ""
required: true
- provider: anthropic
models: ${{ inputs.anthropic_models }}
api_key_secret: anthropic_api_key
custom_config_secret: ""
api_base: ""
api_version: ""
deployment_name: ""
required: true
- provider: bedrock
models: ${{ inputs.bedrock_models }}
api_key_secret: bedrock_api_key
custom_config_secret: ""
api_base: ""
api_version: ""
deployment_name: ""
required: false
- provider: vertex_ai
models: ${{ inputs.vertex_ai_models }}
api_key_secret: ""
custom_config_secret: vertex_ai_custom_config_json
api_base: ""
api_version: ""
deployment_name: ""
required: false
- provider: azure
models: ${{ inputs.azure_models }}
api_key_secret: azure_api_key
custom_config_secret: ""
api_base: ${{ inputs.azure_api_base }}
api_version: "2025-04-01-preview"
deployment_name: ""
required: false
- provider: ollama_chat
models: ${{ inputs.ollama_models }}
api_key_secret: ollama_api_key
custom_config_secret: ""
api_base: "https://ollama.com"
api_version: ""
deployment_name: ""
required: false
- provider: openrouter
models: ${{ inputs.openrouter_models }}
api_key_secret: openrouter_api_key
custom_config_secret: ""
api_base: "https://openrouter.ai/api/v1"
api_version: ""
deployment_name: ""
required: false
[build-backend-image, build-model-server-image, build-integration-image]
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- "run-id=${{ github.run_id }}-nightly-${{ matrix.provider }}-provider-chat-test"
- "run-id=${{ github.run_id }}-nightly-${{ inputs.provider }}-provider-chat-test"
- extras=ecr-cache
timeout-minutes: 45
steps:
@@ -241,14 +167,12 @@ jobs:
- name: Run nightly provider chat test
uses: ./.github/actions/run-nightly-provider-chat-test
with:
provider: ${{ matrix.provider }}
models: ${{ matrix.models }}
provider-api-key: ${{ matrix.api_key_secret && secrets[matrix.api_key_secret] || '' }}
strict: ${{ inputs.strict && 'true' || 'false' }}
api-base: ${{ matrix.api_base }}
api-version: ${{ matrix.api_version }}
deployment-name: ${{ matrix.deployment_name }}
custom-config-json: ${{ matrix.custom_config_secret && secrets[matrix.custom_config_secret] || '' }}
provider: ${{ env.NIGHTLY_LLM_PROVIDER }}
models: ${{ env.NIGHTLY_LLM_MODELS }}
provider-api-key: ${{ secrets.provider_api_key }}
strict: ${{ env.NIGHTLY_LLM_STRICT }}
api-base: ${{ env.NIGHTLY_LLM_API_BASE }}
custom-config-json: ${{ env.NIGHTLY_LLM_CUSTOM_CONFIG_JSON }}
runs-on-ecr-cache: ${{ env.RUNS_ON_ECR_CACHE }}
run-id: ${{ github.run_id }}
docker-username: ${{ secrets.DOCKER_USERNAME }}
@@ -270,7 +194,7 @@ jobs:
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: docker-all-logs-nightly-${{ matrix.provider }}-llm-provider
name: docker-all-logs-nightly-${{ inputs.provider }}-llm-provider
path: |
${{ github.workspace }}/api_server.log
${{ github.workspace }}/docker-compose.log

View File

@@ -1,69 +0,0 @@
"""add python tool on default
Revision ID: 57122d037335
Revises: c0c937d5c9e5
Create Date: 2026-02-27 10:10:40.124925
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "57122d037335"
down_revision = "c0c937d5c9e5"
branch_labels = None
depends_on = None
PYTHON_TOOL_NAME = "python"
def upgrade() -> None:
conn = op.get_bind()
# Look up the PythonTool id
result = conn.execute(
sa.text("SELECT id FROM tool WHERE name = :name"),
{"name": PYTHON_TOOL_NAME},
).fetchone()
if not result:
return
tool_id = result[0]
# Attach to the default persona (id=0) if not already attached
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": tool_id},
)
def downgrade() -> None:
conn = op.get_bind()
result = conn.execute(
sa.text("SELECT id FROM tool WHERE name = :name"),
{"name": PYTHON_TOOL_NAME},
).fetchone()
if not result:
return
conn.execute(
sa.text(
"""
DELETE FROM persona__tool
WHERE persona_id = 0 AND tool_id = :tool_id
"""
),
{"tool_id": result[0]},
)

View File

@@ -1,70 +0,0 @@
"""llm provider deprecate fields
Revision ID: c0c937d5c9e5
Revises: 8ffcc2bcfc11
Create Date: 2026-02-25 17:35:46.125102
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "c0c937d5c9e5"
down_revision = "8ffcc2bcfc11"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Make default_model_name nullable (was NOT NULL)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=True,
)
# Drop unique constraint on is_default_provider (defaults now tracked via LLMModelFlow)
op.drop_constraint(
"llm_provider_is_default_provider_key",
"llm_provider",
type_="unique",
)
# Remove server_default from is_default_vision_provider (was server_default=false())
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=None,
)
def downgrade() -> None:
# Restore default_model_name to NOT NULL (set empty string for any NULLs first)
op.execute(
"UPDATE llm_provider SET default_model_name = '' WHERE default_model_name IS NULL"
)
op.alter_column(
"llm_provider",
"default_model_name",
existing_type=sa.String(),
nullable=False,
)
# Restore unique constraint on is_default_provider
op.create_unique_constraint(
"llm_provider_is_default_provider_key",
"llm_provider",
["is_default_provider"],
)
# Restore server_default for is_default_vision_provider
op.alter_column(
"llm_provider",
"is_default_vision_provider",
existing_type=sa.Boolean(),
server_default=sa.false(),
)

View File

@@ -4,6 +4,7 @@ from collections import deque
from collections.abc import Callable
from collections.abc import Generator
from typing import Any
from urllib.parse import unquote
from urllib.parse import urlparse
import requests as _requests
@@ -597,12 +598,8 @@ def get_external_access_from_sharepoint(
)
elif site_page:
site_url = site_page.get("webUrl")
# Keep percent-encoding intact so the path matches the encoding
# used by the Office365 library's SPResPath.create_relative(),
# which compares against urlparse(context.base_url).path.
# Decoding (e.g. %27 → ') causes a mismatch that duplicates
# the site prefix in the constructed URL.
server_relative_url = urlparse(site_url).path
# Prefer server-relative URL to avoid OData filters that break on apostrophes
server_relative_url = unquote(urlparse(site_url).path)
file_obj = client_context.web.get_file_by_server_relative_url(
server_relative_url
)

View File

@@ -322,7 +322,6 @@ def list_users(
"""List users with optional SCIM filter and pagination."""
dal = ScimDAL(db_session)
dal.update_token_last_used(_token.id)
dal.commit()
try:
scim_filter = parse_scim_filter(filter)
@@ -366,7 +365,6 @@ def get_user(
"""Get a single user by ID."""
dal = ScimDAL(db_session)
dal.update_token_last_used(_token.id)
dal.commit()
result = _fetch_user_or_404(user_id, dal)
if isinstance(result, ScimJSONResponse):
@@ -723,7 +721,6 @@ def list_groups(
"""List groups with optional SCIM filter and pagination."""
dal = ScimDAL(db_session)
dal.update_token_last_used(_token.id)
dal.commit()
try:
scim_filter = parse_scim_filter(filter)
@@ -760,7 +757,6 @@ def get_group(
"""Get a single group by ID."""
dal = ScimDAL(db_session)
dal.update_token_last_used(_token.id)
dal.commit()
result = _fetch_group_or_404(group_id, dal)
if isinstance(result, ScimJSONResponse):

View File

@@ -58,27 +58,16 @@ class OAuthTokenManager:
if not user_token.token_data:
raise ValueError("No token data available for refresh")
if (
self.oauth_config.client_id is None
or self.oauth_config.client_secret is None
):
raise ValueError(
"OAuth client_id and client_secret are required for token refresh"
)
token_data = self._unwrap_token_data(user_token.token_data)
data: dict[str, str] = {
"grant_type": "refresh_token",
"refresh_token": token_data["refresh_token"],
"client_id": self._unwrap_sensitive_str(self.oauth_config.client_id),
"client_secret": self._unwrap_sensitive_str(
self.oauth_config.client_secret
),
}
response = requests.post(
self.oauth_config.token_url,
data=data,
data={
"grant_type": "refresh_token",
"refresh_token": token_data["refresh_token"],
"client_id": self.oauth_config.client_id,
"client_secret": self.oauth_config.client_secret,
},
headers={"Accept": "application/json"},
)
response.raise_for_status()
@@ -126,26 +115,15 @@ class OAuthTokenManager:
def exchange_code_for_token(self, code: str, redirect_uri: str) -> dict[str, Any]:
"""Exchange authorization code for access token"""
if (
self.oauth_config.client_id is None
or self.oauth_config.client_secret is None
):
raise ValueError(
"OAuth client_id and client_secret are required for code exchange"
)
data: dict[str, str] = {
"grant_type": "authorization_code",
"code": code,
"client_id": self._unwrap_sensitive_str(self.oauth_config.client_id),
"client_secret": self._unwrap_sensitive_str(
self.oauth_config.client_secret
),
"redirect_uri": redirect_uri,
}
response = requests.post(
self.oauth_config.token_url,
data=data,
data={
"grant_type": "authorization_code",
"code": code,
"client_id": self.oauth_config.client_id,
"client_secret": self.oauth_config.client_secret,
"redirect_uri": redirect_uri,
},
headers={"Accept": "application/json"},
)
response.raise_for_status()
@@ -163,13 +141,8 @@ class OAuthTokenManager:
oauth_config: OAuthConfig, redirect_uri: str, state: str
) -> str:
"""Build OAuth authorization URL"""
if oauth_config.client_id is None:
raise ValueError("OAuth client_id is required to build authorization URL")
params: dict[str, Any] = {
"client_id": OAuthTokenManager._unwrap_sensitive_str(
oauth_config.client_id
),
"client_id": oauth_config.client_id,
"redirect_uri": redirect_uri,
"response_type": "code",
"state": state,
@@ -188,12 +161,6 @@ class OAuthTokenManager:
return f"{oauth_config.authorization_url}{separator}{urlencode(params)}"
@staticmethod
def _unwrap_sensitive_str(value: SensitiveValue[str] | str) -> str:
if isinstance(value, SensitiveValue):
return value.get_value(apply_mask=False)
return value
@staticmethod
def _unwrap_token_data(
token_data: SensitiveValue[dict[str, Any]] | dict[str, Any],

View File

@@ -48,7 +48,6 @@ from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchDocumentIndex,
)
from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
from onyx.indexing.models import IndexingSetting
from onyx.redis.redis_pool import get_redis_client
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
@@ -150,12 +149,8 @@ def migrate_chunks_from_vespa_to_opensearch_task(
try_insert_opensearch_tenant_migration_record_with_commit(db_session)
search_settings = get_current_search_settings(db_session)
tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
indexing_setting = IndexingSetting.from_db_model(search_settings)
opensearch_document_index = OpenSearchDocumentIndex(
tenant_state=tenant_state,
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
index_name=search_settings.index_name, tenant_state=tenant_state
)
vespa_document_index = VespaDocumentIndex(
index_name=search_settings.index_name,

View File

@@ -76,7 +76,7 @@ def _user_file_queued_key(user_file_id: str | UUID) -> str:
return f"{OnyxRedisLocks.USER_FILE_QUEUED_PREFIX}:{user_file_id}"
def user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
def _user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}"
@@ -764,7 +764,7 @@ def process_single_user_file_project_sync(
redis_client.delete(_user_file_project_sync_queued_key(user_file_id))
file_lock: RedisLock = redis_client.lock(
user_file_project_sync_lock_key(user_file_id),
_user_file_project_sync_lock_key(user_file_id),
timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,
)

View File

@@ -3,6 +3,7 @@ import time
from collections.abc import Callable
from collections.abc import Generator
from queue import Empty
from typing import Any
from onyx.chat.citation_processor import CitationMapping
from onyx.chat.emitter import Emitter
@@ -162,11 +163,13 @@ class ChatStateContainer:
def run_chat_loop_with_state_containers(
chat_loop_func: Callable[[Emitter, ChatStateContainer], None],
func: Callable[..., None],
completion_callback: Callable[[ChatStateContainer], None],
is_connected: Callable[[], bool],
emitter: Emitter,
state_container: ChatStateContainer,
*args: Any,
**kwargs: Any,
) -> Generator[Packet, None]:
"""
Explicit wrapper function that runs a function in a background thread
@@ -177,18 +180,19 @@ def run_chat_loop_with_state_containers(
Args:
func: The function to wrap (should accept emitter and state_container as first and second args)
completion_callback: Callback function to call when the function completes
emitter: Emitter instance for sending packets
state_container: ChatStateContainer instance for accumulating state
is_connected: Callable that returns False when stop signal is set
*args: Additional positional arguments for func
**kwargs: Additional keyword arguments for func
Usage:
packets = run_chat_loop_with_state_containers(
my_func,
completion_callback=completion_callback,
emitter=emitter,
state_container=state_container,
is_connected=check_func,
arg1, arg2, kwarg1=value1
)
for packet in packets:
# Process packets
@@ -197,7 +201,9 @@ def run_chat_loop_with_state_containers(
def run_with_exception_capture() -> None:
try:
chat_loop_func(emitter, state_container)
# Ensure state_container is passed explicitly, removing it from kwargs if present
kwargs_with_state = {**kwargs, "state_container": state_container}
func(emitter, *args, **kwargs_with_state)
except Exception as e:
# If execution fails, emit an exception packet
emitter.emit(

View File

@@ -461,7 +461,7 @@ def _build_tool_call_response_history_message(
def convert_chat_history(
chat_history: list[ChatMessage],
files: list[ChatLoadedFile],
context_image_files: list[ChatLoadedFile],
project_image_files: list[ChatLoadedFile],
additional_context: str | None,
token_counter: Callable[[str], int],
tool_id_to_name_map: dict[int, str],
@@ -541,11 +541,11 @@ def convert_chat_history(
)
# Add the user message with image files attached
# If this is the last USER message, also include context_image_files
# Note: context image file tokens are NOT counted in the token count
# If this is the last USER message, also include project_image_files
# Note: project image file tokens are NOT counted in the token count
if idx == last_user_message_idx:
if context_image_files:
image_files.extend(context_image_files)
if project_image_files:
image_files.extend(project_image_files)
if additional_context:
simple_messages.append(

View File

@@ -15,10 +15,10 @@ from onyx.chat.emitter import Emitter
from onyx.chat.llm_step import extract_tool_calls_from_response_text
from onyx.chat.llm_step import run_llm_step
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ContextFileMetadata
from onyx.chat.models import ExtractedContextFiles
from onyx.chat.models import ExtractedProjectFiles
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import LlmStepResult
from onyx.chat.models import ProjectFileMetadata
from onyx.chat.models import ToolCallSimple
from onyx.chat.prompt_utils import build_reminder_message
from onyx.chat.prompt_utils import build_system_prompt
@@ -203,17 +203,17 @@ def _try_fallback_tool_extraction(
MAX_LLM_CYCLES = 6
def _build_context_file_citation_mapping(
file_metadata: list[ContextFileMetadata],
def _build_project_file_citation_mapping(
project_file_metadata: list[ProjectFileMetadata],
starting_citation_num: int = 1,
) -> CitationMapping:
"""Build citation mapping for context files.
"""Build citation mapping for project files.
Converts context file metadata into SearchDoc objects that can be cited.
Converts project file metadata into SearchDoc objects that can be cited.
Citation numbers start from the provided starting number.
Args:
file_metadata: List of context file metadata
project_file_metadata: List of project file metadata
starting_citation_num: Starting citation number (default: 1)
Returns:
@@ -221,7 +221,8 @@ def _build_context_file_citation_mapping(
"""
citation_mapping: CitationMapping = {}
for idx, file_meta in enumerate(file_metadata, start=starting_citation_num):
for idx, file_meta in enumerate(project_file_metadata, start=starting_citation_num):
# Create a SearchDoc for each project file
search_doc = SearchDoc(
document_id=file_meta.file_id,
chunk_ind=0,
@@ -241,28 +242,29 @@ def _build_context_file_citation_mapping(
def _build_project_message(
context_files: ExtractedContextFiles | None,
project_files: ExtractedProjectFiles | None,
token_counter: Callable[[str], int] | None,
) -> list[ChatMessageSimple]:
"""Build messages for context-injected / tool-backed files.
"""Build messages for project / tool-backed files.
Returns up to two messages:
1. The full-text files message (if file_texts is populated).
1. The full-text project files message (if project_file_texts is populated).
2. A lightweight metadata message for files the LLM should access via the
FileReaderTool (e.g. oversized files that don't fit in context).
FileReaderTool (e.g. oversized chat-attached files or project files that
don't fit in context).
"""
if not context_files:
if not project_files:
return []
messages: list[ChatMessageSimple] = []
if context_files.file_texts:
if project_files.project_file_texts:
messages.append(
_create_context_files_message(context_files, token_counter=None)
_create_project_files_message(project_files, token_counter=None)
)
if context_files.file_metadata_for_tool and token_counter:
if project_files.file_metadata_for_tool and token_counter:
messages.append(
_create_file_tool_metadata_message(
context_files.file_metadata_for_tool, token_counter
project_files.file_metadata_for_tool, token_counter
)
)
return messages
@@ -273,7 +275,7 @@ def construct_message_history(
custom_agent_prompt: ChatMessageSimple | None,
simple_chat_history: list[ChatMessageSimple],
reminder_message: ChatMessageSimple | None,
context_files: ExtractedContextFiles | None,
project_files: ExtractedProjectFiles | None,
available_tokens: int,
last_n_user_messages: int | None = None,
token_counter: Callable[[str], int] | None = None,
@@ -287,7 +289,7 @@ def construct_message_history(
# Build the project / file-metadata messages up front so we can use their
# actual token counts for the budget.
project_messages = _build_project_message(context_files, token_counter)
project_messages = _build_project_message(project_files, token_counter)
project_messages_tokens = sum(m.token_count for m in project_messages)
history_token_budget = available_tokens
@@ -443,17 +445,17 @@ def construct_message_history(
)
# Attach project images to the last user message
if context_files and context_files.image_files:
if project_files and project_files.project_image_files:
existing_images = last_user_message.image_files or []
last_user_message = ChatMessageSimple(
message=last_user_message.message,
token_count=last_user_message.token_count,
message_type=last_user_message.message_type,
image_files=existing_images + context_files.image_files,
image_files=existing_images + project_files.project_image_files,
)
# Build the final message list according to README ordering:
# [system], [history_before_last_user], [custom_agent], [context_files],
# [system], [history_before_last_user], [custom_agent], [project_files],
# [forgotten_files], [last_user_message], [messages_after_last_user], [reminder]
result = [system_prompt] if system_prompt else []
@@ -464,14 +466,14 @@ def construct_message_history(
if custom_agent_prompt:
result.append(custom_agent_prompt)
# 3. Add context files / file-metadata messages (inserted before last user message)
# 3. Add project files / file-metadata messages (inserted before last user message)
result.extend(project_messages)
# 4. Add forgotten-files metadata (right before the user's question)
if forgotten_files_message:
result.append(forgotten_files_message)
# 5. Add last user message (with context images attached)
# 5. Add last user message (with project images attached)
result.append(last_user_message)
# 6. Add messages after last user message (tool calls, responses, etc.)
@@ -545,11 +547,11 @@ def _create_file_tool_metadata_message(
)
def _create_context_files_message(
context_files: ExtractedContextFiles,
def _create_project_files_message(
project_files: ExtractedProjectFiles,
token_counter: Callable[[str], int] | None, # noqa: ARG001
) -> ChatMessageSimple:
"""Convert context files to a ChatMessageSimple message.
"""Convert project files to a ChatMessageSimple message.
Format follows the README specification for document representation.
"""
@@ -557,7 +559,7 @@ def _create_context_files_message(
# Format as documents JSON as described in README
documents_list = []
for idx, file_text in enumerate(context_files.file_texts, start=1):
for idx, file_text in enumerate(project_files.project_file_texts, start=1):
documents_list.append(
{
"document": idx,
@@ -568,10 +570,10 @@ def _create_context_files_message(
documents_json = json.dumps({"documents": documents_list}, indent=2)
message_content = f"Here are some documents provided for context, they may not all be relevant:\n{documents_json}"
# Use pre-calculated token count from context_files
# Use pre-calculated token count from project_files
return ChatMessageSimple(
message=message_content,
token_count=context_files.total_token_count,
token_count=project_files.total_token_count,
message_type=MessageType.USER,
)
@@ -582,7 +584,7 @@ def run_llm_loop(
simple_chat_history: list[ChatMessageSimple],
tools: list[Tool],
custom_agent_prompt: str | None,
context_files: ExtractedContextFiles,
project_files: ExtractedProjectFiles,
persona: Persona | None,
user_memory_context: UserMemoryContext | None,
llm: LLM,
@@ -625,9 +627,9 @@ def run_llm_loop(
# Add project file citation mappings if project files are present
project_citation_mapping: CitationMapping = {}
if context_files.file_metadata:
project_citation_mapping = _build_context_file_citation_mapping(
context_files.file_metadata
if project_files.project_file_metadata:
project_citation_mapping = _build_project_file_citation_mapping(
project_files.project_file_metadata
)
citation_processor.update_citation_mapping(project_citation_mapping)
@@ -645,7 +647,7 @@ def run_llm_loop(
# TODO allow citing of images in Projects. Since attached to the last user message, it has no text associated with it.
# One future workaround is to include the images as separate user messages with citation information and process those.
always_cite_documents: bool = bool(
context_files.use_as_search_filter or context_files.file_texts
project_files.project_as_filter or project_files.project_file_texts
)
should_cite_documents: bool = False
ran_image_gen: bool = False
@@ -786,7 +788,7 @@ def run_llm_loop(
custom_agent_prompt=custom_agent_prompt_msg,
simple_chat_history=simple_chat_history,
reminder_message=reminder_msg,
context_files=context_files,
project_files=project_files,
available_tokens=available_tokens,
token_counter=token_counter,
all_injected_file_metadata=all_injected_file_metadata,

View File

@@ -31,6 +31,13 @@ class CustomToolResponse(BaseModel):
tool_name: str
class ProjectSearchConfig(BaseModel):
"""Configuration for search tool availability in project context."""
search_usage: SearchToolUsage
disable_forced_tool: bool
class CreateChatSessionID(BaseModel):
chat_session_id: UUID
@@ -125,8 +132,8 @@ class ChatMessageSimple(BaseModel):
file_id: str | None = None
class ContextFileMetadata(BaseModel):
"""Metadata for a context-injected file to enable citation support."""
class ProjectFileMetadata(BaseModel):
"""Metadata for a project file to enable citation support."""
file_id: str
filename: str
@@ -160,28 +167,20 @@ class ChatHistoryResult(BaseModel):
all_injected_file_metadata: dict[str, FileToolMetadata]
class ExtractedContextFiles(BaseModel):
"""Result of attempting to load user files (from a project or persona) into context."""
file_texts: list[str]
image_files: list[ChatLoadedFile]
use_as_search_filter: bool
class ExtractedProjectFiles(BaseModel):
project_file_texts: list[str]
project_image_files: list[ChatLoadedFile]
project_as_filter: bool
total_token_count: int
# Metadata for project files to enable citations
project_file_metadata: list[ProjectFileMetadata]
# None if not a project
project_uncapped_token_count: int | None
# Lightweight metadata for files exposed via FileReaderTool
# (populated when files don't fit in context and vector DB is disabled).
file_metadata: list[ContextFileMetadata]
uncapped_token_count: int | None
# (populated when files don't fit in context and vector DB is disabled)
file_metadata_for_tool: list[FileToolMetadata] = []
class SearchParams(BaseModel):
"""Resolved search filter IDs and search-tool usage for a chat turn."""
search_project_id: int | None
search_persona_id: int | None
search_usage: SearchToolUsage
class LlmStepResult(BaseModel):
reasoning: str | None
answer: str | None

View File

@@ -3,7 +3,6 @@ IMPORTANT: familiarize yourself with the design concepts prior to contributing t
An overview can be found in the README.md file in this directory.
"""
import io
import re
import traceback
from collections.abc import Callable
@@ -34,11 +33,11 @@ from onyx.chat.models import ChatBasicResponse
from onyx.chat.models import ChatFullResponse
from onyx.chat.models import ChatLoadedFile
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ContextFileMetadata
from onyx.chat.models import CreateChatSessionID
from onyx.chat.models import ExtractedContextFiles
from onyx.chat.models import ExtractedProjectFiles
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import SearchParams
from onyx.chat.models import ProjectFileMetadata
from onyx.chat.models import ProjectSearchConfig
from onyx.chat.models import StreamingError
from onyx.chat.models import ToolCallResponse
from onyx.chat.prompt_utils import calculate_reserved_tokens
@@ -63,12 +62,11 @@ from onyx.db.models import ChatSession
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.projects import get_project_token_count
from onyx.db.projects import get_user_files_from_project
from onyx.db.tools import get_tools
from onyx.deep_research.dr_loop import run_deep_research_llm_loop
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.file_store.utils import load_in_memory_chat_files
from onyx.file_store.utils import verify_user_files
from onyx.llm.factory import get_llm_for_persona
@@ -141,12 +139,12 @@ def _collect_available_file_ids(
pass
if project_id:
user_files = get_user_files_from_project(
project_files = get_user_files_from_project(
project_id=project_id,
user_id=user_id,
db_session=db_session,
)
for uf in user_files:
for uf in project_files:
user_file_ids.add(uf.id)
return _AvailableFiles(
@@ -194,67 +192,9 @@ def _convert_loaded_files_to_chat_files(
return chat_files
def resolve_context_user_files(
persona: Persona,
def _extract_project_file_texts_and_images(
project_id: int | None,
user_id: UUID | None,
db_session: Session,
) -> list[UserFile]:
"""Apply the precedence rule to decide which user files to load.
A custom persona fully supersedes the project. When a chat uses a
custom persona, the project is purely organisational — its files are
never loaded and never made searchable.
Custom persona → persona's own user_files (may be empty).
Default persona inside a project → project files.
Otherwise → empty list.
"""
if persona.id != DEFAULT_PERSONA_ID:
return list(persona.user_files) if persona.user_files else []
if project_id:
return get_user_files_from_project(
project_id=project_id,
user_id=user_id,
db_session=db_session,
)
return []
def _empty_extracted_context_files() -> ExtractedContextFiles:
return ExtractedContextFiles(
file_texts=[],
image_files=[],
use_as_search_filter=False,
total_token_count=0,
file_metadata=[],
uncapped_token_count=None,
)
def _extract_text_from_in_memory_file(f: InMemoryChatFile) -> str | None:
"""Extract text content from an InMemoryChatFile.
PLAIN_TEXT: the content is pre-extracted UTF-8 plaintext stored during
ingestion — decode directly.
DOC / CSV / other text types: the content is the original file bytes —
use extract_file_text which handles encoding detection and format parsing.
"""
try:
if f.file_type == ChatFileType.PLAIN_TEXT:
return f.content.decode("utf-8", errors="ignore").replace("\x00", "")
return extract_file_text(
file=io.BytesIO(f.content),
file_name=f.filename or "",
break_on_unprocessable=False,
)
except Exception:
logger.warning(f"Failed to extract text from file {f.file_id}", exc_info=True)
return None
def extract_context_files(
user_files: list[UserFile],
llm_max_context_window: int,
reserved_token_count: int,
db_session: Session,
@@ -263,12 +203,8 @@ def extract_context_files(
# 60% of the LLM's max context window. The other benefit is that for projects with
# more files, this makes it so that we don't throw away the history too quickly every time.
max_llm_context_percentage: float = 0.6,
) -> ExtractedContextFiles:
"""Load user files into context if they fit; otherwise flag for search.
The caller is responsible for deciding *which* user files to pass in
(project files, persona files, etc.). This function only cares about
the all-or-nothing fit check and the actual content loading.
) -> ExtractedProjectFiles:
"""Extract text content from project files if they fit within the context window.
Args:
project_id: The project ID to load files from
@@ -277,95 +213,160 @@ def extract_context_files(
reserved_token_count: Number of tokens to reserve for other content
db_session: Database session
max_llm_context_percentage: Maximum percentage of the LLM context window to use.
Returns:
ExtractedContextFiles containing:
- List of text content strings from context files (text files only)
- List of image files from context (ChatLoadedFile objects)
ExtractedProjectFiles containing:
- List of text content strings from project files (text files only)
- List of image files from project (ChatLoadedFile objects)
- Project id if the the project should be provided as a filter in search or None if not.
- Total token count of all extracted files
- File metadata for context files
- Uncapped token count of all extracted files
- File metadata for files that don't fit in context and vector DB is disabled
"""
# TODO(yuhong): I believe this is not handling all file types correctly.
# TODO I believe this is not handling all file types correctly.
project_as_filter = False
if not project_id:
return ExtractedProjectFiles(
project_file_texts=[],
project_image_files=[],
project_as_filter=False,
total_token_count=0,
project_file_metadata=[],
project_uncapped_token_count=None,
)
if not user_files:
return _empty_extracted_context_files()
aggregate_tokens = sum(uf.token_count or 0 for uf in user_files)
max_actual_tokens = (
llm_max_context_window - reserved_token_count
) * max_llm_context_percentage
if aggregate_tokens >= max_actual_tokens:
tool_metadata = []
use_as_search_filter = not DISABLE_VECTOR_DB
if DISABLE_VECTOR_DB:
tool_metadata = _build_file_tool_metadata_for_user_files(user_files)
return ExtractedContextFiles(
file_texts=[],
image_files=[],
use_as_search_filter=use_as_search_filter,
total_token_count=0,
file_metadata=[],
uncapped_token_count=aggregate_tokens,
file_metadata_for_tool=tool_metadata,
)
# Files fit — load them into context
user_file_map = {str(uf.id): uf for uf in user_files}
in_memory_files = load_in_memory_chat_files(
user_file_ids=[uf.id for uf in user_files],
# Calculate total token count for all user files in the project
project_tokens = get_project_token_count(
project_id=project_id,
user_id=user_id,
db_session=db_session,
)
file_texts: list[str] = []
image_files: list[ChatLoadedFile] = []
file_metadata: list[ContextFileMetadata] = []
project_file_texts: list[str] = []
project_image_files: list[ChatLoadedFile] = []
project_file_metadata: list[ProjectFileMetadata] = []
total_token_count = 0
if project_tokens < max_actual_tokens:
# Load project files into memory using cached plaintext when available
project_user_files = get_user_files_from_project(
project_id=project_id,
user_id=user_id,
db_session=db_session,
)
if project_user_files:
# Create a mapping from file_id to UserFile for token count lookup
user_file_map = {str(file.id): file for file in project_user_files}
for f in in_memory_files:
uf = user_file_map.get(str(f.file_id))
if f.file_type.is_text_file():
text_content = _extract_text_from_in_memory_file(f)
if not text_content:
continue
file_texts.append(text_content)
file_metadata.append(
ContextFileMetadata(
file_id=str(f.file_id),
filename=f.filename or f"file_{f.file_id}",
file_content=text_content,
)
)
if uf and uf.token_count:
total_token_count += uf.token_count
elif f.file_type == ChatFileType.IMAGE:
token_count = uf.token_count if uf and uf.token_count else 0
total_token_count += token_count
image_files.append(
ChatLoadedFile(
file_id=f.file_id,
content=f.content,
file_type=f.file_type,
filename=f.filename,
content_text=None,
token_count=token_count,
)
project_file_ids = [file.id for file in project_user_files]
in_memory_project_files = load_in_memory_chat_files(
user_file_ids=project_file_ids,
db_session=db_session,
)
return ExtractedContextFiles(
file_texts=file_texts,
image_files=image_files,
use_as_search_filter=False,
# Extract text content from loaded files
for file in in_memory_project_files:
if file.file_type.is_text_file():
try:
text_content = file.content.decode("utf-8", errors="ignore")
# Strip null bytes
text_content = text_content.replace("\x00", "")
if text_content:
project_file_texts.append(text_content)
# Add metadata for citation support
project_file_metadata.append(
ProjectFileMetadata(
file_id=str(file.file_id),
filename=file.filename or f"file_{file.file_id}",
file_content=text_content,
)
)
# Add token count for text file
user_file = user_file_map.get(str(file.file_id))
if user_file and user_file.token_count:
total_token_count += user_file.token_count
except Exception:
# Skip files that can't be decoded
pass
elif file.file_type == ChatFileType.IMAGE:
# Convert InMemoryChatFile to ChatLoadedFile
user_file = user_file_map.get(str(file.file_id))
token_count = (
user_file.token_count
if user_file and user_file.token_count
else 0
)
total_token_count += token_count
chat_loaded_file = ChatLoadedFile(
file_id=file.file_id,
content=file.content,
file_type=file.file_type,
filename=file.filename,
content_text=None, # Images don't have text content
token_count=token_count,
)
project_image_files.append(chat_loaded_file)
else:
if DISABLE_VECTOR_DB:
# Without a vector DB we can't use project-as-filter search.
# Instead, build lightweight metadata so the LLM can call the
# FileReaderTool to inspect individual files on demand.
file_metadata_for_tool = _build_file_tool_metadata_for_project(
project_id=project_id,
user_id=user_id,
db_session=db_session,
)
return ExtractedProjectFiles(
project_file_texts=[],
project_image_files=[],
project_as_filter=False,
total_token_count=0,
project_file_metadata=[],
project_uncapped_token_count=project_tokens,
file_metadata_for_tool=file_metadata_for_tool,
)
project_as_filter = True
return ExtractedProjectFiles(
project_file_texts=project_file_texts,
project_image_files=project_image_files,
project_as_filter=project_as_filter,
total_token_count=total_token_count,
file_metadata=file_metadata,
uncapped_token_count=aggregate_tokens,
project_file_metadata=project_file_metadata,
project_uncapped_token_count=project_tokens,
)
APPROX_CHARS_PER_TOKEN = 4
def _build_file_tool_metadata_for_project(
project_id: int,
user_id: UUID | None,
db_session: Session,
) -> list[FileToolMetadata]:
"""Build lightweight FileToolMetadata for every file in a project.
Used when files are too large to fit in context and the vector DB is
disabled, so the LLM needs to know which files it can read via the
FileReaderTool.
"""
project_user_files = get_user_files_from_project(
project_id=project_id,
user_id=user_id,
db_session=db_session,
)
return [
FileToolMetadata(
file_id=str(uf.id),
filename=uf.name,
approx_char_count=(uf.token_count or 0) * APPROX_CHARS_PER_TOKEN,
)
for uf in project_user_files
]
def _build_file_tool_metadata_for_user_files(
user_files: list[UserFile],
) -> list[FileToolMetadata]:
@@ -380,46 +381,55 @@ def _build_file_tool_metadata_for_user_files(
]
def determine_search_params(
persona_id: int,
def _get_project_search_availability(
project_id: int | None,
extracted_context_files: ExtractedContextFiles,
) -> SearchParams:
"""Decide which search filter IDs and search-tool usage apply for a chat turn.
persona_id: int | None,
loaded_project_files: bool,
project_has_files: bool,
forced_tool_id: int | None,
search_tool_id: int | None,
) -> ProjectSearchConfig:
"""Determine search tool availability based on project context.
A custom persona fully supersedes the project — project files are never
searchable and the search tool config is entirely controlled by the
persona. The project_id filter is only set for the default persona.
Search is disabled when ALL of the following are true:
- User is in a project
- Using the default persona (not a custom agent)
- Project files are already loaded in context
For the default persona inside a project:
- Files overflow → ENABLED (vector DB scopes to these files)
- Files fit → DISABLED (content already in prompt)
- No files at all → DISABLED (nothing to search)
When search is disabled and the user tried to force the search tool,
that forcing is also disabled.
Returns AUTO (follow persona config) in all other cases.
"""
is_custom_persona = persona_id != DEFAULT_PERSONA_ID
# Not in a project, this should have no impact on search tool availability
if not project_id:
return ProjectSearchConfig(
search_usage=SearchToolUsage.AUTO, disable_forced_tool=False
)
search_project_id: int | None = None
search_persona_id: int | None = None
if extracted_context_files.use_as_search_filter:
if is_custom_persona:
search_persona_id = persona_id
else:
search_project_id = project_id
# Custom persona in project - let persona config decide
# Even if there are no files in the project, it's still guided by the persona config.
if persona_id != DEFAULT_PERSONA_ID:
return ProjectSearchConfig(
search_usage=SearchToolUsage.AUTO, disable_forced_tool=False
)
search_usage = SearchToolUsage.AUTO
if not is_custom_persona and project_id:
has_context_files = bool(extracted_context_files.uncapped_token_count)
files_loaded_in_context = bool(extracted_context_files.file_texts)
# If in a project with the default persona and the files have been already loaded into the context or
# there are no files in the project, disable search as there is nothing to search for.
if loaded_project_files or not project_has_files:
user_forced_search = (
forced_tool_id is not None
and search_tool_id is not None
and forced_tool_id == search_tool_id
)
return ProjectSearchConfig(
search_usage=SearchToolUsage.DISABLED,
disable_forced_tool=user_forced_search,
)
if extracted_context_files.use_as_search_filter:
search_usage = SearchToolUsage.ENABLED
elif files_loaded_in_context or not has_context_files:
search_usage = SearchToolUsage.DISABLED
return SearchParams(
search_project_id=search_project_id,
search_persona_id=search_persona_id,
search_usage=search_usage,
# Default persona in a project with files, but also the files have not been loaded into the context already.
return ProjectSearchConfig(
search_usage=SearchToolUsage.ENABLED, disable_forced_tool=False
)
@@ -651,37 +661,26 @@ def handle_stream_message_objects(
user_memory_context=prompt_memory_context,
)
# Determine which user files to use. A custom persona fully
# supersedes the project — project files are never loaded or
# searchable when a custom persona is in play. Only the default
# persona inside a project uses the project's files.
context_user_files = resolve_context_user_files(
persona=persona,
# Process projects, if all of the files fit in the context, it doesn't need to use RAG
extracted_project_files = _extract_project_file_texts_and_images(
project_id=chat_session.project_id,
user_id=user_id,
db_session=db_session,
)
extracted_context_files = extract_context_files(
user_files=context_user_files,
llm_max_context_window=llm.config.max_input_tokens,
reserved_token_count=reserved_token_count,
db_session=db_session,
)
search_params = determine_search_params(
persona_id=persona.id,
project_id=chat_session.project_id,
extracted_context_files=extracted_context_files,
)
# Also grant access to persona-attached user files for FileReaderTool
if persona.user_files:
existing = set(available_files.user_file_ids)
for uf in persona.user_files:
if uf.id not in existing:
available_files.user_file_ids.append(uf.id)
# When the vector DB is disabled, persona-attached user_files have no
# search pipeline path. Inject them as file_metadata_for_tool so the
# LLM can read them via the FileReaderTool.
if DISABLE_VECTOR_DB and persona.user_files:
persona_file_metadata = _build_file_tool_metadata_for_user_files(
persona.user_files
)
# Merge persona file metadata into the extracted project files
extracted_project_files.file_metadata_for_tool.extend(persona_file_metadata)
# Build a mapping of tool_id to tool_name for history reconstruction
all_tools = get_tools(db_session)
tool_id_to_name_map = {tool.id: tool.name for tool in all_tools}
@@ -690,17 +689,30 @@ def handle_stream_message_objects(
None,
)
# Determine if search should be disabled for this project context
forced_tool_id = new_msg_req.forced_tool_id
if (
search_params.search_usage == SearchToolUsage.DISABLED
and forced_tool_id is not None
and search_tool_id is not None
and forced_tool_id == search_tool_id
):
project_search_config = _get_project_search_availability(
project_id=chat_session.project_id,
persona_id=persona.id,
loaded_project_files=bool(extracted_project_files.project_file_texts),
project_has_files=bool(
extracted_project_files.project_uncapped_token_count
),
forced_tool_id=new_msg_req.forced_tool_id,
search_tool_id=search_tool_id,
)
if project_search_config.disable_forced_tool:
forced_tool_id = None
emitter = get_default_emitter()
# Also grant access to persona-attached user files
if persona.user_files:
existing = set(available_files.user_file_ids)
for uf in persona.user_files:
if uf.id not in existing:
available_files.user_file_ids.append(uf.id)
# Construct tools based on the persona configurations
tool_dict = construct_tools(
persona=persona,
@@ -710,8 +722,11 @@ def handle_stream_message_objects(
llm=llm,
search_tool_config=SearchToolConfig(
user_selected_filters=new_msg_req.internal_search_filters,
project_id=search_params.search_project_id,
persona_id=search_params.search_persona_id,
project_id=(
chat_session.project_id
if extracted_project_files.project_as_filter
else None
),
bypass_acl=bypass_acl,
slack_context=slack_context,
enable_slack_search=_should_enable_slack_search(
@@ -729,7 +744,7 @@ def handle_stream_message_objects(
chat_file_ids=available_files.chat_file_ids,
),
allowed_tool_ids=new_msg_req.allowed_tool_ids,
search_usage_forcing_setting=search_params.search_usage,
search_usage_forcing_setting=project_search_config.search_usage,
)
tools: list[Tool] = []
for tool_list in tool_dict.values():
@@ -768,7 +783,7 @@ def handle_stream_message_objects(
chat_history_result = convert_chat_history(
chat_history=chat_history,
files=files,
context_image_files=extracted_context_files.image_files,
project_image_files=extracted_project_files.project_image_files,
additional_context=additional_context,
token_counter=token_counter,
tool_id_to_name_map=tool_id_to_name_map,
@@ -864,54 +879,46 @@ def handle_stream_message_objects(
# (user has already responded to a clarification question)
skip_clarification = is_last_assistant_message_clarification(chat_history)
# NOTE: we _could_ pass in a zero argument function since emitter and state_container
# are just passed in immediately anyways, but the abstraction is cleaner this way.
yield from run_chat_loop_with_state_containers(
lambda emitter, state_container: run_deep_research_llm_loop(
emitter=emitter,
state_container=state_container,
simple_chat_history=simple_chat_history,
tools=tools,
custom_agent_prompt=custom_agent_prompt,
llm=llm,
token_counter=token_counter,
db_session=db_session,
skip_clarification=skip_clarification,
user_identity=user_identity,
chat_session_id=str(chat_session.id),
all_injected_file_metadata=all_injected_file_metadata,
),
run_deep_research_llm_loop,
llm_loop_completion_callback,
is_connected=check_is_connected,
emitter=emitter,
state_container=state_container,
simple_chat_history=simple_chat_history,
tools=tools,
custom_agent_prompt=custom_agent_prompt,
llm=llm,
token_counter=token_counter,
db_session=db_session,
skip_clarification=skip_clarification,
user_identity=user_identity,
chat_session_id=str(chat_session.id),
all_injected_file_metadata=all_injected_file_metadata,
)
else:
yield from run_chat_loop_with_state_containers(
lambda emitter, state_container: run_llm_loop(
emitter=emitter,
state_container=state_container,
simple_chat_history=simple_chat_history,
tools=tools,
custom_agent_prompt=custom_agent_prompt,
context_files=extracted_context_files,
persona=persona,
user_memory_context=user_memory_context,
llm=llm,
token_counter=token_counter,
db_session=db_session,
forced_tool_id=forced_tool_id,
user_identity=user_identity,
chat_session_id=str(chat_session.id),
chat_files=chat_files_for_tools,
include_citations=new_msg_req.include_citations,
all_injected_file_metadata=all_injected_file_metadata,
inject_memories_in_prompt=user.use_memories,
),
run_llm_loop,
llm_loop_completion_callback,
is_connected=check_is_connected, # Not passed through to run_llm_loop
emitter=emitter,
state_container=state_container,
simple_chat_history=simple_chat_history,
tools=tools,
custom_agent_prompt=custom_agent_prompt,
project_files=extracted_project_files,
persona=persona,
user_memory_context=user_memory_context,
llm=llm,
token_counter=token_counter,
db_session=db_session,
forced_tool_id=forced_tool_id,
user_identity=user_identity,
chat_session_id=str(chat_session.id),
chat_files=chat_files_for_tools,
include_citations=new_msg_req.include_citations,
all_injected_file_metadata=all_injected_file_metadata,
inject_memories_in_prompt=user.use_memories,
)
except ValueError as e:

View File

@@ -294,12 +294,6 @@ ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
and os.environ.get("ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX", "").lower() == "true"
)
# Whether we should check for and create an index if necessary every time we
# instantiate an OpenSearchDocumentIndex on multitenant cloud. Defaults to True.
VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
os.environ.get("VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT", "true").lower()
== "true"
)
VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost"
# NOTE: this is used if and only if the vespa config server is accessible via a

View File

@@ -23,6 +23,7 @@ from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.serialization import pkcs12
from office365.graph_client import GraphClient # type: ignore[import-untyped]
from office365.intune.organizations.organization import Organization # type: ignore[import-untyped]
from office365.onedrive.driveitems.driveItem import DriveItem # type: ignore[import-untyped]
from office365.onedrive.sites.site import Site # type: ignore[import-untyped]
from office365.onedrive.sites.sites_with_root import SitesWithRoot # type: ignore[import-untyped]
@@ -871,56 +872,6 @@ class SharepointConnector(
"Site URLs must be full Sharepoint URLs (e.g. https://your-tenant.sharepoint.com/sites/your-site or https://your-tenant.sharepoint.com/teams/your-team)"
)
def _extract_tenant_domain_from_sites(self) -> str | None:
"""Extract the tenant domain from configured site URLs.
Site URLs look like https://{tenant}.sharepoint.com/sites/... so the
tenant domain is the first label of the hostname.
"""
for site_url in self.sites:
try:
hostname = urlsplit(site_url.strip()).hostname
except ValueError:
continue
if not hostname:
continue
tenant = hostname.split(".")[0]
if tenant:
return tenant
logger.warning(f"No tenant domain found from {len(self.sites)} sites")
return None
def _resolve_tenant_domain_from_root_site(self) -> str:
"""Resolve tenant domain via GET /v1.0/sites/root which only requires
Sites.Read.All (a permission the connector already needs)."""
root_site = self.graph_client.sites.root.get().execute_query()
hostname = root_site.site_collection.hostname
if not hostname:
raise ConnectorValidationError(
"Could not determine tenant domain from root site"
)
tenant_domain = hostname.split(".")[0]
logger.info(
"Resolved tenant domain '%s' from root site hostname '%s'",
tenant_domain,
hostname,
)
return tenant_domain
def _resolve_tenant_domain(self) -> str:
"""Determine the tenant domain, preferring site URLs over a Graph API
call to avoid needing extra permissions."""
from_sites = self._extract_tenant_domain_from_sites()
if from_sites:
logger.info(
"Resolved tenant domain '%s' from site URLs",
from_sites,
)
return from_sites
logger.info("No site URLs available; resolving tenant domain from root site")
return self._resolve_tenant_domain_from_root_site()
@property
def graph_client(self) -> GraphClient:
if self._graph_client is None:
@@ -1638,11 +1589,6 @@ class SharepointConnector(
sp_private_key = credentials.get("sp_private_key")
sp_certificate_password = credentials.get("sp_certificate_password")
if not sp_client_id:
raise ConnectorValidationError("Client ID is required")
if not sp_directory_id:
raise ConnectorValidationError("Directory (tenant) ID is required")
authority_url = f"{self.authority_host}/{sp_directory_id}"
if auth_method == SharepointAuthMethod.CERTIFICATE.value:
@@ -1695,7 +1641,21 @@ class SharepointConnector(
_acquire_token_for_graph, environment=self._azure_environment
)
if auth_method == SharepointAuthMethod.CERTIFICATE.value:
self.sp_tenant_domain = self._resolve_tenant_domain()
org = self.graph_client.organization.get().execute_query()
if not org or len(org) == 0:
raise ConnectorValidationError("No organization found")
tenant_info: Organization = org[
0
] # Access first item directly from collection
if not tenant_info.verified_domains:
raise ConnectorValidationError("No verified domains found for tenant")
sp_tenant_domain = tenant_info.verified_domains[0].name
if not sp_tenant_domain:
raise ConnectorValidationError("No verified domains found for tenant")
# remove the .onmicrosoft.com part
self.sp_tenant_domain = sp_tenant_domain.split(".")[0]
return None
def _get_drive_names_for_site(self, site_url: str) -> list[str]:

View File

@@ -21,8 +21,8 @@ from onyx.configs.app_configs import POSTGRES_POOL_RECYCLE
from onyx.configs.app_configs import POSTGRES_PORT
from onyx.configs.app_configs import POSTGRES_USE_NULL_POOL
from onyx.configs.app_configs import POSTGRES_USER
from onyx.db.engine.iam_auth import create_ssl_context_if_iam
from onyx.db.engine.iam_auth import get_iam_auth_token
from onyx.db.engine.iam_auth import ssl_context
from onyx.db.engine.sql_engine import ASYNC_DB_API
from onyx.db.engine.sql_engine import build_connection_string
from onyx.db.engine.sql_engine import is_valid_schema_name
@@ -66,7 +66,7 @@ def get_sqlalchemy_async_engine() -> AsyncEngine:
if app_name:
connect_args["server_settings"] = {"application_name": app_name}
connect_args["ssl"] = create_ssl_context_if_iam()
connect_args["ssl"] = ssl_context
engine_kwargs = {
"connect_args": connect_args,
@@ -97,7 +97,7 @@ def get_sqlalchemy_async_engine() -> AsyncEngine:
user = POSTGRES_USER
token = get_iam_auth_token(host, port, user, AWS_REGION_NAME)
cparams["password"] = token
cparams["ssl"] = create_ssl_context_if_iam()
cparams["ssl"] = ssl_context
return _ASYNC_ENGINE

View File

@@ -1,4 +1,3 @@
import functools
import os
import ssl
from typing import Any
@@ -49,9 +48,11 @@ def provide_iam_token(
configure_psycopg2_iam_auth(cparams, host, port, user, region)
@functools.cache
def create_ssl_context_if_iam() -> ssl.SSLContext | None:
    """Build the SSL context for IAM-authenticated connections, if enabled.

    Returns:
        ``None`` when ``USE_IAM_AUTH`` is falsy; otherwise a default SSL
        context created with ``SSL_CERT_FILE`` as its CA file. The result is
        memoized by ``functools.cache``, so later calls return the same value.
    """
    if not USE_IAM_AUTH:
        return None
    return ssl.create_default_context(cafile=SSL_CERT_FILE)
# Evaluated once when this statement runs; holds either an SSLContext or None
# depending on what create_ssl_context_if_iam() returns.
ssl_context = create_ssl_context_if_iam()

View File

@@ -619,7 +619,7 @@ def update_default_provider(provider_id: int, db_session: Session) -> None:
_update_default_model(
db_session,
provider_id,
provider.default_model_name, # type: ignore[arg-type]
provider.default_model_name,
LLMModelFlowType.CHAT,
)

View File

@@ -2822,17 +2822,13 @@ class LLMProvider(Base):
custom_config: Mapped[dict[str, str] | None] = mapped_column(
postgresql.JSONB(), nullable=True
)
# Deprecated: use LLMModelFlow with CHAT flow type instead
default_model_name: Mapped[str | None] = mapped_column(String, nullable=True)
default_model_name: Mapped[str] = mapped_column(String)
deployment_name: Mapped[str | None] = mapped_column(String, nullable=True)
# Deprecated: use LLMModelFlow.is_default with CHAT flow type instead
is_default_provider: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
# Deprecated: use LLMModelFlow.is_default with VISION flow type instead
# should only be set for a single provider
is_default_provider: Mapped[bool | None] = mapped_column(Boolean, unique=True)
is_default_vision_provider: Mapped[bool | None] = mapped_column(Boolean)
# Deprecated: use LLMModelFlow with VISION flow type instead
default_vision_model: Mapped[str | None] = mapped_column(String, nullable=True)
# EE only
is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
@@ -2883,7 +2879,6 @@ class ModelConfiguration(Base):
# - The end-user is configuring a model and chooses not to set a max-input-tokens limit.
max_input_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
# Deprecated: use LLMModelFlow with VISION flow type instead
supports_image_input: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
# Human-readable display name for the model.

View File

@@ -256,6 +256,9 @@ def create_update_persona(
try:
# Default persona validation
if create_persona_request.is_default_persona:
if not create_persona_request.is_public:
raise ValueError("Cannot make a default persona non public")
# Curators can edit default personas, but not make them
if user.role == UserRole.CURATOR or user.role == UserRole.GLOBAL_CURATOR:
pass
@@ -332,7 +335,6 @@ def update_persona_shared(
db_session: Session,
group_ids: list[int] | None = None,
is_public: bool | None = None,
label_ids: list[int] | None = None,
) -> None:
"""Simplified version of `create_update_persona` which only touches the
accessibility rather than any of the logic (e.g. prompt, connected data sources,
@@ -342,7 +344,9 @@ def update_persona_shared(
)
if user and user.role != UserRole.ADMIN and persona.user_id != user.id:
raise PermissionError("You don't have permission to modify this persona")
raise HTTPException(
status_code=403, detail="You don't have permission to modify this persona"
)
versioned_update_persona_access = fetch_versioned_implementation(
"onyx.db.persona", "update_persona_access"
@@ -356,15 +360,6 @@ def update_persona_shared(
group_ids=group_ids,
)
if label_ids is not None:
labels = (
db_session.query(PersonaLabel).filter(PersonaLabel.id.in_(label_ids)).all()
)
if len(labels) != len(label_ids):
raise ValueError("Some label IDs were not found in the database")
persona.labels.clear()
persona.labels = labels
db_session.commit()
@@ -970,8 +965,6 @@ def upsert_persona(
labels = (
db_session.query(PersonaLabel).filter(PersonaLabel.id.in_(label_ids)).all()
)
if len(labels) != len(label_ids):
raise ValueError("Some label IDs were not found in the database")
# Fetch and attach hierarchy_nodes by IDs
hierarchy_nodes = None
@@ -1168,6 +1161,9 @@ def update_persona_is_default(
db_session=db_session, persona_id=persona_id, user=user, get_editable=True
)
if not persona.is_public:
persona.is_public = True
persona.is_default_persona = is_default
db_session.commit()

View File

@@ -6,7 +6,6 @@ from sqlalchemy import select
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from onyx.db.models import Project__UserFile
from onyx.db.models import UserFile
@@ -58,19 +57,12 @@ def fetch_user_project_ids_for_user_files(
db_session: Session,
) -> dict[str, list[int]]:
"""Fetch user project ids for specified user files"""
user_file_uuid_ids = [UUID(user_file_id) for user_file_id in user_file_ids]
stmt = select(Project__UserFile.user_file_id, Project__UserFile.project_id).where(
Project__UserFile.user_file_id.in_(user_file_uuid_ids)
)
rows = db_session.execute(stmt).all()
user_file_id_to_project_ids: dict[str, list[int]] = {
user_file_id: [] for user_file_id in user_file_ids
stmt = select(UserFile).where(UserFile.id.in_(user_file_ids))
results = db_session.execute(stmt).scalars().all()
return {
str(user_file.id): [project.id for project in user_file.projects]
for user_file in results
}
for user_file_id, project_id in rows:
user_file_id_to_project_ids[str(user_file_id)].append(project_id)
return user_file_id_to_project_ids
def fetch_persona_ids_for_user_files(

View File

@@ -139,7 +139,7 @@ def generate_final_report(
custom_agent_prompt=None,
simple_chat_history=history,
reminder_message=reminder_message,
context_files=None,
project_files=None,
available_tokens=llm.config.max_input_tokens,
all_injected_file_metadata=all_injected_file_metadata,
)
@@ -257,7 +257,7 @@ def run_deep_research_llm_loop(
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=None,
project_files=None,
available_tokens=available_tokens,
last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
all_injected_file_metadata=all_injected_file_metadata,
@@ -321,7 +321,7 @@ def run_deep_research_llm_loop(
custom_agent_prompt=None,
simple_chat_history=simple_chat_history + [reminder_message],
reminder_message=None,
context_files=None,
project_files=None,
available_tokens=available_tokens,
last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT + 1,
all_injected_file_metadata=all_injected_file_metadata,
@@ -485,7 +485,7 @@ def run_deep_research_llm_loop(
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=first_cycle_reminder_message,
context_files=None,
project_files=None,
available_tokens=available_tokens,
last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
all_injected_file_metadata=all_injected_file_metadata,

View File

@@ -11,7 +11,6 @@ from onyx.document_index.opensearch.opensearch_document_index import (
OpenSearchOldDocumentIndex,
)
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
from shared_configs.configs import MULTI_TENANT
@@ -50,11 +49,8 @@ def get_default_document_index(
opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
if opensearch_retrieval_enabled:
indexing_setting = IndexingSetting.from_db_model(search_settings)
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=secondary_index_name,
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=secondary_large_chunks_enabled,
@@ -122,11 +118,8 @@ def get_all_document_indices(
)
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
indexing_setting = IndexingSetting.from_db_model(search_settings)
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,

View File

@@ -1,7 +1,5 @@
import logging
import time
from contextlib import AbstractContextManager
from contextlib import nullcontext
from typing import Any
from typing import Generic
from typing import TypeVar
@@ -85,26 +83,22 @@ def get_new_body_without_vectors(body: dict[str, Any]) -> dict[str, Any]:
return new_body
class OpenSearchClient(AbstractContextManager):
"""Client for interacting with OpenSearch for cluster-level operations.
class OpenSearchClient:
"""Client for interacting with OpenSearch.
Args:
host: The host of the OpenSearch cluster.
port: The port of the OpenSearch cluster.
auth: The authentication credentials for the OpenSearch cluster. A tuple
of (username, password).
use_ssl: Whether to use SSL for the OpenSearch cluster. Defaults to
True.
verify_certs: Whether to verify the SSL certificates for the OpenSearch
cluster. Defaults to False.
ssl_show_warn: Whether to show warnings for SSL certificates. Defaults
to False.
timeout: The timeout for the OpenSearch cluster. Defaults to
DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S.
OpenSearch's Python module has pretty bad typing support so this client
attempts to protect the rest of the codebase from this. As a consequence,
most methods here return the minimum data needed for the rest of Onyx, and
tend to rely on Exceptions to handle errors.
TODO(andrei): This class currently assumes the structure of the database
schema when it returns a DocumentChunk. Make the class, or at least the
search method, templated on the structure the caller can expect.
"""
def __init__(
self,
index_name: str,
host: str = OPENSEARCH_HOST,
port: int = OPENSEARCH_REST_API_PORT,
auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),
@@ -113,8 +107,9 @@ class OpenSearchClient(AbstractContextManager):
ssl_show_warn: bool = False,
timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
):
self._index_name = index_name
logger.debug(
f"Creating OpenSearch client with host {host}, port {port} and timeout {timeout} seconds."
f"Creating OpenSearch client for index {index_name} with host {host} and port {port} and timeout {timeout} seconds."
)
self._client = OpenSearch(
hosts=[{"host": host, "port": port}],
@@ -130,142 +125,6 @@ class OpenSearchClient(AbstractContextManager):
# your request body that is less than this value.
timeout=timeout,
)
def __exit__(self, *_: Any) -> None:
    """Close the client when leaving a ``with`` block.

    The exception details passed by the context-manager protocol are ignored,
    and ``None`` is returned, so an in-flight exception is not suppressed.
    """
    self.close()
def __del__(self) -> None:
    """Best-effort close of the client when the object is finalized."""
    try:
        self.close()
    except Exception:
        # Deliberately swallowed: a failure to close during finalization is
        # not reported to anyone.
        pass
@log_function_time(print_only=True, debug_only=True, include_args=True)
def create_search_pipeline(
    self,
    pipeline_id: str,
    pipeline_body: dict[str, Any],
) -> None:
    """Put a search pipeline through the underlying OpenSearch client.

    The accepted body format is described in the OpenSearch documentation:
    https://docs.opensearch.org/latest/search-plugins/search-pipelines/index/

    Args:
        pipeline_id: ID under which the pipeline is stored.
        pipeline_body: Pipeline definition sent as the request body.

    Raises:
        RuntimeError: The response did not report a truthy "acknowledged".
        Exception: Whatever the underlying client call raises.
    """
    response = self._client.search_pipeline.put(id=pipeline_id, body=pipeline_body)
    acknowledged = response.get("acknowledged", False)
    if acknowledged:
        return
    raise RuntimeError(f"Failed to create search pipeline {pipeline_id}.")
@log_function_time(print_only=True, debug_only=True, include_args=True)
def delete_search_pipeline(self, pipeline_id: str) -> None:
    """Delete a search pipeline through the underlying OpenSearch client.

    Args:
        pipeline_id: ID of the search pipeline to remove.

    Raises:
        RuntimeError: The response did not report a truthy "acknowledged".
        Exception: Whatever the underlying client call raises.
    """
    response = self._client.search_pipeline.delete(id=pipeline_id)
    acknowledged = response.get("acknowledged", False)
    if acknowledged:
        return
    raise RuntimeError(f"Failed to delete search pipeline {pipeline_id}.")
@log_function_time(print_only=True, debug_only=True, include_args=True)
def put_cluster_settings(self, settings: dict[str, Any]) -> bool:
    """Send cluster settings through the underlying OpenSearch client.

    A response without a truthy "acknowledged" is logged as an error and
    reported through the return value rather than raised.

    Args:
        settings: Settings body passed to the cluster put-settings call.

    Raises:
        Exception: Whatever the underlying client call raises.

    Returns:
        True if the response was acknowledged, False otherwise.
    """
    response = self._client.cluster.put_settings(body=settings)
    if not response.get("acknowledged", False):
        logger.error(f"Failed to put cluster settings: {response}.")
        return False
    logger.info("Successfully put cluster settings.")
    return True
@log_function_time(print_only=True, debug_only=True)
def ping(self) -> bool:
    """Ping OpenSearch through the underlying client.

    Returns:
        True if OpenSearch could be reached, False if it could not.
    """
    return self._client.ping()
@log_function_time(print_only=True, debug_only=True)
def close(self) -> None:
    """Close the underlying OpenSearch client.

    Raises:
        Exception: There was an error closing the client.
    """
    self._client.close()
class OpenSearchIndexClient(OpenSearchClient):
"""Client for interacting with OpenSearch for index-level operations.
OpenSearch's Python module has pretty bad typing support so this client
attempts to protect the rest of the codebase from this. As a consequence,
most methods here return the minimum data needed for the rest of Onyx, and
tend to rely on Exceptions to handle errors.
TODO(andrei): This class currently assumes the structure of the database
schema when it returns a DocumentChunk. Make the class, or at least the
search method, templated on the structure the caller can expect.
Args:
index_name: The name of the index to interact with.
host: The host of the OpenSearch cluster.
port: The port of the OpenSearch cluster.
auth: The authentication credentials for the OpenSearch cluster. A tuple
of (username, password).
use_ssl: Whether to use SSL for the OpenSearch cluster. Defaults to
True.
verify_certs: Whether to verify the SSL certificates for the OpenSearch
cluster. Defaults to False.
ssl_show_warn: Whether to show warnings for SSL certificates. Defaults
to False.
timeout: The timeout for the OpenSearch cluster. Defaults to
DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S.
"""
def __init__(
    self,
    index_name: str,
    host: str = OPENSEARCH_HOST,
    port: int = OPENSEARCH_REST_API_PORT,
    auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),
    use_ssl: bool = True,
    verify_certs: bool = False,
    ssl_show_warn: bool = False,
    timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
):
    """Create a client bound to a single index.

    All connection parameters are forwarded unchanged to the parent
    initializer; only ``index_name`` is stored on this instance.
    """
    # Connection options are handled entirely by the parent class.
    super().__init__(
        host=host,
        port=port,
        auth=auth,
        use_ssl=use_ssl,
        verify_certs=verify_certs,
        ssl_show_warn=ssl_show_warn,
        timeout=timeout,
    )
    # Index that this instance's index-level operations act on.
    self._index_name = index_name
    logger.debug(
        f"OpenSearch client created successfully for index {self._index_name}."
    )
@@ -333,38 +192,6 @@ class OpenSearchIndexClient(OpenSearchClient):
"""
return self._client.indices.exists(index=self._index_name)
@log_function_time(print_only=True, debug_only=True, include_args=True)
def put_mapping(self, mappings: dict[str, Any]) -> None:
    """Apply a mapping definition to this client's index.

    Per the OpenSearch put-mapping documentation, the call is expected to be
    idempotent: fields already present with the same definition are a no-op,
    new fields are added, and changing the type of an existing field is
    rejected (a reindex is needed in that case).
    https://docs.opensearch.org/latest/api-reference/index-apis/put-mapping/

    Args:
        mappings: The complete mapping definition to send; it is merged with
            the mappings already present in the index.

    Raises:
        RuntimeError: The response did not report a truthy "acknowledged".
        Exception: Whatever the underlying client call raises, e.g. when an
            existing field's type would change.
    """
    logger.debug(
        f"Putting mappings for index {self._index_name} with mappings {mappings}."
    )
    response = self._client.indices.put_mapping(
        index=self._index_name, body=mappings
    )
    acknowledged = response.get("acknowledged", False)
    if acknowledged:
        logger.debug(f"Successfully put mappings for index {self._index_name}.")
        return
    raise RuntimeError(
        f"Failed to put the mapping update for index {self._index_name}."
    )
@log_function_time(print_only=True, debug_only=True, include_args=True)
def validate_index(self, expected_mappings: dict[str, Any]) -> bool:
"""Validates the index.
@@ -783,6 +610,43 @@ class OpenSearchIndexClient(OpenSearchClient):
)
return DocumentChunk.model_validate(document_chunk_source)
@log_function_time(print_only=True, debug_only=True, include_args=True)
def create_search_pipeline(
    self,
    pipeline_id: str,
    pipeline_body: dict[str, Any],
) -> None:
    """Put a search pipeline through the underlying OpenSearch client.

    The accepted body format is described in the OpenSearch documentation:
    https://docs.opensearch.org/latest/search-plugins/search-pipelines/index/

    Args:
        pipeline_id: ID under which the pipeline is stored.
        pipeline_body: Pipeline definition sent as the request body.

    Raises:
        RuntimeError: The response did not report a truthy "acknowledged".
        Exception: Whatever the underlying client call raises.
    """
    put_result = self._client.search_pipeline.put(id=pipeline_id, body=pipeline_body)
    was_acknowledged = put_result.get("acknowledged", False)
    if was_acknowledged:
        return
    raise RuntimeError(f"Failed to create search pipeline {pipeline_id}.")
@log_function_time(print_only=True, debug_only=True, include_args=True)
def delete_search_pipeline(self, pipeline_id: str) -> None:
    """Delete a search pipeline through the underlying OpenSearch client.

    Args:
        pipeline_id: ID of the search pipeline to remove.

    Raises:
        RuntimeError: The response did not report a truthy "acknowledged".
        Exception: Whatever the underlying client call raises.
    """
    delete_result = self._client.search_pipeline.delete(id=pipeline_id)
    was_acknowledged = delete_result.get("acknowledged", False)
    if was_acknowledged:
        return
    raise RuntimeError(f"Failed to delete search pipeline {pipeline_id}.")
@log_function_time(print_only=True, debug_only=True)
def search(
self, body: dict[str, Any], search_pipeline_id: str | None
@@ -943,6 +807,48 @@ class OpenSearchIndexClient(OpenSearchClient):
"""
self._client.indices.refresh(index=self._index_name)
@log_function_time(print_only=True, debug_only=True, include_args=True)
def put_cluster_settings(self, settings: dict[str, Any]) -> bool:
    """Send cluster settings through the underlying OpenSearch client.

    A response without a truthy "acknowledged" is logged as an error and
    reported through the return value rather than raised.

    Args:
        settings: Settings body passed to the cluster put-settings call.

    Raises:
        Exception: Whatever the underlying client call raises.

    Returns:
        True if the response was acknowledged, False otherwise.
    """
    put_result = self._client.cluster.put_settings(body=settings)
    if not put_result.get("acknowledged", False):
        logger.error(f"Failed to put cluster settings: {put_result}.")
        return False
    logger.info("Successfully put cluster settings.")
    return True
@log_function_time(print_only=True, debug_only=True)
def ping(self) -> bool:
    """Ping OpenSearch through the underlying client.

    Returns:
        True if OpenSearch could be reached, False if it could not.
    """
    return self._client.ping()
@log_function_time(print_only=True, debug_only=True)
def close(self) -> None:
    """Close the underlying OpenSearch client.

    TODO(andrei): Can we have some way to auto close when the client no
    longer has any references?

    Raises:
        Exception: There was an error closing the client.
    """
    self._client.close()
def _get_hits_and_profile_from_search_result(
self, result: dict[str, Any]
) -> tuple[list[Any], int | None, bool | None, dict[str, Any], dict[str, Any]]:
@@ -1039,7 +945,14 @@ def wait_for_opensearch_with_timeout(
Returns:
True if OpenSearch is ready, False otherwise.
"""
with nullcontext(client) if client else OpenSearchClient() as client:
made_client = False
try:
if client is None:
# NOTE: index_name does not matter because we are only using this object
# to ping.
# TODO(andrei): Make this better.
client = OpenSearchClient(index_name="")
made_client = True
time_start = time.monotonic()
while True:
if client.ping():
@@ -1056,3 +969,7 @@ def wait_for_opensearch_with_timeout(
f"[OpenSearch] Readiness probe ongoing. elapsed={time_elapsed:.1f} timeout={wait_limit_s:.1f}"
)
time.sleep(wait_interval_s)
finally:
if made_client:
assert client is not None
client.close()

View File

@@ -7,7 +7,6 @@ from opensearchpy import NotFoundError
from onyx.access.models import DocumentAccess
from onyx.configs.app_configs import USING_AWS_MANAGED_OPENSEARCH
from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
from onyx.configs.constants import PUBLIC_DOC_PAT
@@ -41,7 +40,6 @@ from onyx.document_index.interfaces_new import IndexingMetadata
from onyx.document_index.interfaces_new import MetadataUpdateRequest
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.client import OpenSearchClient
from onyx.document_index.opensearch.client import OpenSearchIndexClient
from onyx.document_index.opensearch.client import SearchHit
from onyx.document_index.opensearch.cluster_settings import OPENSEARCH_CLUSTER_SETTINGS
from onyx.document_index.opensearch.schema import ACCESS_CONTROL_LIST_FIELD_NAME
@@ -95,25 +93,6 @@ def generate_opensearch_filtered_access_control_list(
return list(access_control_list)
def set_cluster_state(client: OpenSearchClient) -> None:
    """Apply the desired cluster settings and normalization search pipelines.

    A failed settings update is only logged; the search pipelines are put
    regardless of that outcome.

    Args:
        client: Client used to put the cluster settings and the pipelines.

    Raises:
        Exception: Whatever ``client.create_search_pipeline`` raises.
    """
    settings_applied = client.put_cluster_settings(
        settings=OPENSEARCH_CLUSTER_SETTINGS
    )
    if not settings_applied:
        logger.error(
            "Failed to put cluster settings. If the settings have never been set before, "
            "this may cause unexpected index creation when indexing documents into an "
            "index that does not exist, or may cause expected logs to not appear. If this "
            "is not the first time running Onyx against this instance of OpenSearch, these "
            "settings have likely already been set. Not taking any further action..."
        )

    # Min-max first, then z-score, matching the original call order.
    pipelines = (
        (MIN_MAX_NORMALIZATION_PIPELINE_NAME, MIN_MAX_NORMALIZATION_PIPELINE_CONFIG),
        (ZSCORE_NORMALIZATION_PIPELINE_NAME, ZSCORE_NORMALIZATION_PIPELINE_CONFIG),
    )
    for name, config in pipelines:
        client.create_search_pipeline(pipeline_id=name, pipeline_body=config)
def _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
chunk: DocumentChunk,
score: float | None,
@@ -269,8 +248,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
def __init__(
self,
index_name: str,
embedding_dim: int,
embedding_precision: EmbeddingPrecision,
secondary_index_name: str | None,
large_chunks_enabled: bool, # noqa: ARG002
secondary_large_chunks_enabled: bool | None, # noqa: ARG002
@@ -281,6 +258,10 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
index_name=index_name,
secondary_index_name=secondary_index_name,
)
if multitenant:
raise ValueError(
"Bug: OpenSearch is not yet ready for multitenant environments but something tried to use it."
)
if multitenant != MULTI_TENANT:
raise ValueError(
"Bug: Multitenant mismatch when initializing an OpenSearchDocumentIndex. "
@@ -288,10 +269,8 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
)
tenant_id = get_current_tenant_id()
self._real_index = OpenSearchDocumentIndex(
tenant_state=TenantState(tenant_id=tenant_id, multitenant=multitenant),
index_name=index_name,
embedding_dim=embedding_dim,
embedding_precision=embedding_precision,
tenant_state=TenantState(tenant_id=tenant_id, multitenant=multitenant),
)
@staticmethod
@@ -300,8 +279,9 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
embedding_dims: list[int],
embedding_precisions: list[EmbeddingPrecision],
) -> None:
# TODO(andrei): Implement.
raise NotImplementedError(
"Bug: Multitenant index registration is not supported for OpenSearch."
"Multitenant index registration is not yet implemented for OpenSearch."
)
def ensure_indices_exist(
@@ -491,37 +471,19 @@ class OpenSearchDocumentIndex(DocumentIndex):
for an OpenSearch search engine instance. It handles the complete lifecycle
of document chunks within a specific OpenSearch index/schema.
Each kind of embedding used should correspond to a different instance of
this class, and therefore a different index in OpenSearch.
If in a multitenant environment and
VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT, will verify and create the index
if necessary on initialization. This is because there is no logic which runs
on cluster restart which scans through all search settings over all tenants
and creates the relevant indices.
Args:
tenant_state: The tenant state of the caller.
index_name: The name of the index to interact with.
embedding_dim: The dimensionality of the embeddings used for the index.
embedding_precision: The precision of the embeddings used for the index.
Although not yet used in this way in the codebase, each kind of embedding
used should correspond to a different instance of this class, and therefore
a different index in OpenSearch.
"""
def __init__(
self,
tenant_state: TenantState,
index_name: str,
embedding_dim: int,
embedding_precision: EmbeddingPrecision,
tenant_state: TenantState,
) -> None:
self._index_name: str = index_name
self._tenant_state: TenantState = tenant_state
self._client = OpenSearchIndexClient(index_name=self._index_name)
if self._tenant_state.multitenant and VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT:
self.verify_and_create_index_if_necessary(
embedding_dim=embedding_dim, embedding_precision=embedding_precision
)
self._os_client = OpenSearchClient(index_name=self._index_name)
def verify_and_create_index_if_necessary(
self,
@@ -530,15 +492,10 @@ class OpenSearchDocumentIndex(DocumentIndex):
) -> None:
"""Verifies and creates the index if necessary.
Also puts the desired cluster settings if not in a multitenant
environment.
Also puts the desired cluster settings.
Also puts the desired search pipeline state if not in a multitenant
environment, creating the pipelines if they do not exist and updating
them otherwise.
In a multitenant environment, the above steps happen explicitly on
setup.
Also puts the desired search pipeline state, creating the pipelines if
they do not exist and updating them otherwise.
Args:
embedding_dim: Vector dimensionality for the vector similarity part
@@ -551,38 +508,47 @@ class OpenSearchDocumentIndex(DocumentIndex):
search pipelines.
"""
logger.debug(
f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if "
f"necessary, with embedding dimension {embedding_dim}."
f"[OpenSearchDocumentIndex] Verifying and creating index {self._index_name} if necessary, "
f"with embedding dimension {embedding_dim}."
)
if not self._tenant_state.multitenant:
set_cluster_state(self._client)
expected_mappings = DocumentSchema.get_document_schema(
embedding_dim, self._tenant_state.multitenant
)
if not self._client.index_exists():
if not self._os_client.put_cluster_settings(
settings=OPENSEARCH_CLUSTER_SETTINGS
):
logger.error(
f"Failed to put cluster settings for index {self._index_name}. If the settings have never been set before this "
"may cause unexpected index creation when indexing documents into an index that does not exist, or may cause "
"expected logs to not appear. If this is not the first time running Onyx against this instance of OpenSearch, "
"these settings have likely already been set. Not taking any further action..."
)
if not self._os_client.index_exists():
if USING_AWS_MANAGED_OPENSEARCH:
index_settings = (
DocumentSchema.get_index_settings_for_aws_managed_opensearch()
)
else:
index_settings = DocumentSchema.get_index_settings()
self._client.create_index(
self._os_client.create_index(
mappings=expected_mappings,
settings=index_settings,
)
else:
# Ensure schema is up to date by applying the current mappings.
try:
self._client.put_mapping(expected_mappings)
except Exception as e:
logger.error(
f"Failed to update mappings for index {self._index_name}. This likely means a "
f"field type was changed which requires reindexing. Error: {e}"
)
raise
if not self._os_client.validate_index(
expected_mappings=expected_mappings,
):
raise RuntimeError(
f"The index {self._index_name} is not valid. The expected mappings do not match the actual mappings."
)
self._os_client.create_search_pipeline(
pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
pipeline_body=MIN_MAX_NORMALIZATION_PIPELINE_CONFIG,
)
self._os_client.create_search_pipeline(
pipeline_id=ZSCORE_NORMALIZATION_PIPELINE_NAME,
pipeline_body=ZSCORE_NORMALIZATION_PIPELINE_CONFIG,
)
def index(
self,
@@ -654,7 +620,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
)
# Now index. This will raise if a chunk of the same ID exists, which
# we do not expect because we should have deleted all chunks.
self._client.bulk_index_documents(
self._os_client.bulk_index_documents(
documents=chunk_batch,
tenant_state=self._tenant_state,
)
@@ -694,7 +660,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
tenant_state=self._tenant_state,
)
return self._client.delete_by_query(query_body)
return self._os_client.delete_by_query(query_body)
def update(
self,
@@ -794,7 +760,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
document_id=doc_id,
chunk_index=chunk_index,
)
self._client.update_document(
self._os_client.update_document(
document_chunk_id=document_chunk_id,
properties_to_update=properties_to_update,
)
@@ -833,7 +799,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
min_chunk_index=chunk_request.min_chunk_ind,
max_chunk_index=chunk_request.max_chunk_ind,
)
search_hits = self._client.search(
search_hits = self._os_client.search(
body=query_body,
search_pipeline_id=None,
)
@@ -883,7 +849,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
# NOTE: Using z-score normalization here because it's better for hybrid search from a theoretical standpoint.
# Empirically on a small dataset of up to 10K docs, it's not very different. Likely more impactful at scale.
# https://opensearch.org/blog/introducing-the-z-score-normalization-technique-for-hybrid-search/
search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
search_hits: list[SearchHit[DocumentChunk]] = self._os_client.search(
body=query_body,
search_pipeline_id=ZSCORE_NORMALIZATION_PIPELINE_NAME,
)
@@ -915,7 +881,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
index_filters=filters,
num_to_retrieve=num_to_retrieve,
)
search_hits: list[SearchHit[DocumentChunk]] = self._client.search(
search_hits: list[SearchHit[DocumentChunk]] = self._os_client.search(
body=query_body,
search_pipeline_id=None,
)
@@ -943,6 +909,6 @@ class OpenSearchDocumentIndex(DocumentIndex):
# Do not raise if the document already exists, just update. This is
# because the document may already have been indexed during the
# OpenSearch transition period.
self._client.bulk_index_documents(
self._os_client.bulk_index_documents(
documents=chunks, tenant_state=self._tenant_state, update_if_exists=True
)

View File

@@ -405,7 +405,6 @@ class PersonaShareRequest(BaseModel):
user_ids: list[UUID] | None = None
group_ids: list[int] | None = None
is_public: bool | None = None
label_ids: list[int] | None = None
# We notify each user when a user is shared with them
@@ -416,22 +415,14 @@ def share_persona(
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> None:
try:
update_persona_shared(
persona_id=persona_id,
user=user,
db_session=db_session,
user_ids=persona_share_request.user_ids,
group_ids=persona_share_request.group_ids,
is_public=persona_share_request.is_public,
label_ids=persona_share_request.label_ids,
)
except PermissionError as e:
logger.exception("Failed to share persona")
raise HTTPException(status_code=403, detail=str(e))
except ValueError as e:
logger.exception("Failed to share persona")
raise HTTPException(status_code=400, detail=str(e))
update_persona_shared(
persona_id=persona_id,
user=user,
db_session=db_session,
user_ids=persona_share_request.user_ids,
group_ids=persona_share_request.group_ids,
is_public=persona_share_request.is_public,
)
@basic_router.delete("/{persona_id}", tags=PUBLIC_API_TAGS)

View File

@@ -105,9 +105,7 @@ class LLMProviderDescriptor(BaseModel):
is_default_provider = bool(default_model_name)
is_default_vision_provider = default_vision_model is not None
default_model_name = (
default_model_name or llm_provider_model.default_model_name or ""
)
default_model_name = default_model_name or llm_provider_model.default_model_name
return cls(
name=llm_provider_model.name,
@@ -186,9 +184,7 @@ class LLMProviderView(LLMProvider):
is_default_provider = bool(default_model_name)
is_default_vision_provider = default_vision_model is not None
default_model_name = (
default_model_name or llm_provider_model.default_model_name or ""
)
default_model_name = default_model_name or llm_provider_model.default_model_name
return cls(
id=llm_provider_model.id,

View File

@@ -1,27 +0,0 @@
"""Per-tenant request counter metric.
Increments a counter on every request, labelled by tenant, so Grafana can
answer "which tenant is generating the most traffic?"
"""
from prometheus_client import Counter
from prometheus_fastapi_instrumentator.metrics import Info
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
# Request counter labelled by tenant, HTTP method, handler and status.
_requests_by_tenant = Counter(
    name="onyx_api_requests_by_tenant_total",
    documentation="Total API requests by tenant",
    labelnames=["tenant_id", "method", "handler", "status"],
)
def per_tenant_request_callback(info: Info) -> None:
    """Count one request against the tenant found in the context variable.

    The tenant id falls back to ``"unknown"`` when the context variable is
    unset or empty. Method, handler and status labels are taken from ``info``.
    """
    label_values = {
        "tenant_id": CURRENT_TENANT_ID_CONTEXTVAR.get() or "unknown",
        "method": info.method,
        "handler": info.modified_handler,
        "status": info.modified_status,
    }
    _requests_by_tenant.labels(**label_values).inc()

View File

@@ -32,7 +32,6 @@ from sqlalchemy.pool import QueuePool
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_ENDPOINT_CONTEXTVAR
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
@@ -73,7 +72,7 @@ _checkout_timeout_total = Counter(
_connections_held = Gauge(
"onyx_db_connections_held_by_endpoint",
"Number of DB connections currently held, by endpoint and engine",
["handler", "engine", "tenant_id"],
["handler", "engine"],
)
_hold_seconds = Histogram(
@@ -164,14 +163,10 @@ def _register_pool_events(engine: Engine, label: str) -> None:
conn_proxy: PoolProxiedConnection, # noqa: ARG001
) -> None:
handler = CURRENT_ENDPOINT_CONTEXTVAR.get() or "unknown"
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or "unknown"
conn_record.info["_metrics_endpoint"] = handler
conn_record.info["_metrics_tenant_id"] = tenant_id
conn_record.info["_metrics_checkout_time"] = time.monotonic()
_checkout_total.labels(engine=label).inc()
_connections_held.labels(
handler=handler, engine=label, tenant_id=tenant_id
).inc()
_connections_held.labels(handler=handler, engine=label).inc()
@event.listens_for(engine, "checkin")
def on_checkin(
@@ -179,12 +174,9 @@ def _register_pool_events(engine: Engine, label: str) -> None:
conn_record: ConnectionPoolEntry,
) -> None:
handler = conn_record.info.pop("_metrics_endpoint", "unknown")
tenant_id = conn_record.info.pop("_metrics_tenant_id", "unknown")
start = conn_record.info.pop("_metrics_checkout_time", None)
_checkin_total.labels(engine=label).inc()
_connections_held.labels(
handler=handler, engine=label, tenant_id=tenant_id
).dec()
_connections_held.labels(handler=handler, engine=label).dec()
if start is not None:
_hold_seconds.labels(handler=handler, engine=label).observe(
time.monotonic() - start
@@ -207,12 +199,9 @@ def _register_pool_events(engine: Engine, label: str) -> None:
# Defensively clean up the held-connections gauge in case checkin
# doesn't fire after invalidation (e.g. hard pool shutdown).
handler = conn_record.info.pop("_metrics_endpoint", None)
tenant_id = conn_record.info.pop("_metrics_tenant_id", "unknown")
start = conn_record.info.pop("_metrics_checkout_time", None)
if handler:
_connections_held.labels(
handler=handler, engine=label, tenant_id=tenant_id
).dec()
_connections_held.labels(handler=handler, engine=label).dec()
if start is not None:
_hold_seconds.labels(handler=handler or "unknown", engine=label).observe(
time.monotonic() - start

View File

@@ -11,11 +11,9 @@ SQLAlchemy connection pool metrics are registered separately via
"""
from prometheus_fastapi_instrumentator import Instrumentator
from prometheus_fastapi_instrumentator.metrics import default as default_metrics
from sqlalchemy.exc import TimeoutError as SATimeoutError
from starlette.applications import Starlette
from onyx.server.metrics.per_tenant import per_tenant_request_callback
from onyx.server.metrics.postgres_connection_pool import pool_timeout_handler
from onyx.server.metrics.slow_requests import slow_request_callback
@@ -61,15 +59,6 @@ def setup_prometheus_metrics(app: Starlette) -> None:
excluded_handlers=_EXCLUDED_HANDLERS,
)
# Explicitly create the default metrics (http_requests_total,
# http_request_duration_seconds, etc.) and add them first. The library
# skips creating defaults when ANY custom instrumentations are registered
# via .add(), so we must include them ourselves.
default_callback = default_metrics(latency_lowr_buckets=_LATENCY_BUCKETS)
if default_callback:
instrumentator.add(default_callback)
instrumentator.add(slow_request_callback)
instrumentator.add(per_tenant_request_callback)
instrumentator.instrument(app, latency_lowr_buckets=_LATENCY_BUCKETS).expose(app)

View File

@@ -4,7 +4,6 @@ from sqlalchemy.orm import Session
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
@@ -33,9 +32,6 @@ from onyx.db.search_settings import update_current_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.opensearch.client import OpenSearchClient
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.opensearch_document_index import set_cluster_state
from onyx.document_index.vespa.index import VespaIndex
from onyx.indexing.models import IndexingSetting
from onyx.key_value_store.factory import get_kv_store
@@ -315,14 +311,7 @@ def setup_multitenant_onyx() -> None:
logger.notice("DISABLE_VECTOR_DB is set — skipping multitenant Vespa setup.")
return
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
opensearch_client = OpenSearchClient()
if not wait_for_opensearch_with_timeout(client=opensearch_client):
raise RuntimeError("Failed to connect to OpenSearch.")
set_cluster_state(opensearch_client)
# For Managed Vespa, the schema is sent over via the Vespa Console manually.
# NOTE: Pretty sure this code is never hit in any production environment.
if not MANAGED_VESPA:
setup_vespa_multitenant(SUPPORTED_EMBEDDING_MODELS)

View File

@@ -120,7 +120,7 @@ def generate_intermediate_report(
custom_agent_prompt=None,
simple_chat_history=history,
reminder_message=reminder_message,
context_files=None,
project_files=None,
available_tokens=llm.config.max_input_tokens,
)
@@ -325,7 +325,7 @@ def run_research_agent_call(
custom_agent_prompt=None,
simple_chat_history=msg_history,
reminder_message=reminder_message,
context_files=None,
project_files=None,
available_tokens=llm.config.max_input_tokens,
)

View File

@@ -809,7 +809,7 @@ pypandoc-binary==1.16.2
# via onyx
pyparsing==3.2.5
# via httplib2
pypdf==6.7.3
pypdf==6.6.2
# via
# onyx
# unstructured-client

View File

@@ -12,7 +12,6 @@ from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.sharepoint.connector import SharepointAuthMethod
from onyx.connectors.sharepoint.connector import SharepointConnector
from onyx.db.enums import HierarchyNodeType
from tests.daily.connectors.utils import load_all_from_connector
@@ -522,46 +521,3 @@ def test_sharepoint_connector_hierarchy_nodes(
f"Document {doc.semantic_identifier} should have "
"parent_hierarchy_raw_node_id set"
)
@pytest.fixture
def sharepoint_cert_credentials() -> dict[str, str]:
    """Certificate-auth credential dict built from the PERM_SYNC_SHAREPOINT_* env vars.

    Raises KeyError if any of the required environment variables is missing.
    """
    env = os.environ
    credentials: dict[str, str] = {
        "authentication_method": SharepointAuthMethod.CERTIFICATE.value,
        "sp_client_id": env["PERM_SYNC_SHAREPOINT_CLIENT_ID"],
        "sp_private_key": env["PERM_SYNC_SHAREPOINT_PRIVATE_KEY"],
        "sp_certificate_password": env["PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD"],
        "sp_directory_id": env["PERM_SYNC_SHAREPOINT_DIRECTORY_ID"],
    }
    return credentials
def test_resolve_tenant_domain_from_site_urls(
    sharepoint_cert_credentials: dict[str, str],
) -> None:
    """Verify that certificate auth resolves the tenant domain from site URLs
    without calling the /organization endpoint."""
    from urllib.parse import urlsplit

    site_url = os.environ["SHAREPOINT_SITE"]

    # urlsplit() reports hostname=None when the value has no network location
    # (e.g. a scheme-less "contoso.sharepoint.com/sites/x"). Fail with a clear
    # message instead of an AttributeError on None further down.
    hostname = urlsplit(site_url).hostname
    assert hostname, f"SHAREPOINT_SITE must be an absolute URL with a host: {site_url!r}"

    connector = SharepointConnector(sites=[site_url])
    connector.load_credentials(sharepoint_cert_credentials)

    assert connector.sp_tenant_domain is not None
    assert len(connector.sp_tenant_domain) > 0

    # The tenant domain should match the first label of the site URL hostname
    expected = hostname.split(".")[0]
    assert connector.sp_tenant_domain == expected
def test_resolve_tenant_domain_from_root_site(
    sharepoint_cert_credentials: dict[str, str],
) -> None:
    """With no site URLs configured, certificate auth must still end up with a
    non-empty tenant domain (resolved via the root site endpoint)."""
    sp_connector = SharepointConnector(sites=[])
    sp_connector.load_credentials(sharepoint_cert_credentials)

    assert sp_connector.sp_tenant_domain is not None
    assert len(sp_connector.sp_tenant_domain) > 0

View File

@@ -1,544 +0,0 @@
"""
External dependency unit tests for persona file sync.
Validates that:
1. The check_for_user_file_project_sync beat task picks up UserFiles with
needs_persona_sync=True (not just needs_project_sync).
2. The process_single_user_file_project_sync worker task reads persona
associations from the DB, passes persona_ids to the document index via
VespaDocumentUserFields, and clears needs_persona_sync afterwards.
3. upsert_persona correctly marks affected UserFiles with
needs_persona_sync=True when file associations change.
Uses real Redis and PostgreSQL. Document index (Vespa) calls are mocked
since we only need to verify the arguments passed to update_single.
"""
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from unittest.mock import PropertyMock
from uuid import uuid4
from sqlalchemy.orm import Session
from onyx.background.celery.tasks.user_file_processing.tasks import (
check_for_user_file_project_sync,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
process_single_user_file_project_sync,
)
from onyx.background.celery.tasks.user_file_processing.tasks import (
user_file_project_sync_lock_key,
)
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.enums import UserFileStatus
from onyx.db.models import Persona
from onyx.db.models import Persona__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.persona import upsert_persona
from onyx.document_index.interfaces import VespaDocumentUserFields
from onyx.redis.redis_pool import get_redis_client
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _create_completed_user_file(
    db_session: Session,
    user: User,
    needs_persona_sync: bool = False,
    needs_project_sync: bool = False,
) -> UserFile:
    """Persist and return a COMPLETED UserFile owned by ``user``.

    The two sync flags are stored as given; the id, file id and name are
    randomly generated so repeated calls never collide.
    """
    generated_file_id = f"test_file_{uuid4().hex[:8]}"
    generated_name = f"test_{uuid4().hex[:8]}.txt"

    user_file = UserFile(
        id=uuid4(),
        user_id=user.id,
        file_id=generated_file_id,
        name=generated_name,
        file_type="text/plain",
        status=UserFileStatus.COMPLETED,
        needs_persona_sync=needs_persona_sync,
        needs_project_sync=needs_project_sync,
        chunk_count=5,
    )

    db_session.add(user_file)
    db_session.commit()
    # Reload so the returned object reflects what was committed.
    db_session.refresh(user_file)
    return user_file
def _create_test_persona(
    db_session: Session,
    user: User,
    user_files: list[UserFile] | None = None,
) -> Persona:
    """Insert a bare-bones public Persona owned by ``user`` and return it.

    ``user_files`` (if any) are attached at creation time; a missing or empty
    value results in a persona without files.
    """
    attached_files = user_files if user_files else []
    persona_name = f"Test Persona {uuid4().hex[:8]}"

    new_persona = Persona(
        name=persona_name,
        description="Test persona",
        num_chunks=10.0,
        chunks_above=0,
        chunks_below=0,
        llm_relevance_filter=False,
        llm_filter_extraction=False,
        recency_bias=RecencyBiasSetting.NO_DECAY,
        system_prompt="You are a test assistant",
        task_prompt="Answer the question",
        tools=[],
        document_sets=[],
        users=[user],
        groups=[],
        is_visible=True,
        is_public=True,
        display_priority=None,
        starter_messages=None,
        deleted=False,
        user_files=attached_files,
        user_id=user.id,
    )

    db_session.add(new_persona)
    db_session.commit()
    db_session.refresh(new_persona)
    return new_persona
def _link_file_to_persona(
    db_session: Session, persona: Persona, user_file: UserFile
) -> None:
    """Insert and commit the Persona__UserFile row joining the two objects."""
    db_session.add(
        Persona__UserFile(persona_id=persona.id, user_file_id=user_file.id)
    )
    db_session.commit()
# Dotted patch target for the queue-depth lookup used by the tasks module.
_PATCH_QUEUE_DEPTH = ".".join(
    (
        "onyx.background.celery.tasks.user_file_processing.tasks",
        "get_user_file_project_sync_queue_depth",
    )
)
@contextmanager
def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, None]:
    """Swap in ``mock_app`` as the task's ``app`` and report an empty queue.

    While the context is active, the ``app`` property on the class of the
    bound task instance returns ``mock_app`` and the queue-depth lookup
    returns 0.
    """
    task_cls = type(task.run.__self__)
    app_patch = patch.object(
        task_cls,
        "app",
        new_callable=PropertyMock,
        return_value=mock_app,
    )
    queue_depth_patch = patch(_PATCH_QUEUE_DEPTH, return_value=0)

    with app_patch:
        with queue_depth_patch:
            yield
# ---------------------------------------------------------------------------
# Test: check_for_user_file_project_sync picks up persona sync
# ---------------------------------------------------------------------------
class TestCheckSweepIncludesPersonaSync:
    """The beat task must pick up files needing persona sync, not just project sync."""

    def test_persona_sync_flag_enqueues_task(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A COMPLETED file flagged needs_persona_sync=True is sent to the worker."""
        owner = create_test_user(db_session, "persona_sweep")
        flagged_file = _create_completed_user_file(
            db_session, owner, needs_persona_sync=True
        )

        celery_app = MagicMock()
        with _patch_task_app(check_for_user_file_project_sync, celery_app):
            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)

        enqueued_ids = set()
        for sent in celery_app.send_task.call_args_list:
            enqueued_ids.add(sent.kwargs["kwargs"]["user_file_id"])

        assert str(flagged_file.id) in enqueued_ids

    def test_neither_flag_does_not_enqueue(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file with neither sync flag set must be left alone by the sweep."""
        owner = create_test_user(db_session, "no_sync")
        unflagged_file = _create_completed_user_file(db_session, owner)

        celery_app = MagicMock()
        with _patch_task_app(check_for_user_file_project_sync, celery_app):
            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)

        enqueued_ids = set()
        for sent in celery_app.send_task.call_args_list:
            enqueued_ids.add(sent.kwargs["kwargs"]["user_file_id"])

        assert str(unflagged_file.id) not in enqueued_ids

    def test_both_flags_enqueues_once(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file carrying BOTH flags results in exactly one enqueued task."""
        owner = create_test_user(db_session, "both_flags")
        doubly_flagged = _create_completed_user_file(
            db_session, owner, needs_persona_sync=True, needs_project_sync=True
        )

        celery_app = MagicMock()
        with _patch_task_app(check_for_user_file_project_sync, celery_app):
            check_for_user_file_project_sync.run(tenant_id=TEST_TENANT_ID)

        target_id = str(doubly_flagged.id)
        times_enqueued = sum(
            1
            for sent in celery_app.send_task.call_args_list
            if sent.kwargs["kwargs"]["user_file_id"] == target_id
        )
        assert times_enqueued == 1
# ---------------------------------------------------------------------------
# Test: process_single_user_file_project_sync passes persona_ids to index
# ---------------------------------------------------------------------------
# Patch targets below all live in the user-file processing tasks module.
_TASKS_MODULE = "onyx.background.celery.tasks.user_file_processing.tasks"

_PATCH_GET_SETTINGS = f"{_TASKS_MODULE}.get_active_search_settings"
_PATCH_GET_INDICES = f"{_TASKS_MODULE}.get_all_document_indices"
_PATCH_HTTPX_INIT = f"{_TASKS_MODULE}.httpx_init_vespa_pool"
_PATCH_DISABLE_VDB = f"{_TASKS_MODULE}.DISABLE_VECTOR_DB"
class TestSyncTaskWritesPersonaIds:
    """The sync task reads persona associations and sends them to the index."""

    def _clear_sync_lock(self, user_file: UserFile) -> None:
        """Delete the Redis key holding this file's project-sync lock.

        NOTE(review): presumably this keeps a stale lock from an earlier run
        from making the task skip the file -- confirm against the task code.
        """
        redis_client = get_redis_client(tenant_id=TEST_TENANT_ID)
        redis_client.delete(user_file_project_sync_lock_key(str(user_file.id)))

    def _run_sync_with_mock_index(self, user_file: UserFile) -> MagicMock:
        """Run the sync task for ``user_file`` against a mocked document index.

        The vector DB is forced on, the Vespa pool init is stubbed out, and the
        task is handed a single mock index plus primary-only search settings.
        Returns the mock index so the caller can inspect ``update_single``.
        """
        mock_doc_index = MagicMock()
        mock_search_settings = MagicMock()
        mock_search_settings.primary = MagicMock()
        mock_search_settings.secondary = None

        self._clear_sync_lock(user_file)

        with (
            patch(_PATCH_DISABLE_VDB, False),
            patch(_PATCH_HTTPX_INIT),
            patch(_PATCH_GET_SETTINGS, return_value=mock_search_settings),
            patch(_PATCH_GET_INDICES, return_value=[mock_doc_index]),
        ):
            process_single_user_file_project_sync.run(
                user_file_id=str(user_file.id), tenant_id=TEST_TENANT_ID
            )

        return mock_doc_index

    def test_passes_persona_ids_to_update_single(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """After linking a file to a persona, sync sends the persona ID."""
        user = create_test_user(db_session, "sync_persona")
        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)
        persona = _create_test_persona(db_session, user)
        _link_file_to_persona(db_session, persona, uf)

        mock_doc_index = self._run_sync_with_mock_index(uf)

        mock_doc_index.update_single.assert_called_once()
        call_args = mock_doc_index.update_single.call_args
        user_fields: VespaDocumentUserFields = call_args.kwargs["user_fields"]
        assert user_fields.personas is not None
        assert persona.id in user_fields.personas
        assert call_args.args[0] == str(uf.id)

    def test_clears_persona_sync_flag(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """After a successful sync the needs_persona_sync flag is cleared."""
        user = create_test_user(db_session, "sync_clear")
        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)

        self._clear_sync_lock(uf)

        # With the vector DB disabled, no index needs to be mocked here.
        with patch(_PATCH_DISABLE_VDB, True):
            process_single_user_file_project_sync.run(
                user_file_id=str(uf.id), tenant_id=TEST_TENANT_ID
            )

        db_session.refresh(uf)
        assert uf.needs_persona_sync is False

    def test_passes_both_project_and_persona_ids(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A file linked to both a project and a persona gets both IDs."""
        from onyx.db.models import Project__UserFile
        from onyx.db.models import UserProject

        user = create_test_user(db_session, "sync_both")
        uf = _create_completed_user_file(
            db_session, user, needs_persona_sync=True, needs_project_sync=True
        )
        persona = _create_test_persona(db_session, user)
        _link_file_to_persona(db_session, persona, uf)

        project = UserProject(user_id=user.id, name="test-project", instructions="")
        db_session.add(project)
        db_session.commit()
        db_session.refresh(project)

        link = Project__UserFile(project_id=project.id, user_file_id=uf.id)
        db_session.add(link)
        db_session.commit()

        mock_doc_index = self._run_sync_with_mock_index(uf)

        # Fail with a clear message rather than an AttributeError on
        # ``call_args is None`` if the index was never touched.
        mock_doc_index.update_single.assert_called()
        call_kwargs = mock_doc_index.update_single.call_args.kwargs
        user_fields: VespaDocumentUserFields = call_kwargs["user_fields"]
        assert user_fields.personas is not None
        assert user_fields.user_projects is not None
        assert persona.id in user_fields.personas
        assert project.id in user_fields.user_projects

        # Both flags should be cleared
        db_session.refresh(uf)
        assert uf.needs_persona_sync is False
        assert uf.needs_project_sync is False

    def test_deleted_persona_excluded_from_ids(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """A soft-deleted persona should NOT appear in the persona_ids sent to Vespa."""
        user = create_test_user(db_session, "sync_deleted")
        uf = _create_completed_user_file(db_session, user, needs_persona_sync=True)
        persona = _create_test_persona(db_session, user)
        _link_file_to_persona(db_session, persona, uf)

        persona.deleted = True
        db_session.commit()

        mock_doc_index = self._run_sync_with_mock_index(uf)

        # Same guard as above: make a missing call an explicit assertion failure.
        mock_doc_index.update_single.assert_called()
        call_kwargs = mock_doc_index.update_single.call_args.kwargs
        user_fields: VespaDocumentUserFields = call_kwargs["user_fields"]
        assert user_fields.personas is not None
        assert persona.id not in user_fields.personas
# ---------------------------------------------------------------------------
# Test: upsert_persona marks files for persona sync
# ---------------------------------------------------------------------------
class TestUpsertPersonaMarksSyncFlag:
    """upsert_persona must set needs_persona_sync on affected UserFiles."""

    def _create_persona_with_files(
        self,
        db_session: Session,
        user: User,
        user_file_ids: list[Any],
    ) -> Persona:
        """Create a new, randomly named public persona attached to the given files."""
        return upsert_persona(
            user=user,
            name=f"persona-{uuid4().hex[:8]}",
            description="test",
            num_chunks=10.0,
            llm_relevance_filter=False,
            llm_filter_extraction=False,
            recency_bias=RecencyBiasSetting.NO_DECAY,
            llm_model_provider_override=None,
            llm_model_version_override=None,
            starter_messages=None,
            system_prompt="test",
            task_prompt="test",
            datetime_aware=None,
            is_public=True,
            db_session=db_session,
            user_file_ids=user_file_ids,
        )

    def _replace_persona_files(
        self,
        db_session: Session,
        user: User,
        persona: Persona,
        user_file_ids: list[Any],
    ) -> None:
        """Re-upsert ``persona`` unchanged except for its file associations."""
        # num_chunks is checked for None before being passed back in.
        assert persona.num_chunks is not None
        upsert_persona(
            user=user,
            name=persona.name,
            description=persona.description,
            num_chunks=persona.num_chunks,
            llm_relevance_filter=persona.llm_relevance_filter,
            llm_filter_extraction=persona.llm_filter_extraction,
            recency_bias=persona.recency_bias,
            llm_model_provider_override=None,
            llm_model_version_override=None,
            starter_messages=None,
            system_prompt=persona.system_prompt,
            task_prompt=persona.task_prompt,
            datetime_aware=None,
            is_public=persona.is_public,
            db_session=db_session,
            persona_id=persona.id,
            user_file_ids=user_file_ids,
        )

    def test_creating_persona_with_files_marks_sync(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Creating a persona with a file attached flags that file for sync."""
        user = create_test_user(db_session, "upsert_create")
        uf = _create_completed_user_file(db_session, user)
        assert uf.needs_persona_sync is False

        self._create_persona_with_files(db_session, user, [uf.id])

        db_session.refresh(uf)
        assert uf.needs_persona_sync is True

    def test_updating_persona_files_marks_both_old_and_new(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """When file associations change, both the removed and added files are flagged."""
        user = create_test_user(db_session, "upsert_update")
        uf_old = _create_completed_user_file(db_session, user)
        uf_new = _create_completed_user_file(db_session, user)

        persona = self._create_persona_with_files(db_session, user, [uf_old.id])

        # Clear the flag from creation so we can observe the update
        uf_old.needs_persona_sync = False
        db_session.commit()

        # Now update the persona to swap files
        self._replace_persona_files(db_session, user, persona, [uf_new.id])

        db_session.refresh(uf_old)
        db_session.refresh(uf_new)
        assert uf_old.needs_persona_sync is True, "Removed file should be flagged"
        assert uf_new.needs_persona_sync is True, "Added file should be flagged"

    def test_removing_all_files_marks_old_files(
        self,
        db_session: Session,
        tenant_context: None,  # noqa: ARG002
    ) -> None:
        """Removing all files from a persona flags the previously associated files."""
        user = create_test_user(db_session, "upsert_remove")
        uf = _create_completed_user_file(db_session, user)

        persona = self._create_persona_with_files(db_session, user, [uf.id])

        # Reset the flag set at creation so the removal is what we observe.
        uf.needs_persona_sync = False
        db_session.commit()

        self._replace_persona_files(db_session, user, persona, [])

        db_session.refresh(uf)
        assert uf.needs_persona_sync is True

View File

@@ -1,318 +0,0 @@
"""
External dependency unit tests for UserFileIndexingAdapter metadata writing.
Validates that build_metadata_aware_chunks produces DocMetadataAwareIndexChunk
objects with both `user_project` and `personas` fields populated correctly
based on actual DB associations.
Uses real PostgreSQL for UserFile/Persona/UserProject rows.
Mocks the LLM tokenizer and file store since they are not relevant here.
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import uuid4
from sqlalchemy.orm import Session
from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.models import TextSection
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.enums import UserFileStatus
from onyx.db.models import Persona
from onyx.db.models import Persona__UserFile
from onyx.db.models import Project__UserFile
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.models import UserProject
from onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter
from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
from onyx.indexing.models import ChunkEmbedding
from onyx.indexing.models import IndexChunk
from tests.external_dependency_unit.conftest import create_test_user
from tests.external_dependency_unit.constants import TEST_TENANT_ID
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _create_user_file(db_session: Session, user: User) -> UserFile:
    """Persist and return a single-chunk COMPLETED UserFile owned by ``user``."""
    generated_file_id = f"test_file_{uuid4().hex[:8]}"
    generated_name = f"test_{uuid4().hex[:8]}.txt"

    user_file = UserFile(
        id=uuid4(),
        user_id=user.id,
        file_id=generated_file_id,
        name=generated_name,
        file_type="text/plain",
        status=UserFileStatus.COMPLETED,
        chunk_count=1,
    )

    db_session.add(user_file)
    db_session.commit()
    db_session.refresh(user_file)
    return user_file
def _create_persona(db_session: Session, user: User) -> Persona:
    """Insert a minimal public Persona owned by ``user`` and return it."""
    persona_name = f"Test Persona {uuid4().hex[:8]}"

    new_persona = Persona(
        name=persona_name,
        description="Test persona",
        num_chunks=10.0,
        chunks_above=0,
        chunks_below=0,
        llm_relevance_filter=False,
        llm_filter_extraction=False,
        recency_bias=RecencyBiasSetting.NO_DECAY,
        system_prompt="test",
        task_prompt="test",
        tools=[],
        document_sets=[],
        users=[user],
        groups=[],
        is_visible=True,
        is_public=True,
        display_priority=None,
        starter_messages=None,
        deleted=False,
        user_id=user.id,
    )

    db_session.add(new_persona)
    db_session.commit()
    db_session.refresh(new_persona)
    return new_persona
def _create_project(db_session: Session, user: User) -> UserProject:
    """Insert a randomly named UserProject with empty instructions for ``user``."""
    project_name = f"project-{uuid4().hex[:8]}"
    new_project = UserProject(user_id=user.id, name=project_name, instructions="")

    db_session.add(new_project)
    db_session.commit()
    db_session.refresh(new_project)
    return new_project
def _make_index_chunk(user_file: UserFile) -> IndexChunk:
    """Return a one-section IndexChunk whose source document id is the UserFile id.

    The embedding is a 768-element zero vector with no mini-chunk embeddings.
    """
    chunk_text = "test chunk content"

    zero_embedding = ChunkEmbedding(
        full_embedding=[0.0] * 768,
        mini_chunk_embeddings=[],
    )
    source_doc = Document(
        id=str(user_file.id),
        source=DocumentSource.USER_FILE,
        semantic_identifier=user_file.name,
        sections=[TextSection(text=chunk_text, link=None)],
        metadata={},
    )

    return IndexChunk(
        source_document=source_doc,
        chunk_id=0,
        blurb="test chunk",
        content=chunk_text,
        source_links={0: ""},
        image_file_id=None,
        section_continuation=False,
        title_prefix="",
        metadata_suffix_semantic="",
        metadata_suffix_keyword="",
        contextual_rag_reserved_tokens=0,
        doc_summary="",
        chunk_context="",
        mini_chunk_texts=None,
        large_chunk_id=None,
        embeddings=zero_embedding,
        title_embedding=None,
    )
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestAdapterWritesBothMetadataFields:
"""build_metadata_aware_chunks must populate user_project AND personas."""
@patch(
    "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
    side_effect=Exception("no LLM in test"),
)
def test_file_linked_to_persona_gets_persona_id(
    self,
    _mock_llm: MagicMock,
    db_session: Session,
    tenant_context: None,  # noqa: ARG002
) -> None:
    """A file linked only to a persona yields that persona id and no projects."""
    owner = create_test_user(db_session, "adapter_persona")
    user_file = _create_user_file(db_session, owner)
    persona = _create_persona(db_session, owner)

    persona_link = Persona__UserFile(persona_id=persona.id, user_file_id=user_file.id)
    db_session.add(persona_link)
    db_session.commit()

    adapter = UserFileIndexingAdapter(
        tenant_id=TEST_TENANT_ID, db_session=db_session
    )
    index_chunk = _make_index_chunk(user_file)
    prepare_context = DocumentBatchPrepareContext(
        updatable_docs=[index_chunk.source_document], id_to_boost_map={}
    )

    result = adapter.build_metadata_aware_chunks(
        chunks_with_embeddings=[index_chunk],
        chunk_content_scores=[1.0],
        tenant_id=TEST_TENANT_ID,
        context=prepare_context,
    )

    assert len(result.chunks) == 1
    metadata_chunk = result.chunks[0]
    assert persona.id in metadata_chunk.personas
    assert metadata_chunk.user_project == []
@patch(
    "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
    side_effect=Exception("no LLM in test"),
)
def test_file_linked_to_project_gets_project_id(
    self,
    _mock_llm: MagicMock,
    db_session: Session,
    tenant_context: None,  # noqa: ARG002
) -> None:
    """A file linked only to a project yields that project id and no personas."""
    owner = create_test_user(db_session, "adapter_project")
    user_file = _create_user_file(db_session, owner)
    project = _create_project(db_session, owner)

    project_link = Project__UserFile(project_id=project.id, user_file_id=user_file.id)
    db_session.add(project_link)
    db_session.commit()

    adapter = UserFileIndexingAdapter(
        tenant_id=TEST_TENANT_ID, db_session=db_session
    )
    index_chunk = _make_index_chunk(user_file)
    source_doc = index_chunk.source_document
    prepare_context = DocumentBatchPrepareContext(
        updatable_docs=[source_doc], id_to_boost_map={}
    )

    result = adapter.build_metadata_aware_chunks(
        chunks_with_embeddings=[index_chunk],
        chunk_content_scores=[1.0],
        tenant_id=TEST_TENANT_ID,
        context=prepare_context,
    )

    assert len(result.chunks) == 1
    metadata_chunk = result.chunks[0]
    assert project.id in metadata_chunk.user_project
    assert metadata_chunk.personas == []
@patch(
    "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
    side_effect=Exception("no LLM in test"),
)
def test_file_linked_to_both_gets_both_ids(
    self,
    _mock_llm: MagicMock,
    db_session: Session,
    tenant_context: None,  # noqa: ARG002
) -> None:
    """A file linked to a persona and a project carries both ids on its chunk."""
    owner = create_test_user(db_session, "adapter_both")
    user_file = _create_user_file(db_session, owner)
    persona = _create_persona(db_session, owner)
    project = _create_project(db_session, owner)

    persona_link = Persona__UserFile(persona_id=persona.id, user_file_id=user_file.id)
    project_link = Project__UserFile(project_id=project.id, user_file_id=user_file.id)
    db_session.add(persona_link)
    db_session.add(project_link)
    db_session.commit()

    adapter = UserFileIndexingAdapter(
        tenant_id=TEST_TENANT_ID, db_session=db_session
    )
    index_chunk = _make_index_chunk(user_file)
    source_doc = index_chunk.source_document
    prepare_context = DocumentBatchPrepareContext(
        updatable_docs=[source_doc], id_to_boost_map={}
    )

    result = adapter.build_metadata_aware_chunks(
        chunks_with_embeddings=[index_chunk],
        chunk_content_scores=[1.0],
        tenant_id=TEST_TENANT_ID,
        context=prepare_context,
    )

    metadata_chunk = result.chunks[0]
    assert persona.id in metadata_chunk.personas
    assert project.id in metadata_chunk.user_project
@patch(
    "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
    side_effect=Exception("no LLM in test"),
)
def test_file_with_no_associations_gets_empty_lists(
    self,
    _mock_llm: MagicMock,
    db_session: Session,
    tenant_context: None,  # noqa: ARG002
) -> None:
    """A file with no persona or project links yields empty ID lists.

    ``get_default_llm`` is patched to raise, so the adapter runs without
    an LLM.
    """
    owner = create_test_user(db_session, "adapter_empty")
    user_file = _create_user_file(db_session, owner)

    indexing_adapter = UserFileIndexingAdapter(
        tenant_id=TEST_TENANT_ID, db_session=db_session
    )
    index_chunk = _make_index_chunk(user_file)
    prepare_context = DocumentBatchPrepareContext(
        updatable_docs=[index_chunk.source_document], id_to_boost_map={}
    )

    built = indexing_adapter.build_metadata_aware_chunks(
        chunks_with_embeddings=[index_chunk],
        chunk_content_scores=[1.0],
        tenant_id=TEST_TENANT_ID,
        context=prepare_context,
    )

    first_chunk = built.chunks[0]
    assert first_chunk.personas == []
    assert first_chunk.user_project == []
@patch(
    "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
    side_effect=Exception("no LLM in test"),
)
def test_multiple_personas_all_appear(
    self,
    _mock_llm: MagicMock,
    db_session: Session,
    tenant_context: None,  # noqa: ARG002
) -> None:
    """A file linked to multiple personas should have all their IDs."""
    owner = create_test_user(db_session, "adapter_multi")
    user_file = _create_user_file(db_session, owner)
    persona_a = _create_persona(db_session, owner)
    persona_b = _create_persona(db_session, owner)
    # One association row per persona, added in creation order.
    for linked_persona in (persona_a, persona_b):
        db_session.add(
            Persona__UserFile(persona_id=linked_persona.id, user_file_id=user_file.id)
        )
    db_session.commit()

    indexing_adapter = UserFileIndexingAdapter(
        tenant_id=TEST_TENANT_ID, db_session=db_session
    )
    index_chunk = _make_index_chunk(user_file)
    prepare_context = DocumentBatchPrepareContext(
        updatable_docs=[index_chunk.source_document], id_to_boost_map={}
    )

    built = indexing_adapter.build_metadata_aware_chunks(
        chunks_with_embeddings=[index_chunk],
        chunk_content_scores=[1.0],
        tenant_id=TEST_TENANT_ID,
        context=prepare_context,
    )

    first_chunk = built.chunks[0]
    expected_persona_ids = {persona_a.id, persona_b.id}
    assert set(first_chunk.personas) == expected_persona_ids

View File

@@ -1,4 +1,4 @@
"""External dependency unit tests for OpenSearchIndexClient.
"""External dependency unit tests for OpenSearchClient.
These tests assume OpenSearch is running and test all implemented methods
using real schemas, pipelines, and search queries from the codebase.
@@ -19,7 +19,7 @@ from onyx.access.utils import prefix_user_email
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import IndexFilters
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.client import OpenSearchIndexClient
from onyx.document_index.opensearch.client import OpenSearchClient
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.opensearch_document_index import (
@@ -125,10 +125,10 @@ def opensearch_available() -> None:
@pytest.fixture(scope="function")
def test_client(
opensearch_available: None, # noqa: ARG001
) -> Generator[OpenSearchIndexClient, None, None]:
) -> Generator[OpenSearchClient, None, None]:
"""Creates an OpenSearch client for testing with automatic cleanup."""
test_index_name = f"test_index_{uuid.uuid4().hex[:8]}"
client = OpenSearchIndexClient(index_name=test_index_name)
client = OpenSearchClient(index_name=test_index_name)
yield client # Test runs here.
@@ -142,7 +142,7 @@ def test_client(
@pytest.fixture(scope="function")
def search_pipeline(test_client: OpenSearchIndexClient) -> Generator[None, None, None]:
def search_pipeline(test_client: OpenSearchClient) -> Generator[None, None, None]:
"""Creates a search pipeline for testing with automatic cleanup."""
test_client.create_search_pipeline(
pipeline_id=MIN_MAX_NORMALIZATION_PIPELINE_NAME,
@@ -158,9 +158,9 @@ def search_pipeline(test_client: OpenSearchIndexClient) -> Generator[None, None,
class TestOpenSearchClient:
"""Tests for OpenSearchIndexClient."""
"""Tests for OpenSearchClient."""
def test_create_index(self, test_client: OpenSearchIndexClient) -> None:
def test_create_index(self, test_client: OpenSearchClient) -> None:
"""Tests creating an index with a real schema."""
# Precondition.
mappings = DocumentSchema.get_document_schema(
@@ -176,7 +176,7 @@ class TestOpenSearchClient:
# Verify index exists.
assert test_client.validate_index(expected_mappings=mappings) is True
def test_delete_existing_index(self, test_client: OpenSearchIndexClient) -> None:
def test_delete_existing_index(self, test_client: OpenSearchClient) -> None:
"""Tests deleting an existing index returns True."""
# Precondition.
mappings = DocumentSchema.get_document_schema(
@@ -193,7 +193,7 @@ class TestOpenSearchClient:
assert result is True
assert test_client.validate_index(expected_mappings=mappings) is False
def test_delete_nonexistent_index(self, test_client: OpenSearchIndexClient) -> None:
def test_delete_nonexistent_index(self, test_client: OpenSearchClient) -> None:
"""Tests deleting a nonexistent index returns False."""
# Under test.
# Don't create index, just try to delete.
@@ -202,7 +202,7 @@ class TestOpenSearchClient:
# Postcondition.
assert result is False
def test_index_exists(self, test_client: OpenSearchIndexClient) -> None:
def test_index_exists(self, test_client: OpenSearchClient) -> None:
"""Tests checking if an index exists."""
# Precondition.
# Index should not exist before creation.
@@ -219,7 +219,7 @@ class TestOpenSearchClient:
# Index should exist after creation.
assert test_client.index_exists() is True
def test_validate_index(self, test_client: OpenSearchIndexClient) -> None:
def test_validate_index(self, test_client: OpenSearchClient) -> None:
"""Tests validating an index."""
# Precondition.
mappings = DocumentSchema.get_document_schema(
@@ -239,120 +239,7 @@ class TestOpenSearchClient:
# Should return True after creation.
assert test_client.validate_index(expected_mappings=mappings) is True
def test_put_mapping_idempotent(self, test_client: OpenSearchIndexClient) -> None:
    """Re-applying the mappings an index was created with must succeed."""
    # Precondition: an index built from the document schema.
    schema_mappings = DocumentSchema.get_document_schema(
        vector_dimension=128, multitenant=True
    )
    index_settings = DocumentSchema.get_index_settings()
    test_client.create_index(mappings=schema_mappings, settings=index_settings)

    # Under test: push the very same mappings a second time.
    test_client.put_mapping(schema_mappings)

    # Postcondition: the index still validates against those mappings.
    still_valid = test_client.validate_index(expected_mappings=schema_mappings)
    assert still_valid
def test_put_mapping_adds_new_field(
    self, test_client: OpenSearchIndexClient
) -> None:
    """Tests put_mapping successfully adds new fields to existing index."""

    def base_properties() -> dict:
        # Fresh dicts on every call so the initial and updated mappings
        # never share nested objects.
        return {
            "document_id": {"type": "keyword"},
            "chunk_index": {"type": "integer"},
            "content": {"type": "text"},
            "content_vector": {
                "type": "knn_vector",
                "dimension": 128,
                "method": {
                    "name": "hnsw",
                    "space_type": "cosinesimil",
                    "engine": "lucene",
                    "parameters": {"ef_construction": 512, "m": 16},
                },
            },
        }

    # Precondition: create the index with a minimal, strict schema.
    initial_mappings = {"dynamic": "strict", "properties": base_properties()}
    index_settings = DocumentSchema.get_index_settings()
    test_client.create_index(mappings=initial_mappings, settings=index_settings)

    # Under test: same properties plus one new keyword field.
    updated_mappings = {
        "properties": {
            **base_properties(),
            "new_test_field": {"type": "keyword"},
        },
    }
    # Should not raise.
    test_client.put_mapping(updated_mappings)

    # Postcondition: the index validates against the extended schema.
    extended_ok = test_client.validate_index(expected_mappings=updated_mappings)
    assert extended_ok
def test_put_mapping_fails_on_type_change(
    self, test_client: OpenSearchIndexClient
) -> None:
    """Tests put_mapping fails when trying to change existing field type."""
    # Precondition: index where test_field is a keyword.
    keyword_field = {"type": "keyword"}
    initial_mappings = {
        "dynamic": "strict",
        "properties": {
            "document_id": {"type": "keyword"},
            "test_field": keyword_field,
        },
    }
    index_settings = DocumentSchema.get_index_settings()
    test_client.create_index(mappings=initial_mappings, settings=index_settings)

    # Under test and postcondition: redeclare test_field as text.
    text_field = {"type": "text"}
    conflicting_mappings = {
        "properties": {
            "document_id": {"type": "keyword"},
            "test_field": text_field,
        },
    }
    # The put must raise, with a message matching the expected pattern.
    expected_error = pytest.raises(
        Exception, match="mapper|illegal_argument_exception"
    )
    with expected_error:
        test_client.put_mapping(conflicting_mappings)
def test_put_mapping_on_nonexistent_index(
    self, test_client: OpenSearchIndexClient
) -> None:
    """Tests put_mapping on non-existent index raises an error."""
    # Precondition: no index is created; only the mappings are prepared.
    schema_mappings = DocumentSchema.get_document_schema(
        vector_dimension=128, multitenant=True
    )

    # Under test and postcondition: the call must raise a not-found error.
    expected_error = pytest.raises(
        Exception, match="index_not_found_exception|404"
    )
    with expected_error:
        test_client.put_mapping(schema_mappings)
def test_create_duplicate_index(self, test_client: OpenSearchIndexClient) -> None:
def test_create_duplicate_index(self, test_client: OpenSearchClient) -> None:
"""Tests creating an index twice raises an error."""
# Precondition.
mappings = DocumentSchema.get_document_schema(
@@ -367,14 +254,14 @@ class TestOpenSearchClient:
with pytest.raises(Exception, match="already exists"):
test_client.create_index(mappings=mappings, settings=settings)
def test_update_settings(self, test_client: OpenSearchIndexClient) -> None:
def test_update_settings(self, test_client: OpenSearchClient) -> None:
"""Tests that update_settings raises NotImplementedError."""
# Under test and postcondition.
with pytest.raises(NotImplementedError):
test_client.update_settings(settings={})
def test_create_and_delete_search_pipeline(
self, test_client: OpenSearchIndexClient
self, test_client: OpenSearchClient
) -> None:
"""Tests creating and deleting a search pipeline."""
# Under test and postcondition.
@@ -391,7 +278,7 @@ class TestOpenSearchClient:
)
def test_index_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests indexing a document."""
# Precondition.
@@ -419,7 +306,7 @@ class TestOpenSearchClient:
)
def test_bulk_index_documents(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests bulk indexing documents."""
# Precondition.
@@ -450,7 +337,7 @@ class TestOpenSearchClient:
)
def test_index_duplicate_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests indexing a duplicate document raises an error."""
# Precondition.
@@ -478,7 +365,7 @@ class TestOpenSearchClient:
test_client.index_document(document=doc, tenant_state=tenant_state)
def test_get_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests getting a document."""
# Precondition.
@@ -514,7 +401,7 @@ class TestOpenSearchClient:
assert retrieved_doc == original_doc
def test_get_nonexistent_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests getting a nonexistent document raises an error."""
# Precondition.
@@ -532,7 +419,7 @@ class TestOpenSearchClient:
)
def test_delete_existing_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests deleting an existing document returns True."""
# Precondition.
@@ -568,7 +455,7 @@ class TestOpenSearchClient:
test_client.get_document(document_chunk_id=doc_chunk_id)
def test_delete_nonexistent_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests deleting a nonexistent document returns False."""
# Precondition.
@@ -589,7 +476,7 @@ class TestOpenSearchClient:
assert result is False
def test_delete_by_query(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests deleting documents by query."""
# Precondition.
@@ -665,7 +552,7 @@ class TestOpenSearchClient:
assert len(keep_ids) == 1
def test_update_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests updating a document's properties."""
# Precondition.
@@ -714,7 +601,7 @@ class TestOpenSearchClient:
assert updated_doc.public == doc.public
def test_update_nonexistent_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests updating a nonexistent document raises an error."""
# Precondition.
@@ -736,7 +623,7 @@ class TestOpenSearchClient:
def test_hybrid_search_with_pipeline(
self,
test_client: OpenSearchIndexClient,
test_client: OpenSearchClient,
search_pipeline: None, # noqa: ARG002
monkeypatch: pytest.MonkeyPatch,
) -> None:
@@ -817,7 +704,7 @@ class TestOpenSearchClient:
def test_search_empty_index(
self,
test_client: OpenSearchIndexClient,
test_client: OpenSearchClient,
search_pipeline: None, # noqa: ARG002
monkeypatch: pytest.MonkeyPatch,
) -> None:
@@ -856,7 +743,7 @@ class TestOpenSearchClient:
def test_hybrid_search_with_pipeline_and_filters(
self,
test_client: OpenSearchIndexClient,
test_client: OpenSearchClient,
search_pipeline: None, # noqa: ARG002
monkeypatch: pytest.MonkeyPatch,
) -> None:
@@ -976,7 +863,7 @@ class TestOpenSearchClient:
def test_hybrid_search_with_pipeline_and_filters_returns_chunks_with_related_content_first(
self,
test_client: OpenSearchIndexClient,
test_client: OpenSearchClient,
search_pipeline: None, # noqa: ARG002
monkeypatch: pytest.MonkeyPatch,
) -> None:
@@ -1106,7 +993,7 @@ class TestOpenSearchClient:
previous_score = current_score
def test_delete_by_query_multitenant_isolation(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""
Tests delete_by_query respects tenant boundaries in multi-tenant mode.
@@ -1200,7 +1087,7 @@ class TestOpenSearchClient:
assert set(remaining_y_ids) == expected_y_ids
def test_delete_by_query_nonexistent_document(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""
Tests delete_by_query for non-existent document returns 0 deleted.
@@ -1229,7 +1116,7 @@ class TestOpenSearchClient:
assert num_deleted == 0
def test_search_for_document_ids(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests search_for_document_ids method returns correct chunk IDs."""
# Precondition.
@@ -1294,7 +1181,7 @@ class TestOpenSearchClient:
assert set(chunk_ids) == expected_ids
def test_search_with_no_document_access_can_retrieve_all_documents(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""
Tests search with no document access can retrieve all documents, even
@@ -1372,7 +1259,7 @@ class TestOpenSearchClient:
def test_time_cutoff_filter(
self,
test_client: OpenSearchIndexClient,
test_client: OpenSearchClient,
search_pipeline: None, # noqa: ARG002
monkeypatch: pytest.MonkeyPatch,
) -> None:
@@ -1465,7 +1352,7 @@ class TestOpenSearchClient:
)
def test_random_search(
self, test_client: OpenSearchIndexClient, monkeypatch: pytest.MonkeyPatch
self, test_client: OpenSearchClient, monkeypatch: pytest.MonkeyPatch
) -> None:
"""Tests the random search query works."""
# Precondition.

View File

@@ -37,7 +37,6 @@ from onyx.db.opensearch_migration import build_sanitized_to_original_doc_id_mapp
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.interfaces_new import TenantState
from onyx.document_index.opensearch.client import OpenSearchClient
from onyx.document_index.opensearch.client import OpenSearchIndexClient
from onyx.document_index.opensearch.client import wait_for_opensearch_with_timeout
from onyx.document_index.opensearch.constants import DEFAULT_MAX_CHUNK_SIZE
from onyx.document_index.opensearch.schema import DocumentChunk
@@ -75,7 +74,7 @@ CHUNK_COUNT = 5
def _get_document_chunks_from_opensearch(
opensearch_client: OpenSearchIndexClient, document_id: str, current_tenant_id: str
opensearch_client: OpenSearchClient, document_id: str, current_tenant_id: str
) -> list[DocumentChunk]:
opensearch_client.refresh_index()
filters = IndexFilters(access_control_list=None, tenant_id=current_tenant_id)
@@ -96,7 +95,7 @@ def _get_document_chunks_from_opensearch(
def _delete_document_chunks_from_opensearch(
opensearch_client: OpenSearchIndexClient, document_id: str, current_tenant_id: str
opensearch_client: OpenSearchClient, document_id: str, current_tenant_id: str
) -> None:
opensearch_client.refresh_index()
query_body = DocumentQuery.delete_from_document_id_query(
@@ -284,10 +283,10 @@ def vespa_document_index(
def opensearch_client(
db_session: Session,
full_deployment_setup: None, # noqa: ARG001
) -> Generator[OpenSearchIndexClient, None, None]:
) -> Generator[OpenSearchClient, None, None]:
"""Creates an OpenSearch client for the test tenant."""
active = get_active_search_settings(db_session)
yield OpenSearchIndexClient(index_name=active.primary.index_name) # Test runs here.
yield OpenSearchClient(index_name=active.primary.index_name) # Test runs here.
@pytest.fixture(scope="module")
@@ -331,7 +330,7 @@ def patch_get_vespa_chunks_page_size() -> Generator[int, None, None]:
def test_documents(
db_session: Session,
vespa_document_index: VespaDocumentIndex,
opensearch_client: OpenSearchIndexClient,
opensearch_client: OpenSearchClient,
patch_get_vespa_chunks_page_size: int,
) -> Generator[list[Document], None, None]:
"""
@@ -412,7 +411,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
db_session: Session,
test_documents: list[Document],
vespa_document_index: VespaDocumentIndex,
opensearch_client: OpenSearchIndexClient,
opensearch_client: OpenSearchClient,
test_embedding_dimension: int,
clean_migration_tables: None, # noqa: ARG002
enable_opensearch_indexing_for_onyx: None, # noqa: ARG002
@@ -481,7 +480,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
db_session: Session,
test_documents: list[Document],
vespa_document_index: VespaDocumentIndex,
opensearch_client: OpenSearchIndexClient,
opensearch_client: OpenSearchClient,
test_embedding_dimension: int,
clean_migration_tables: None, # noqa: ARG002
enable_opensearch_indexing_for_onyx: None, # noqa: ARG002
@@ -619,7 +618,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
db_session: Session,
test_documents: list[Document],
vespa_document_index: VespaDocumentIndex,
opensearch_client: OpenSearchIndexClient,
opensearch_client: OpenSearchClient,
test_embedding_dimension: int,
clean_migration_tables: None, # noqa: ARG002
enable_opensearch_indexing_for_onyx: None, # noqa: ARG002
@@ -713,7 +712,7 @@ class TestMigrateChunksFromVespaToOpenSearchTask:
db_session: Session,
test_documents: list[Document],
vespa_document_index: VespaDocumentIndex,
opensearch_client: OpenSearchIndexClient,
opensearch_client: OpenSearchClient,
test_embedding_dimension: int,
clean_migration_tables: None, # noqa: ARG002
enable_opensearch_indexing_for_onyx: None, # noqa: ARG002

View File

@@ -20,7 +20,6 @@ from onyx.auth.oauth_token_manager import OAuthTokenManager
from onyx.db.models import OAuthConfig
from onyx.db.oauth_config import create_oauth_config
from onyx.db.oauth_config import upsert_user_oauth_token
from onyx.utils.sensitive import SensitiveValue
from tests.external_dependency_unit.conftest import create_test_user
@@ -492,19 +491,3 @@ class TestOAuthTokenManagerURLBuilding:
# Should use & instead of ? since URL already has query params
assert "foo=bar&" in url or "?foo=bar" in url
assert "client_id=custom_client_id" in url
class TestUnwrapSensitiveStr:
    """Tests for _unwrap_sensitive_str static method"""

    def test_unwrap_sensitive_str(self) -> None:
        """A SensitiveValue is unwrapped and a plain str is returned as-is."""
        # Wrapped input: the decrypt function just decodes the raw bytes.
        wrapped = SensitiveValue[str](
            encrypted_bytes=b"test_client_id",
            decrypt_fn=lambda raw: raw.decode(),
        )
        unwrapped = OAuthTokenManager._unwrap_sensitive_str(wrapped)
        assert unwrapped == "test_client_id"

        # Plain string input must come back unchanged.
        passthrough = OAuthTokenManager._unwrap_sensitive_str("plain_string")
        assert passthrough == "plain_string"

View File

@@ -76,12 +76,9 @@ class ChatSessionManager:
user_performing_action: DATestUser,
persona_id: int = 0,
description: str = "Test chat session",
project_id: int | None = None,
) -> DATestChatSession:
chat_session_creation_req = ChatSessionCreationRequest(
persona_id=persona_id,
description=description,
project_id=project_id,
persona_id=persona_id, description=description
)
response = requests.post(
f"{API_SERVER_URL}/chat/create-chat-session",

View File

@@ -1,79 +0,0 @@
import requests
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.test_models import DATestScimToken
from tests.integration.common_utils.test_models import DATestUser
class ScimTokenManager:
    """Integration-test helper for SCIM tokens and token-authenticated SCIM GETs."""

    @staticmethod
    def create(
        name: str,
        user_performing_action: DATestUser,
    ) -> DATestScimToken:
        """Create a SCIM token through the admin endpoint.

        Args:
            name: Name sent in the request body.
            user_performing_action: User whose headers authenticate the call.

        Returns:
            The created token, including ``raw_token`` from the response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        token_url = f"{API_SERVER_URL}/admin/enterprise-settings/scim/token"
        response = requests.post(
            token_url,
            json={"name": name},
            headers=user_performing_action.headers,
            timeout=60,
        )
        response.raise_for_status()
        payload = response.json()
        required = {
            field: payload[field]
            for field in ("id", "name", "token_display", "is_active", "created_at")
        }
        return DATestScimToken(
            **required,
            last_used_at=payload.get("last_used_at"),
            raw_token=payload["raw_token"],
        )

    @staticmethod
    def get_active(
        user_performing_action: DATestUser,
    ) -> DATestScimToken | None:
        """Fetch the token returned by the admin endpoint.

        Returns:
            The token without ``raw_token`` set, or ``None`` when the
            endpoint answers 404.

        Raises:
            requests.HTTPError: For any other error status.
        """
        token_url = f"{API_SERVER_URL}/admin/enterprise-settings/scim/token"
        response = requests.get(
            token_url,
            headers=user_performing_action.headers,
            timeout=60,
        )
        if response.status_code == 404:
            return None
        response.raise_for_status()
        payload = response.json()
        required = {
            field: payload[field]
            for field in ("id", "name", "token_display", "is_active", "created_at")
        }
        return DATestScimToken(
            **required,
            last_used_at=payload.get("last_used_at"),
        )

    @staticmethod
    def get_scim_headers(raw_token: str) -> dict[str, str]:
        """Return the general headers plus a Bearer ``Authorization`` header."""
        headers = dict(GENERAL_HEADERS)
        headers["Authorization"] = f"Bearer {raw_token}"
        return headers

    @staticmethod
    def scim_get(
        path: str,
        raw_token: str,
    ) -> requests.Response:
        """GET ``/scim/v2{path}`` authenticated with the given raw token."""
        target_url = f"{API_SERVER_URL}/scim/v2{path}"
        auth_headers = ScimTokenManager.get_scim_headers(raw_token)
        return requests.get(target_url, headers=auth_headers, timeout=60)

    @staticmethod
    def scim_get_no_auth(path: str) -> requests.Response:
        """GET ``/scim/v2{path}`` with only the general headers (no token)."""
        target_url = f"{API_SERVER_URL}/scim/v2{path}"
        return requests.get(target_url, headers=GENERAL_HEADERS, timeout=60)

View File

@@ -42,18 +42,6 @@ class DATestPAT(BaseModel):
last_used_at: str | None = None
class DATestScimToken(BaseModel):
    """SCIM bearer token model for testing.

    Only ``raw_token`` and ``last_used_at`` are optional; every other field
    must be supplied when the model is constructed.
    """

    id: int
    name: str
    raw_token: str | None = None  # Only present on initial creation
    # Display form of the token — presumably masked/truncated; confirm against the API.
    token_display: str
    is_active: bool
    # Timestamps are kept as plain strings; no parsing is done by this model.
    created_at: str
    last_used_at: str | None = None
class DATestAPIKey(BaseModel):
api_key_id: int
api_key_display: str

View File

@@ -23,8 +23,6 @@ _ENV_PROVIDER = "NIGHTLY_LLM_PROVIDER"
_ENV_MODELS = "NIGHTLY_LLM_MODELS"
_ENV_API_KEY = "NIGHTLY_LLM_API_KEY"
_ENV_API_BASE = "NIGHTLY_LLM_API_BASE"
_ENV_API_VERSION = "NIGHTLY_LLM_API_VERSION"
_ENV_DEPLOYMENT_NAME = "NIGHTLY_LLM_DEPLOYMENT_NAME"
_ENV_CUSTOM_CONFIG_JSON = "NIGHTLY_LLM_CUSTOM_CONFIG_JSON"
_ENV_STRICT = "NIGHTLY_LLM_STRICT"
@@ -36,8 +34,6 @@ class NightlyProviderConfig(BaseModel):
model_names: list[str]
api_key: str | None
api_base: str | None
api_version: str | None
deployment_name: str | None
custom_config: dict[str, str] | None
strict: bool
@@ -49,29 +45,17 @@ def _env_true(env_var: str, default: bool = False) -> bool:
return value.strip().lower() in {"1", "true", "yes", "on"}
def _parse_models_env(env_var: str) -> list[str]:
raw_value = os.environ.get(env_var, "").strip()
if not raw_value:
return []
try:
parsed_json = json.loads(raw_value)
except json.JSONDecodeError:
parsed_json = None
if isinstance(parsed_json, list):
return [str(model).strip() for model in parsed_json if str(model).strip()]
return [part.strip() for part in raw_value.split(",") if part.strip()]
def _split_csv_env(env_var: str) -> list[str]:
return [
part.strip() for part in os.environ.get(env_var, "").split(",") if part.strip()
]
def _load_provider_config() -> NightlyProviderConfig:
provider = os.environ.get(_ENV_PROVIDER, "").strip().lower()
model_names = _parse_models_env(_ENV_MODELS)
model_names = _split_csv_env(_ENV_MODELS)
api_key = os.environ.get(_ENV_API_KEY) or None
api_base = os.environ.get(_ENV_API_BASE) or None
api_version = os.environ.get(_ENV_API_VERSION) or None
deployment_name = os.environ.get(_ENV_DEPLOYMENT_NAME) or None
strict = _env_true(_ENV_STRICT, default=False)
custom_config: dict[str, str] | None = None
@@ -90,8 +74,6 @@ def _load_provider_config() -> NightlyProviderConfig:
model_names=model_names,
api_key=api_key,
api_base=api_base,
api_version=api_version,
deployment_name=deployment_name,
custom_config=custom_config,
strict=strict,
)
@@ -113,15 +95,10 @@ def _validate_provider_config(config: NightlyProviderConfig) -> None:
message=f"{_ENV_MODELS} must include at least one model",
)
if config.provider != "ollama_chat" and not (
config.api_key or config.custom_config
):
if config.provider != "ollama_chat" and not config.api_key:
_skip_or_fail(
strict=config.strict,
message=(
f"{_ENV_API_KEY} or {_ENV_CUSTOM_CONFIG_JSON} is required for "
f"provider '{config.provider}'"
),
message=(f"{_ENV_API_KEY} is required for provider '{config.provider}'"),
)
if config.provider == "ollama_chat" and not (
@@ -132,22 +109,6 @@ def _validate_provider_config(config: NightlyProviderConfig) -> None:
message=(f"{_ENV_API_BASE} is required for provider '{config.provider}'"),
)
if config.provider == "azure":
if not config.api_base:
_skip_or_fail(
strict=config.strict,
message=(
f"{_ENV_API_BASE} is required for provider '{config.provider}'"
),
)
if not config.api_version:
_skip_or_fail(
strict=config.strict,
message=(
f"{_ENV_API_VERSION} is required for provider '{config.provider}'"
),
)
def _assert_integration_mode_enabled() -> None:
assert (
@@ -186,8 +147,6 @@ def _create_provider_payload(
model_name: str,
api_key: str | None,
api_base: str | None,
api_version: str | None,
deployment_name: str | None,
custom_config: dict[str, str] | None,
) -> dict:
return {
@@ -195,8 +154,6 @@ def _create_provider_payload(
"provider": provider,
"api_key": api_key,
"api_base": api_base,
"api_version": api_version,
"deployment_name": deployment_name,
"custom_config": custom_config,
"default_model_name": model_name,
"is_public": True,
@@ -298,8 +255,6 @@ def _create_and_test_provider_for_model(
model_name=model_name,
api_key=config.api_key,
api_base=resolved_api_base,
api_version=config.api_version,
deployment_name=config.deployment_name,
custom_config=config.custom_config,
)
@@ -358,21 +313,10 @@ def test_nightly_provider_chat_workflow(admin_user: DATestUser) -> None:
_seed_connector_for_search_tool(admin_user)
search_tool_id = _get_internal_search_tool_id(admin_user)
failures: list[str] = []
for model_name in config.model_names:
try:
_create_and_test_provider_for_model(
admin_user=admin_user,
config=config,
model_name=model_name,
search_tool_id=search_tool_id,
)
except BaseException as exc:
if isinstance(exc, (KeyboardInterrupt, SystemExit)):
raise
failures.append(
f"provider={config.provider} model={model_name} error={type(exc).__name__}: {exc}"
)
if failures:
pytest.fail("Nightly provider chat failures:\n" + "\n".join(failures))
_create_and_test_provider_for_model(
admin_user=admin_user,
config=config,
model_name=model_name,
search_tool_id=search_tool_id,
)

View File

@@ -72,9 +72,6 @@ def test_cold_startup_default_assistant() -> None:
assert (
"read_file" in tool_names
), "Default assistant should have FileReaderTool attached"
assert (
"python" in tool_names
), "Default assistant should have PythonTool attached"
# Also verify by display names for clarity
assert (
@@ -89,11 +86,8 @@ def test_cold_startup_default_assistant() -> None:
assert (
"File Reader" in tool_display_names
), "Default assistant should have File Reader tool"
assert (
"Code Interpreter" in tool_display_names
), "Default assistant should have Code Interpreter tool"
# Should have exactly 6 tools
# Should have exactly 5 tools
assert (
len(tool_associations) == 6
), f"Default assistant should have exactly 6 tools attached, got {len(tool_associations)}"
len(tool_associations) == 5
), f"Default assistant should have exactly 5 tools attached, got {len(tool_associations)}"

View File

@@ -1,318 +0,0 @@
"""
Integration tests for the unified persona file context flow.
End-to-end tests that verify:
1. Files can be uploaded and attached to a persona via API.
2. The persona correctly reports its attached files.
3. A chat session with a file-bearing persona processes without error.
4. Precedence: custom persona files take priority over project files when
the chat session is inside a project.
These tests run against a real Onyx deployment (all services running).
File processing is asynchronous, so we poll the file status endpoint
until files reach COMPLETED before chatting.
"""
import time
import requests
from onyx.db.enums import UserFileStatus
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import MAX_DELAY
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.managers.project import ProjectManager
from tests.integration.common_utils.test_file_utils import create_test_text_file
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Seconds to sleep between two consecutive status polls.
FILE_PROCESSING_POLL_INTERVAL = 2
# Upper bound (seconds) for a single status request so that an unresponsive
# server cannot stall the poll loop beyond its deadline indefinitely.
FILE_STATUS_REQUEST_TIMEOUT = 60


def _poll_file_statuses(
    user_file_ids: list[str],
    user: DATestUser,
    target_status: UserFileStatus = UserFileStatus.COMPLETED,
    timeout: int = MAX_DELAY,
) -> None:
    """Block until all files reach the target status or timeout expires.

    Args:
        user_file_ids: IDs sent as ``file_ids`` to the statuses endpoint.
        user: Test user whose headers authenticate the request.
        target_status: Status every returned entry must report.
        timeout: Overall polling budget in seconds.

    Raises:
        TimeoutError: If the files do not all reach ``target_status`` within
            ``timeout`` seconds.
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If a single request exceeds
            ``FILE_STATUS_REQUEST_TIMEOUT`` seconds.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        response = requests.post(
            f"{API_SERVER_URL}/user/projects/file/statuses",
            json={"file_ids": user_file_ids},
            headers=user.headers,
            timeout=FILE_STATUS_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        statuses = response.json()
        if all(f["status"] == target_status.value for f in statuses):
            return
        time.sleep(FILE_PROCESSING_POLL_INTERVAL)
    raise TimeoutError(
        f"Files {user_file_ids} did not reach {target_status.value} "
        f"within {timeout}s"
    )
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_persona_with_files_chat_no_error(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Chat with a persona that owns an uploaded file and expect no error.

    Flow: upload one text file, wait for it to finish processing, attach it
    to a new persona, confirm the persona reports the file, then send a chat
    message and check that a non-empty, error-free answer comes back.
    """
    # Uploading creates the UserFile record we later attach to the persona.
    brief = create_test_text_file(
        "The secret project codename is NIGHTINGALE. "
        "It was started in 2024 by the Advanced Research division."
    )
    uploaded, upload_error = FileManager.upload_files(
        files=[("nightingale_brief.txt", brief)],
        user_performing_action=admin_user,
    )
    assert not upload_error, f"File upload failed: {upload_error}"
    assert len(uploaded) == 1
    brief_file_id = uploaded[0]["user_file_id"]
    assert brief_file_id is not None

    # Processing is asynchronous; block until the file is ready.
    _poll_file_statuses([brief_file_id], admin_user, timeout=120)

    # Persona that carries the uploaded file.
    agent = PersonaManager.create(
        user_performing_action=admin_user,
        name="Nightingale Agent",
        description="Agent with secret file",
        system_prompt="You are a helpful assistant with access to uploaded files.",
        user_file_ids=[brief_file_id],
    )

    # The persona must report the attached file.
    snapshots = PersonaManager.get_one(agent.id, admin_user)
    assert len(snapshots) == 1
    assert brief_file_id in snapshots[0].user_file_ids

    # Finally, talk to the persona.
    session = ChatSessionManager.create(
        persona_id=agent.id,
        description="Test persona file context",
        user_performing_action=admin_user,
    )
    reply = ChatSessionManager.send_message(
        chat_session_id=session.id,
        message="What is the secret project codename?",
        user_performing_action=admin_user,
    )
    assert reply.error is None, f"Chat should succeed, got error: {reply.error}"
    assert len(reply.full_message) > 0, "Response should not be empty"
def test_persona_without_files_still_works(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Sanity check: a file-less persona answers a chat message without error."""
    blank_agent = PersonaManager.create(
        user_performing_action=admin_user,
        name="Blank Agent",
        description="No files attached",
        system_prompt="You are a helpful assistant.",
    )
    session = ChatSessionManager.create(
        persona_id=blank_agent.id,
        description="Test blank persona",
        user_performing_action=admin_user,
    )
    reply = ChatSessionManager.send_message(
        chat_session_id=session.id,
        message="Hello, how are you?",
        user_performing_action=admin_user,
    )

    assert reply.error is None
    assert len(reply.full_message) > 0
def test_persona_files_override_project_files(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """When a custom persona (with its own files) is used inside a project,
    the persona's files take precedence — the project's files are invisible.

    We verify this by putting different content in project vs persona files
    and checking which content the model responds with: the persona's secret
    word must appear and the project's secret word must not.
    """
    # Upload persona file
    persona_file = create_test_text_file("The persona's secret word is ALBATROSS.")
    persona_fds, err1 = FileManager.upload_files(
        files=[("persona_secret.txt", persona_file)],
        user_performing_action=admin_user,
    )
    assert not err1
    persona_user_file_id = persona_fds[0]["user_file_id"]
    assert persona_user_file_id is not None

    # Create a project and upload project files
    project = ProjectManager.create(
        name="Precedence Test Project",
        user_performing_action=admin_user,
    )
    project_files = [
        ("project_secret.txt", b"The project's secret word is FLAMINGO."),
    ]
    project_upload_result = ProjectManager.upload_files(
        project_id=project.id,
        files=project_files,
        user_performing_action=admin_user,
    )
    assert len(project_upload_result.user_files) == 1
    project_user_file_id = str(project_upload_result.user_files[0].id)

    # Wait for both persona and project file processing
    _poll_file_statuses([persona_user_file_id], admin_user, timeout=120)
    _poll_file_statuses([project_user_file_id], admin_user, timeout=120)

    # Create persona with persona file
    persona = PersonaManager.create(
        user_performing_action=admin_user,
        name="Override Agent",
        description="Persona with its own files",
        system_prompt="You are a helpful assistant. Answer using the files.",
        user_file_ids=[persona_user_file_id],
    )

    # Create chat session inside the project but using the custom persona
    chat_session = ChatSessionManager.create(
        persona_id=persona.id,
        project_id=project.id,
        user_performing_action=admin_user,
    )
    response = ChatSessionManager.send_message(
        chat_session_id=chat_session.id,
        message="What is the secret word?",
        user_performing_action=admin_user,
    )
    assert response.error is None, f"Chat should succeed, got error: {response.error}"

    # The persona's file should be what the model sees, not the project's
    message_lower = response.full_message.lower()
    assert "albatross" in message_lower, (
        "Response should reference the persona file's secret word (ALBATROSS), "
        f"but got: {response.full_message}"
    )
    # Without this check a regression that loads BOTH file sets would pass.
    assert "flamingo" not in message_lower, (
        "Response should NOT reference the project file's secret word (FLAMINGO) "
        "because the persona's files take precedence over the project's, "
        f"but got: {response.full_message}"
    )
def test_default_persona_in_project_uses_project_files(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Default persona (id=0) chatting inside a project should draw its
    context from the project's files."""
    mascot_project = ProjectManager.create(
        name="Default Persona Project",
        user_performing_action=admin_user,
    )
    uploaded = ProjectManager.upload_files(
        project_id=mascot_project.id,
        files=[
            ("project_info.txt", b"The project mascot is a PANGOLIN."),
        ],
        user_performing_action=admin_user,
    )
    assert len(uploaded.user_files) == 1

    # Block until the project file has been processed.
    mascot_file_id = str(uploaded.user_files[0].id)
    _poll_file_statuses([mascot_file_id], admin_user, timeout=120)

    # Session lives in the project and uses the default persona (id=0).
    session = ChatSessionManager.create(
        persona_id=0,
        project_id=mascot_project.id,
        user_performing_action=admin_user,
    )
    reply = ChatSessionManager.send_message(
        chat_session_id=session.id,
        message="What is the project mascot?",
        user_performing_action=admin_user,
    )

    assert reply.error is None
    assert "pangolin" in reply.full_message.lower(), (
        "Response should reference the project file content (PANGOLIN), "
        f"but got: {reply.full_message}"
    )
def test_custom_persona_no_files_in_project_ignores_project(
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """A file-less custom persona must not inherit the enclosing project's files.

    The project holds a file with a unique word; the persona has no files.
    The question targets that word, and the answer is required NOT to
    contain it — the project is purely organizational here.
    """
    ignored_project = ProjectManager.create(
        name="Ignored Project",
        user_performing_action=admin_user,
    )
    uploaded = ProjectManager.upload_files(
        project_id=ignored_project.id,
        files=[("project_only.txt", b"The project secret is CAPYBARA.")],
        user_performing_action=admin_user,
    )
    assert len(uploaded.user_files) == 1
    secret_file_id = str(uploaded.user_files[0].id)

    # Block until the project file has been processed.
    _poll_file_statuses([secret_file_id], admin_user, timeout=120)

    # Custom persona that owns no files at all.
    fallback_prompt = (
        "You are a helpful assistant. If you do not have information "
        "to answer a question, say 'I do not have that information.'"
    )
    agent = PersonaManager.create(
        user_performing_action=admin_user,
        name="No Files Agent",
        description="No files, project is irrelevant",
        system_prompt=fallback_prompt,
    )

    session = ChatSessionManager.create(
        persona_id=agent.id,
        project_id=ignored_project.id,
        user_performing_action=admin_user,
    )
    reply = ChatSessionManager.send_message(
        chat_session_id=session.id,
        message="What is the project secret?",
        user_performing_action=admin_user,
    )

    assert reply.error is None
    assert len(reply.full_message) > 0
    assert "capybara" not in reply.full_message.lower(), (
        "Response should NOT reference the project file content (CAPYBARA) "
        "because the custom persona has no files and should not inherit "
        f"project files, but got: {reply.full_message}"
    )

View File

@@ -1,166 +0,0 @@
"""Integration tests for SCIM token management.
Covers the admin token API and SCIM bearer-token authentication:
1. Token lifecycle: create, retrieve metadata, use for SCIM requests
2. Token rotation: creating a new token revokes previous tokens
3. Revoked tokens are rejected by SCIM endpoints
4. Non-admin users cannot manage SCIM tokens
5. SCIM requests without a token are rejected
6. Service discovery endpoints work without authentication
7. last_used_at is updated after a SCIM request
"""
import time
import requests
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.scim_token import ScimTokenManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser
def test_scim_token_lifecycle(admin_user: DATestUser) -> None:
    """Happy path: create a token, read back its metadata, call SCIM with it."""
    created = ScimTokenManager.create(
        name="Test SCIM Token",
        user_performing_action=admin_user,
    )
    assert created.raw_token is not None
    assert created.raw_token.startswith("onyx_scim_")
    assert created.is_active is True
    assert "****" in created.token_display

    # The raw token is only revealed once, at creation time (the server keeps
    # just its SHA-256 hash), so a later GET must match the created metadata
    # with raw_token blanked out.
    fetched = ScimTokenManager.get_active(user_performing_action=admin_user)
    expected_metadata = created.model_copy(update={"raw_token": None})
    assert fetched == expected_metadata

    # The freshly created token authenticates SCIM requests.
    users_response = ScimTokenManager.scim_get("/Users", created.raw_token)
    assert users_response.status_code == 200
    payload = users_response.json()
    assert "Resources" in payload
    assert payload["totalResults"] >= 0
def test_scim_token_rotation_revokes_previous(admin_user: DATestUser) -> None:
    """Issuing a second token must revoke the first one automatically."""
    old_token = ScimTokenManager.create(
        name="First Token",
        user_performing_action=admin_user,
    )
    assert old_token.raw_token is not None

    # Before rotation the first token is accepted.
    pre_rotation = ScimTokenManager.scim_get("/Users", old_token.raw_token)
    assert pre_rotation.status_code == 200

    # Rotation: creating another token should revoke the first.
    new_token = ScimTokenManager.create(
        name="Second Token",
        user_performing_action=admin_user,
    )
    assert new_token.raw_token is not None

    # The active token reported by the admin API is now the second one.
    current = ScimTokenManager.get_active(user_performing_action=admin_user)
    assert current == new_token.model_copy(update={"raw_token": None})

    # Old token is rejected, new token is accepted.
    old_status = ScimTokenManager.scim_get("/Users", old_token.raw_token).status_code
    assert old_status == 401
    new_status = ScimTokenManager.scim_get("/Users", new_token.raw_token).status_code
    assert new_status == 200
def test_scim_request_without_token_rejected(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """A SCIM call that carries no Authorization header must get a 401."""
    unauthenticated = ScimTokenManager.scim_get_no_auth("/Users")
    assert unauthenticated.status_code == 401
def test_scim_request_with_bad_token_rejected(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """A SCIM call that presents an unknown token must get a 401."""
    bogus_response = ScimTokenManager.scim_get(
        "/Users", "onyx_scim_bogus_token_value"
    )
    assert bogus_response.status_code == 401
def test_non_admin_cannot_create_token(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """Token creation by a non-admin user is answered with 403."""
    regular_user = UserManager.create(name="scim_basic_user")
    token_endpoint = f"{API_SERVER_URL}/admin/enterprise-settings/scim/token"

    create_attempt = requests.post(
        token_endpoint,
        json={"name": "Should Fail"},
        headers=regular_user.headers,
        timeout=60,
    )

    assert create_attempt.status_code == 403
def test_non_admin_cannot_get_token(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """Reading SCIM token metadata as a non-admin user is answered with 403."""
    regular_user = UserManager.create(name="scim_basic_user2")
    token_endpoint = f"{API_SERVER_URL}/admin/enterprise-settings/scim/token"

    read_attempt = requests.get(
        token_endpoint,
        headers=regular_user.headers,
        timeout=60,
    )

    assert read_attempt.status_code == 403
def test_no_active_token_returns_404(new_admin_user: DATestUser) -> None:
    """With no token in the system, the active-token GET yields 404."""
    # The new_admin_user fixture depends on the reset fixture, so the DB is
    # clean and holds no active SCIM tokens at this point.
    current = ScimTokenManager.get_active(user_performing_action=new_admin_user)
    assert current is None

    # Hit the raw endpoint as well to pin the HTTP status code.
    token_endpoint = f"{API_SERVER_URL}/admin/enterprise-settings/scim/token"
    raw_response = requests.get(
        token_endpoint,
        headers=new_admin_user.headers,
        timeout=60,
    )
    assert raw_response.status_code == 404
def test_service_discovery_no_auth_required(
    admin_user: DATestUser,  # noqa: ARG001
) -> None:
    """The SCIM discovery endpoints answer 200 without any credentials."""
    discovery_paths = ["/ServiceProviderConfig", "/ResourceTypes", "/Schemas"]
    for path in discovery_paths:
        status = ScimTokenManager.scim_get_no_auth(path).status_code
        assert status == 200, f"{path} returned {status}"
def test_last_used_at_updated_after_scim_request(
    admin_user: DATestUser,
) -> None:
    """Using a token for a SCIM request must populate its last_used_at."""
    created = ScimTokenManager.create(
        name="Last Used Token",
        user_performing_action=admin_user,
    )
    assert created.raw_token is not None

    # A brand-new token has never been used.
    before_use = ScimTokenManager.get_active(user_performing_action=admin_user)
    assert before_use is not None
    assert before_use.last_used_at is None

    # Use the token once, pause briefly, then re-read the metadata.
    used = ScimTokenManager.scim_get("/Users", created.raw_token)
    assert used.status_code == 200
    time.sleep(0.5)

    after_use = ScimTokenManager.get_active(user_performing_action=admin_user)
    assert after_use is not None
    assert after_use.last_used_at is not None

View File

@@ -3,8 +3,6 @@ from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from ee.onyx.external_permissions.sharepoint.permission_utils import (
_enumerate_ad_groups_paginated,
)
@@ -17,9 +15,6 @@ from ee.onyx.external_permissions.sharepoint.permission_utils import (
from ee.onyx.external_permissions.sharepoint.permission_utils import (
AD_GROUP_ENUMERATION_THRESHOLD,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import (
get_external_access_from_sharepoint,
)
from ee.onyx.external_permissions.sharepoint.permission_utils import (
get_sharepoint_external_groups,
)
@@ -271,65 +266,3 @@ def test_enumerate_all_without_token_skips(
assert results == []
mock_enum.assert_not_called()
# ---------------------------------------------------------------------------
# get_external_access_from_sharepoint site page URL handling
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "site_base_url, web_url, expected_relative_url",
    [
        (
            "https://tenant.sharepoint.com/sites/Evan%27sSite",
            "https://tenant.sharepoint.com/sites/Evan%27sSite/SitePages/Home.aspx",
            "/sites/Evan%27sSite/SitePages/Home.aspx",
        ),
        (
            "https://tenant.sharepoint.com/sites/NormalSite",
            "https://tenant.sharepoint.com/sites/NormalSite/SitePages/Page.aspx",
            "/sites/NormalSite/SitePages/Page.aspx",
        ),
        (
            "https://tenant.sharepoint.com/sites/Site%20With%20Spaces",
            "https://tenant.sharepoint.com/sites/Site%20With%20Spaces/SitePages/Doc.aspx",
            "/sites/Site%20With%20Spaces/SitePages/Doc.aspx",
        ),
    ],
    ids=["apostrophe-encoded", "no-special-chars", "space-encoded"],
)
@patch(f"{MODULE}._get_groups_and_members_recursively")
@patch(f"{MODULE}.sleep_and_retry")
def test_site_page_url_not_duplicated(
    mock_sleep: MagicMock,  # noqa: ARG001
    mock_recursive: MagicMock,
    site_base_url: str,
    web_url: str,
    expected_relative_url: str,
) -> None:
    """Regression test for site page URL handling.

    The server-relative URL handed to get_file_by_server_relative_url has to
    keep its percent-encoding; otherwise the Office365 library's
    SPResPath.create_relative() fails to recognise the site prefix and
    duplicates it.
    """
    # Group resolution is irrelevant here: report no groups, nothing public.
    mock_recursive.return_value = GroupsResult(
        groups_to_emails={},
        found_public_group=False,
    )

    client_ctx = MagicMock()
    client_ctx.base_url = site_base_url

    get_external_access_from_sharepoint(
        client_context=client_ctx,
        graph_client=MagicMock(),
        drive_name=None,
        drive_item=None,
        site_page={"webUrl": web_url},
    )

    # Exactly one lookup, using the still-encoded server-relative path.
    file_lookup = client_ctx.web.get_file_by_server_relative_url
    file_lookup.assert_called_once_with(expected_relative_url)

View File

@@ -1,426 +0,0 @@
"""Tests for the unified context file extraction logic (Phase 5).
Covers:
- resolve_context_user_files: precedence rule (custom persona supersedes project)
- extract_context_files: all-or-nothing context window fit check
- Search filter / search_usage determination in the caller
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from uuid import UUID
from uuid import uuid4
from onyx.chat.models import ExtractedContextFiles
from onyx.chat.process_message import determine_search_params
from onyx.chat.process_message import extract_context_files
from onyx.chat.process_message import resolve_context_user_files
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.db.models import UserFile
from onyx.file_store.models import ChatFileType
from onyx.file_store.models import InMemoryChatFile
from onyx.tools.models import SearchToolUsage
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_user_file(
    token_count: int = 100,
    name: str = "file.txt",
    file_id: str | None = None,
) -> UserFile:
    """Build a UserFile whose ``id`` and ``file_id`` share one UUID.

    A truthy ``file_id`` is parsed as the UUID; otherwise a random one is
    generated.
    """
    if file_id:
        identifier = UUID(file_id)
    else:
        identifier = uuid4()
    return UserFile(
        id=identifier,
        file_id=str(identifier),
        name=name,
        token_count=token_count,
    )
def _make_persona(
persona_id: int,
user_files: list | None = None,
) -> MagicMock:
persona = MagicMock()
persona.id = persona_id
persona.user_files = user_files or []
return persona
def _make_in_memory_file(
    file_id: str,
    content: str = "hello world",
    file_type: ChatFileType = ChatFileType.PLAIN_TEXT,
    filename: str = "file.txt",
) -> InMemoryChatFile:
    """Build an InMemoryChatFile whose content is ``content`` encoded as UTF-8."""
    encoded_content = content.encode("utf-8")
    return InMemoryChatFile(
        file_id=file_id,
        content=encoded_content,
        file_type=file_type,
        filename=filename,
    )
# ===========================================================================
# resolve_context_user_files
# ===========================================================================
class TestResolveContextUserFiles:
    """Precedence rule: custom persona fully supersedes project.

    Each test calls resolve_context_user_files with a mock persona and a
    mock DB session and checks which file list comes back.
    """

    def test_custom_persona_with_files_returns_persona_files(self) -> None:
        """A custom persona's own files are returned even inside a project."""
        persona_files = [_make_user_file(), _make_user_file()]
        persona = _make_persona(persona_id=42, user_files=persona_files)
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session
        )

        assert result == persona_files

    def test_custom_persona_without_files_returns_empty(self) -> None:
        """Custom persona with no files should NOT fall through to project."""
        persona = _make_persona(persona_id=42, user_files=[])
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session
        )

        assert result == []

    def test_custom_persona_none_files_returns_empty(self) -> None:
        """Custom persona with user_files=None should NOT fall through."""
        persona = _make_persona(persona_id=42)
        # _make_persona replaces None with [], so set None explicitly;
        # otherwise this test would merely repeat the empty-list case above.
        persona.user_files = None
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session
        )

        assert result == []

    @patch("onyx.chat.process_message.get_user_files_from_project")
    def test_default_persona_in_project_returns_project_files(
        self, mock_get_files: MagicMock
    ) -> None:
        """Default persona inside a project gets the project's files."""
        project_files = [_make_user_file(), _make_user_file()]
        mock_get_files.return_value = project_files
        persona = _make_persona(persona_id=DEFAULT_PERSONA_ID)
        user_id = uuid4()
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=user_id, db_session=db_session
        )

        assert result == project_files
        mock_get_files.assert_called_once_with(
            project_id=99, user_id=user_id, db_session=db_session
        )

    def test_default_persona_no_project_returns_empty(self) -> None:
        """Default persona without a project has no context files."""
        persona = _make_persona(persona_id=DEFAULT_PERSONA_ID)
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=None, user_id=uuid4(), db_session=db_session
        )

        assert result == []

    @patch("onyx.chat.process_message.get_user_files_from_project")
    def test_custom_persona_without_files_ignores_project(
        self, mock_get_files: MagicMock
    ) -> None:
        """Even with a project_id, custom persona means project is invisible."""
        persona = _make_persona(persona_id=7, user_files=[])
        db_session = MagicMock()

        result = resolve_context_user_files(
            persona=persona, project_id=99, user_id=uuid4(), db_session=db_session
        )

        assert result == []
        # The project lookup must not even be attempted.
        mock_get_files.assert_not_called()
# ===========================================================================
# extract_context_files
# ===========================================================================
class TestExtractContextFiles:
    """Exercises the all-or-nothing context window fit check.

    Every case runs extract_context_files against a 10000-token window with
    a mock DB session; only the user files and the reserved token count vary.
    """

    @staticmethod
    def _extract(
        user_files: list[UserFile], reserved_token_count: int = 0
    ) -> ExtractedContextFiles:
        """Call extract_context_files with the fixed 10000-token window."""
        return extract_context_files(
            user_files=user_files,
            llm_max_context_window=10000,
            reserved_token_count=reserved_token_count,
            db_session=MagicMock(),
        )

    def test_empty_user_files_returns_empty(self) -> None:
        """No input files means an entirely empty extraction result."""
        extracted = self._extract([])

        assert extracted.file_texts == []
        assert extracted.image_files == []
        assert extracted.use_as_search_filter is False
        assert extracted.uncapped_token_count is None

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_files_fit_in_context_are_loaded(self, mock_load: MagicMock) -> None:
        """A small text file is loaded and reported in the metadata."""
        small_id = str(uuid4())
        small_file = _make_user_file(token_count=100, file_id=small_id)
        mock_load.return_value = [
            _make_in_memory_file(file_id=small_id, content="file content")
        ]

        extracted = self._extract([small_file])

        assert extracted.file_texts == ["file content"]
        assert extracted.use_as_search_filter is False
        assert extracted.total_token_count == 100
        assert len(extracted.file_metadata) == 1
        assert extracted.file_metadata[0].file_id == small_id

    def test_files_overflow_context_not_loaded(self) -> None:
        """When aggregate tokens exceed 60% of available window, nothing is loaded."""
        oversized = _make_user_file(token_count=7000)

        extracted = self._extract([oversized])

        assert extracted.file_texts == []
        assert extracted.image_files == []
        assert extracted.use_as_search_filter is True
        assert extracted.uncapped_token_count == 7000
        assert extracted.total_token_count == 0

    def test_overflow_boundary_exact(self) -> None:
        """Token count exactly at the 60% boundary should trigger overflow."""
        # Available = (10000 - 0) * 0.6 = 6000. Tokens = 6000 → >= threshold.
        at_threshold = _make_user_file(token_count=6000)

        extracted = self._extract([at_threshold])

        assert extracted.use_as_search_filter is True

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_just_under_boundary_loads(self, mock_load: MagicMock) -> None:
        """Token count just under the 60% boundary should load files."""
        near_id = str(uuid4())
        below_threshold = _make_user_file(token_count=5999, file_id=near_id)
        mock_load.return_value = [_make_in_memory_file(file_id=near_id, content="data")]

        extracted = self._extract([below_threshold])

        assert extracted.use_as_search_filter is False
        assert extracted.file_texts == ["data"]

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_multiple_files_aggregate_check(self, mock_load: MagicMock) -> None:
        """Multiple small files that individually fit but collectively overflow."""
        # 3 * 2500 = 7500 > 6000 threshold
        trio = [_make_user_file(token_count=2500) for _ in range(3)]

        extracted = self._extract(trio)

        assert extracted.use_as_search_filter is True
        assert extracted.file_texts == []
        mock_load.assert_not_called()

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_reserved_tokens_reduce_available_space(self, mock_load: MagicMock) -> None:
        """Reserved tokens shrink the available window."""
        reserved_case_id = str(uuid4())
        medium_file = _make_user_file(token_count=3000, file_id=reserved_case_id)

        # Available = (10000 - 5000) * 0.6 = 3000. Tokens = 3000 → overflow.
        extracted = self._extract([medium_file], reserved_token_count=5000)

        assert extracted.use_as_search_filter is True
        mock_load.assert_not_called()

    @patch("onyx.chat.process_message.load_in_memory_chat_files")
    def test_image_files_are_extracted(self, mock_load: MagicMock) -> None:
        """An image lands in image_files, not in file_texts."""
        image_id = str(uuid4())
        image_user_file = _make_user_file(token_count=50, file_id=image_id)
        png_in_memory = InMemoryChatFile(
            file_id=image_id,
            content=b"\x89PNG",
            file_type=ChatFileType.IMAGE,
            filename="photo.png",
        )
        mock_load.return_value = [png_in_memory]

        extracted = self._extract([image_user_file])

        assert len(extracted.image_files) == 1
        assert extracted.image_files[0].file_id == image_id
        assert extracted.file_texts == []
        assert extracted.total_token_count == 50

    @patch("onyx.chat.process_message.DISABLE_VECTOR_DB", True)
    def test_overflow_with_vector_db_disabled_provides_tool_metadata(self) -> None:
        """When vector DB is disabled, overflow produces FileToolMetadata."""
        big_file = _make_user_file(token_count=7000, name="bigfile.txt")

        extracted = self._extract([big_file])

        assert extracted.use_as_search_filter is False
        assert len(extracted.file_metadata_for_tool) == 1
        assert extracted.file_metadata_for_tool[0].filename == "bigfile.txt"
# ===========================================================================
# Search filter + search_usage determination
# ===========================================================================
class TestSearchFilterDetermination:
    """Checks how determine_search_params derives search_project_id,
    search_persona_id and search_usage from the persona, the project and
    the outcome of context file extraction.
    """

    @staticmethod
    def _make_context(
        use_as_search_filter: bool = False,
        file_texts: list[str] | None = None,
        uncapped_token_count: int | None = None,
    ) -> ExtractedContextFiles:
        """Build a minimal ExtractedContextFiles; unspecified parts stay empty."""
        texts = file_texts or []
        return ExtractedContextFiles(
            file_texts=texts,
            image_files=[],
            use_as_search_filter=use_as_search_filter,
            total_token_count=0,
            file_metadata=[],
            uncapped_token_count=uncapped_token_count,
        )

    def test_custom_persona_files_fit_no_filter(self) -> None:
        """Custom persona, files fit → no search filter, AUTO."""
        fitting_context = self._make_context(
            file_texts=["content"],
            uncapped_token_count=100,
        )

        params = determine_search_params(
            persona_id=42,
            project_id=99,
            extracted_context_files=fitting_context,
        )

        assert params.search_project_id is None
        assert params.search_persona_id is None
        assert params.search_usage == SearchToolUsage.AUTO

    def test_custom_persona_files_overflow_persona_filter(self) -> None:
        """Custom persona, files overflow → persona_id filter, AUTO."""
        overflow_context = self._make_context(use_as_search_filter=True)

        params = determine_search_params(
            persona_id=42,
            project_id=99,
            extracted_context_files=overflow_context,
        )

        assert params.search_persona_id == 42
        assert params.search_project_id is None
        assert params.search_usage == SearchToolUsage.AUTO

    def test_custom_persona_no_files_no_project_leak(self) -> None:
        """Custom persona (no files) in project → nothing leaks from project."""
        empty_context = self._make_context()

        params = determine_search_params(
            persona_id=42,
            project_id=99,
            extracted_context_files=empty_context,
        )

        assert params.search_project_id is None
        assert params.search_persona_id is None
        assert params.search_usage == SearchToolUsage.AUTO

    def test_default_persona_project_files_fit_disables_search(self) -> None:
        """Default persona, project files fit → DISABLED."""
        fitting_context = self._make_context(
            file_texts=["content"],
            uncapped_token_count=100,
        )

        params = determine_search_params(
            persona_id=DEFAULT_PERSONA_ID,
            project_id=99,
            extracted_context_files=fitting_context,
        )

        assert params.search_project_id is None
        assert params.search_usage == SearchToolUsage.DISABLED

    def test_default_persona_project_files_overflow_enables_search(self) -> None:
        """Default persona, project files overflow → ENABLED + project_id filter."""
        overflow_context = self._make_context(
            use_as_search_filter=True,
            uncapped_token_count=7000,
        )

        params = determine_search_params(
            persona_id=DEFAULT_PERSONA_ID,
            project_id=99,
            extracted_context_files=overflow_context,
        )

        assert params.search_project_id == 99
        assert params.search_persona_id is None
        assert params.search_usage == SearchToolUsage.ENABLED

    def test_default_persona_no_project_auto(self) -> None:
        """Default persona, no project → AUTO."""
        empty_context = self._make_context()

        params = determine_search_params(
            persona_id=DEFAULT_PERSONA_ID,
            project_id=None,
            extracted_context_files=empty_context,
        )

        assert params.search_project_id is None
        assert params.search_usage == SearchToolUsage.AUTO

    def test_default_persona_project_no_files_disables_search(self) -> None:
        """Default persona in project with no files → DISABLED."""
        empty_context = self._make_context()

        params = determine_search_params(
            persona_id=DEFAULT_PERSONA_ID,
            project_id=99,
            extracted_context_files=empty_context,
        )

        assert params.search_usage == SearchToolUsage.DISABLED

View File

@@ -7,10 +7,10 @@ from onyx.chat.llm_loop import _try_fallback_tool_extraction
from onyx.chat.llm_loop import construct_message_history
from onyx.chat.models import ChatLoadedFile
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import ContextFileMetadata
from onyx.chat.models import ExtractedContextFiles
from onyx.chat.models import ExtractedProjectFiles
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import LlmStepResult
from onyx.chat.models import ProjectFileMetadata
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import MessageType
from onyx.file_store.models import ChatFileType
@@ -74,20 +74,20 @@ def create_tool_response(
)
def create_context_files(
def create_project_files(
num_files: int = 0, num_images: int = 0, tokens_per_file: int = 100
) -> ExtractedContextFiles:
"""Helper to create ExtractedContextFiles for testing."""
file_texts = [f"Project file {i} content" for i in range(num_files)]
file_metadata = [
ContextFileMetadata(
) -> ExtractedProjectFiles:
"""Helper to create ExtractedProjectFiles for testing."""
project_file_texts = [f"Project file {i} content" for i in range(num_files)]
project_file_metadata = [
ProjectFileMetadata(
file_id=f"file_{i}",
filename=f"file_{i}.txt",
file_content=f"Project file {i} content",
)
for i in range(num_files)
]
image_files = [
project_image_files = [
ChatLoadedFile(
file_id=f"image_{i}",
content=b"",
@@ -98,13 +98,13 @@ def create_context_files(
)
for i in range(num_images)
]
return ExtractedContextFiles(
file_texts=file_texts,
image_files=image_files,
use_as_search_filter=False,
return ExtractedProjectFiles(
project_file_texts=project_file_texts,
project_image_files=project_image_files,
project_as_filter=False,
total_token_count=num_files * tokens_per_file,
file_metadata=file_metadata,
uncapped_token_count=num_files * tokens_per_file,
project_file_metadata=project_file_metadata,
project_uncapped_token_count=num_files * tokens_per_file,
)
@@ -121,14 +121,14 @@ class TestConstructMessageHistory:
user_msg2 = create_message("How are you?", MessageType.USER, 5)
simple_chat_history = [user_msg1, assistant_msg1, user_msg2]
context_files = create_context_files()
project_files = create_project_files()
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
@@ -148,14 +148,14 @@ class TestConstructMessageHistory:
custom_agent = create_message("Custom instructions", MessageType.USER, 10)
simple_chat_history = [user_msg1, assistant_msg1, user_msg2]
context_files = create_context_files()
project_files = create_project_files()
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=custom_agent,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
@@ -167,25 +167,25 @@ class TestConstructMessageHistory:
assert result[3] == custom_agent # Before last user message
assert result[4] == user_msg2
def test_with_context_files(self) -> None:
def test_with_project_files(self) -> None:
"""Test that project files are inserted before the last user message."""
system_prompt = create_message("System", MessageType.SYSTEM, 10)
user_msg1 = create_message("First message", MessageType.USER, 5)
user_msg2 = create_message("Second message", MessageType.USER, 5)
simple_chat_history = [user_msg1, user_msg2]
context_files = create_context_files(num_files=2, tokens_per_file=50)
project_files = create_project_files(num_files=2, tokens_per_file=50)
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
# Should have: system, user1, context_files_message, user2
# Should have: system, user1, project_files_message, user2
assert len(result) == 4
assert result[0] == system_prompt
assert result[1] == user_msg1
@@ -202,14 +202,14 @@ class TestConstructMessageHistory:
reminder = create_message("Remember to cite sources", MessageType.USER, 10)
simple_chat_history = [user_msg]
context_files = create_context_files()
project_files = create_project_files()
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=reminder,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
@@ -235,14 +235,14 @@ class TestConstructMessageHistory:
assistant_with_tool,
tool_response,
]
context_files = create_context_files()
project_files = create_project_files()
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
@@ -264,18 +264,18 @@ class TestConstructMessageHistory:
custom_agent = create_message("Custom", MessageType.USER, 10)
simple_chat_history = [user_msg1, user_msg2, assistant_with_tool]
context_files = create_context_files(num_files=1, tokens_per_file=50)
project_files = create_project_files(num_files=1, tokens_per_file=50)
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=custom_agent,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
# Should have: system, user1, custom_agent, context_files, user2, assistant_with_tool
# Should have: system, user1, custom_agent, project_files, user2, assistant_with_tool
assert len(result) == 6
assert result[0] == system_prompt
assert result[1] == user_msg1
@@ -292,14 +292,14 @@ class TestConstructMessageHistory:
user_msg2 = create_message("Second", MessageType.USER, 5)
simple_chat_history = [user_msg1, user_msg2]
context_files = create_context_files(num_files=0, num_images=2)
project_files = create_project_files(num_files=0, num_images=2)
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
@@ -332,14 +332,14 @@ class TestConstructMessageHistory:
)
simple_chat_history = [user_msg]
context_files = create_context_files(num_files=0, num_images=1)
project_files = create_project_files(num_files=0, num_images=1)
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
@@ -366,7 +366,7 @@ class TestConstructMessageHistory:
assistant_msg2,
user_msg3,
]
context_files = create_context_files()
project_files = create_project_files()
# Budget only allows last 3 messages + system (10 + 20 + 20 + 20 = 70 tokens)
result = construct_message_history(
@@ -374,7 +374,7 @@ class TestConstructMessageHistory:
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=80,
)
@@ -395,7 +395,7 @@ class TestConstructMessageHistory:
tool_response = create_tool_response("tc_1", "tool_response", 20)
simple_chat_history = [user_msg1, user_msg2, assistant_with_tool, tool_response]
context_files = create_context_files()
project_files = create_project_files()
# Budget only allows last user message and messages after + system
# (10 + 20 + 20 + 20 = 70 tokens)
@@ -404,7 +404,7 @@ class TestConstructMessageHistory:
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=80,
)
@@ -432,7 +432,7 @@ class TestConstructMessageHistory:
assistant_msg1,
user_msg2,
]
context_files = create_context_files()
project_files = create_project_files()
# Remaining history budget is 10 tokens (30 total - 10 system - 10 last user):
# keeps [tool_response, assistant_msg1] from history_before_last_user,
@@ -442,7 +442,7 @@ class TestConstructMessageHistory:
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=30,
)
@@ -461,7 +461,7 @@ class TestConstructMessageHistory:
user_msg2 = create_message("Latest question", MessageType.USER, 10)
simple_chat_history = [user_msg1, assistant_with_tool, tool_response, user_msg2]
context_files = create_context_files()
project_files = create_project_files()
# Remaining history budget is 25 tokens (45 total - 10 system - 10 last user):
# keeps both assistant_with_tool and tool_response in history_before_last_user.
@@ -470,7 +470,7 @@ class TestConstructMessageHistory:
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=45,
)
@@ -487,18 +487,18 @@ class TestConstructMessageHistory:
reminder = create_message("Reminder", MessageType.USER, 10)
simple_chat_history: list[ChatMessageSimple] = []
context_files = create_context_files(num_files=1, tokens_per_file=50)
project_files = create_project_files(num_files=1, tokens_per_file=50)
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=custom_agent,
simple_chat_history=simple_chat_history,
reminder_message=reminder,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
# Should have: system, custom_agent, context_files, reminder
# Should have: system, custom_agent, project_files, reminder
assert len(result) == 4
assert result[0] == system_prompt
assert result[1] == custom_agent
@@ -512,7 +512,7 @@ class TestConstructMessageHistory:
assistant_with_tool = create_assistant_with_tool_call("tc_1", "tool", 5)
simple_chat_history = [assistant_msg, assistant_with_tool]
context_files = create_context_files()
project_files = create_project_files()
with pytest.raises(ValueError, match="No user message found"):
construct_message_history(
@@ -520,7 +520,7 @@ class TestConstructMessageHistory:
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
@@ -531,7 +531,7 @@ class TestConstructMessageHistory:
custom_agent = create_message("Custom", MessageType.USER, 50)
simple_chat_history = [user_msg]
context_files = create_context_files(num_files=1, tokens_per_file=100)
project_files = create_project_files(num_files=1, tokens_per_file=100)
# Total required: 50 (system) + 50 (custom) + 100 (project) + 50 (user) = 250
# But only 200 available
@@ -541,7 +541,7 @@ class TestConstructMessageHistory:
custom_agent_prompt=custom_agent,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=200,
)
@@ -553,7 +553,7 @@ class TestConstructMessageHistory:
assistant_with_tool = create_assistant_with_tool_call("tc_1", "tool", 30)
simple_chat_history = [user_msg1, user_msg2, assistant_with_tool]
context_files = create_context_files()
project_files = create_project_files()
# Budget: 50 tokens
# Required: 10 (system) + 30 (user2) + 30 (assistant_with_tool) = 70 tokens
@@ -566,7 +566,7 @@ class TestConstructMessageHistory:
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=50,
)
@@ -592,20 +592,20 @@ class TestConstructMessageHistory:
assistant_with_tool,
tool_response,
]
context_files = create_context_files(num_files=2, tokens_per_file=20)
project_files = create_project_files(num_files=2, tokens_per_file=20)
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=custom_agent,
simple_chat_history=simple_chat_history,
reminder_message=reminder,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
# Expected order:
# system, user1, assistant1, user2, assistant2,
# custom_agent, context_files, user3, assistant_with_tool, tool_response, reminder
# custom_agent, project_files, user3, assistant_with_tool, tool_response, reminder
assert len(result) == 11
assert result[0] == system_prompt
assert result[1] == user_msg1
@@ -622,20 +622,20 @@ class TestConstructMessageHistory:
assert result[9] == tool_response # After last user
assert result[10] == reminder # At the very end
def test_context_files_json_format(self) -> None:
def test_project_files_json_format(self) -> None:
"""Test that project files are formatted correctly as JSON."""
system_prompt = create_message("System", MessageType.SYSTEM, 10)
user_msg = create_message("Hello", MessageType.USER, 5)
simple_chat_history = [user_msg]
context_files = create_context_files(num_files=2, tokens_per_file=50)
project_files = create_project_files(num_files=2, tokens_per_file=50)
result = construct_message_history(
system_prompt=system_prompt,
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=context_files,
project_files=project_files,
available_tokens=1000,
)
@@ -692,7 +692,7 @@ class TestForgottenFileMetadata:
custom_agent_prompt=None,
simple_chat_history=simple_chat_history,
reminder_message=None,
context_files=create_context_files(),
project_files=create_project_files(),
available_tokens=available_tokens,
token_counter=_simple_token_counter,
all_injected_file_metadata=all_injected_file_metadata,

View File

@@ -106,9 +106,6 @@ def test_checkout_event_stores_endpoint_and_increments_gauge() -> None:
patch(
"onyx.server.metrics.postgres_connection_pool.CURRENT_ENDPOINT_CONTEXTVAR"
) as mock_ctx,
patch(
"onyx.server.metrics.postgres_connection_pool.CURRENT_TENANT_ID_CONTEXTVAR"
) as mock_tenant_ctx,
patch(
"onyx.server.metrics.postgres_connection_pool._connections_held"
) as mock_gauge,
@@ -117,14 +114,12 @@ def test_checkout_event_stores_endpoint_and_increments_gauge() -> None:
mock_labels = MagicMock()
mock_gauge.labels.return_value = mock_labels
mock_ctx.get.return_value = "/api/chat/send-message"
mock_tenant_ctx.get.return_value = "tenant_xyz"
listeners["checkout"](None, conn_record, None)
assert conn_record.info["_metrics_endpoint"] == "/api/chat/send-message"
assert conn_record.info["_metrics_tenant_id"] == "tenant_xyz"
assert "_metrics_checkout_time" in conn_record.info
mock_gauge.labels.assert_called_with(
handler="/api/chat/send-message", engine="sync", tenant_id="tenant_xyz"
handler="/api/chat/send-message", engine="sync"
)
mock_labels.inc.assert_called_once()
@@ -149,7 +144,6 @@ def test_checkin_event_observes_hold_duration() -> None:
conn_record = _make_conn_record()
conn_record.info["_metrics_endpoint"] = "/api/search"
conn_record.info["_metrics_tenant_id"] = "tenant_abc"
conn_record.info["_metrics_checkout_time"] = time.monotonic() - 0.5
with (
@@ -168,9 +162,7 @@ def test_checkin_event_observes_hold_duration() -> None:
listeners["checkin"](None, conn_record)
mock_gauge.labels.assert_called_with(
handler="/api/search", engine="sync", tenant_id="tenant_abc"
)
mock_gauge.labels.assert_called_with(handler="/api/search", engine="sync")
mock_labels.dec.assert_called_once()
mock_hist.labels.assert_called_with(handler="/api/search", engine="sync")
mock_hist_labels.observe.assert_called_once()
@@ -180,12 +172,11 @@ def test_checkin_event_observes_hold_duration() -> None:
# conn_record.info should be cleaned up
assert "_metrics_endpoint" not in conn_record.info
assert "_metrics_tenant_id" not in conn_record.info
assert "_metrics_checkout_time" not in conn_record.info
def test_checkin_with_missing_endpoint_uses_unknown() -> None:
"""Verify checkin gracefully handles missing endpoint and tenant info."""
"""Verify checkin gracefully handles missing endpoint info."""
engine = MagicMock()
engine.pool = MagicMock()
listeners: dict[str, Any] = {}
@@ -216,9 +207,7 @@ def test_checkin_with_missing_endpoint_uses_unknown() -> None:
listeners["checkin"](None, conn_record)
mock_gauge.labels.assert_called_with(
handler="unknown", engine="sync", tenant_id="unknown"
)
mock_gauge.labels.assert_called_with(handler="unknown", engine="sync")
# --- setup_postgres_connection_pool_metrics tests ---

View File

@@ -10,7 +10,6 @@ from fastapi.testclient import TestClient
from prometheus_client import CollectorRegistry
from prometheus_client import Gauge
from onyx.server.metrics.per_tenant import per_tenant_request_callback
from onyx.server.metrics.prometheus_setup import setup_prometheus_metrics
from onyx.server.metrics.slow_requests import slow_request_callback
@@ -82,7 +81,7 @@ def test_setup_attaches_instrumentator_to_app() -> None:
inprogress_labels=True,
excluded_handlers=["/health", "/metrics", "/openapi.json"],
)
assert mock_instance.add.call_count == 3
mock_instance.add.assert_called_once()
mock_instance.instrument.assert_called_once_with(
app,
latency_lowr_buckets=(
@@ -101,56 +100,6 @@ def test_setup_attaches_instrumentator_to_app() -> None:
mock_instance.expose.assert_called_once_with(app)
def test_per_tenant_callback_increments_with_tenant_id() -> None:
"""Verify per-tenant callback reads tenant from contextvar and increments."""
with (
patch(
"onyx.server.metrics.per_tenant.CURRENT_TENANT_ID_CONTEXTVAR"
) as mock_ctx,
patch("onyx.server.metrics.per_tenant._requests_by_tenant") as mock_counter,
):
mock_labels = MagicMock()
mock_counter.labels.return_value = mock_labels
mock_ctx.get.return_value = "tenant_abc"
info = _make_info(
duration=0.1, method="POST", handler="/api/chat", status="200"
)
per_tenant_request_callback(info)
mock_counter.labels.assert_called_once_with(
tenant_id="tenant_abc",
method="POST",
handler="/api/chat",
status="200",
)
mock_labels.inc.assert_called_once()
def test_per_tenant_callback_falls_back_to_unknown() -> None:
"""Verify per-tenant callback uses 'unknown' when contextvar is None."""
with (
patch(
"onyx.server.metrics.per_tenant.CURRENT_TENANT_ID_CONTEXTVAR"
) as mock_ctx,
patch("onyx.server.metrics.per_tenant._requests_by_tenant") as mock_counter,
):
mock_labels = MagicMock()
mock_counter.labels.return_value = mock_labels
mock_ctx.get.return_value = None
info = _make_info(duration=0.1)
per_tenant_request_callback(info)
mock_counter.labels.assert_called_once_with(
tenant_id="unknown",
method="GET",
handler="/api/test",
status="200",
)
mock_labels.inc.assert_called_once()
def test_inprogress_gauge_increments_during_request() -> None:
"""Verify the in-progress gauge goes up while a request is in flight."""
registry = CollectorRegistry()

View File

@@ -163,16 +163,3 @@ Add clear comments:
- Any TODOs you add in the code must be accompanied by either the name/username
of the owner of that TODO, or an issue number for an issue referencing that
piece of work.
- Avoid module-level logic that runs on import, which leads to import-time side
effects. Essentially every piece of meaningful logic should exist within some
function that has to be explicitly invoked. Acceptable exceptions to this may
include loading environment variables or setting up loggers.
- If you find yourself needing something like this, you may want that logic to
exist in a file dedicated for manual execution (contains `if __name__ ==
"__main__":`) which should not be imported by anything else.
- Related to the above, do not conflate Python scripts you intend to run from
the command line (contains `if __name__ == "__main__":`) with modules you
intend to import from elsewhere. If for some unlikely reason they have to be
the same file, any logic specific to executing the file (including imports)
should be contained in the `if __name__ == "__main__":` block.
- Generally these executable files exist in `backend/scripts/`.

View File

@@ -534,10 +534,9 @@ services:
required: false
# Below is needed for the `docker-out-of-docker` execution mode
# For Linux rootless Docker, set DOCKER_SOCK_PATH=${XDG_RUNTIME_DIR}/docker.sock
user: root
volumes:
- ${DOCKER_SOCK_PATH:-/var/run/docker.sock}:/var/run/docker.sock
- /var/run/docker.sock:/var/run/docker.sock
# uncomment below + comment out the above to use the `docker-in-docker` execution mode
# privileged: true

View File

@@ -92,7 +92,7 @@ backend = [
"python-gitlab==5.6.0",
"python-pptx==0.6.23",
"pypandoc_binary==1.16.2",
"pypdf==6.7.3",
"pypdf==6.6.2",
"pytest-mock==3.12.0",
"pytest-playwright==0.7.0",
"python-docx==1.1.2",

View File

@@ -88,6 +88,8 @@ Example usage:
}
func runCherryPick(cmd *cobra.Command, args []string, opts *CherryPickOptions) {
git.DisablePostCheckoutHook()
defer git.EnablePostCheckoutHook()
git.CheckGitHubCLI()
commitSHAs := args
@@ -285,6 +287,8 @@ func finishCherryPick(state *git.CherryPickState, stashResult *git.StashResult)
// It finishes any in-progress git cherry-pick, then falls into the normal
// cherryPickToRelease path which handles skip-applied-commits, push, and PR creation.
func runCherryPickContinue() {
git.DisablePostCheckoutHook()
defer git.EnablePostCheckoutHook()
git.CheckGitHubCLI()
state, err := git.LoadCherryPickState()

View File

@@ -303,3 +303,41 @@ func CleanCherryPickState() {
log.Debugf("Cleaned up cherry-pick state file")
}
}
// DisablePostCheckoutHook temporarily renames the post-checkout hook so that
// git branch switches don't trigger uv-sync (which would rebuild the ods
// binary from the target branch's source). Call EnablePostCheckoutHook to
// restore it.
//
// The hook file is moved to "post-checkout.ods-disabled" in the same hooks
// directory. The operation is best-effort: failures are only logged. A hook
// that does not exist is not treated as an error, since there is nothing to
// disable.
func DisablePostCheckoutHook() {
	gitDir, err := GetGitDir()
	if err != nil {
		// Surface the failure instead of returning silently: if the git dir
		// cannot be resolved the hook stays active, and that should be
		// visible when diagnosing an unexpected hook run.
		log.Warnf("Failed to locate git dir; post-checkout hook left enabled: %v", err)
		return
	}

	hook := filepath.Join(gitDir, "hooks", "post-checkout")
	disabled := hook + ".ods-disabled"

	if err := os.Rename(hook, disabled); err != nil {
		// No hook installed is the normal "nothing to do" case; only report
		// other rename failures.
		if !os.IsNotExist(err) {
			log.Warnf("Failed to disable post-checkout hook: %v", err)
		}
		return
	}
	log.Debugf("Disabled post-checkout hook")
}
// EnablePostCheckoutHook restores the post-checkout hook after a
// DisablePostCheckoutHook call.
//
// It renames "post-checkout.ods-disabled" back to "post-checkout" in the
// hooks directory. The operation is best-effort: failures are only logged.
// A missing ".ods-disabled" file is not treated as an error, since it means
// there was no hook to restore.
func EnablePostCheckoutHook() {
	gitDir, err := GetGitDir()
	if err != nil {
		// Surface the failure instead of returning silently: if the git dir
		// cannot be resolved here, a previously disabled hook may remain
		// parked under its ".ods-disabled" name.
		log.Warnf("Failed to locate git dir; post-checkout hook may remain disabled: %v", err)
		return
	}

	hook := filepath.Join(gitDir, "hooks", "post-checkout")
	disabled := hook + ".ods-disabled"

	if err := os.Rename(disabled, hook); err != nil {
		// Nothing was disabled, so there is nothing to restore; only report
		// other rename failures.
		if !os.IsNotExist(err) {
			log.Warnf("Failed to re-enable post-checkout hook: %v", err)
		}
		return
	}
	log.Debugf("Re-enabled post-checkout hook")
}

8
uv.lock generated
View File

@@ -4677,7 +4677,7 @@ requires-dist = [
{ name = "pygithub", marker = "extra == 'backend'", specifier = "==2.5.0" },
{ name = "pympler", marker = "extra == 'backend'", specifier = "==1.1" },
{ name = "pypandoc-binary", marker = "extra == 'backend'", specifier = "==1.16.2" },
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.7.3" },
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.6.2" },
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.5" },
{ name = "pytest-alembic", marker = "extra == 'dev'", specifier = "==0.12.1" },
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
@@ -5924,11 +5924,11 @@ wheels = [
[[package]]
name = "pypdf"
version = "6.7.3"
version = "6.6.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/53/9b/63e767042fc852384dc71e5ff6f990ee4e1b165b1526cf3f9c23a4eebb47/pypdf-6.7.3.tar.gz", hash = "sha256:eca55c78d0ec7baa06f9288e2be5c4e8242d5cbb62c7a4b94f2716f8e50076d2", size = 5303304, upload-time = "2026-02-24T17:23:11.42Z" }
sdist = { url = "https://files.pythonhosted.org/packages/b8/bb/a44bab1ac3c54dbcf653d7b8bcdee93dddb2d3bf025a3912cacb8149a2f2/pypdf-6.6.2.tar.gz", hash = "sha256:0a3ea3b3303982333404e22d8f75d7b3144f9cf4b2970b96856391a516f9f016", size = 5281850, upload-time = "2026-01-26T11:57:55.964Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b0/90/3308a9b8b46c1424181fdf3f4580d2b423c5471425799e7fc62f92d183f4/pypdf-6.7.3-py3-none-any.whl", hash = "sha256:cd25ac508f20b554a9fafd825186e3ba29591a69b78c156783c5d8a2d63a1c0a", size = 331263, upload-time = "2026-02-24T17:23:09.932Z" },
{ url = "https://files.pythonhosted.org/packages/7d/be/549aaf1dfa4ab4aed29b09703d2fb02c4366fc1f05e880948c296c5764b9/pypdf-6.6.2-py3-none-any.whl", hash = "sha256:44c0c9811cfb3b83b28f1c3d054531d5b8b81abaedee0d8cb403650d023832ba", size = 329132, upload-time = "2026-01-26T11:57:54.099Z" },
]
[[package]]

View File

@@ -1,233 +0,0 @@
import "@opal/core/hoverable/styles.css";
import React, { createContext, useContext, useState, useCallback } from "react";
import { cn } from "@opal/utils";
import type { WithoutStyles } from "@opal/types";
// ---------------------------------------------------------------------------
// Context-per-group registry
// ---------------------------------------------------------------------------
/**
 * Registry of per-group React contexts, filled on demand.
 *
 * Every group name maps to its own `React.Context<boolean | null>`, so a
 * `Hoverable.Item` subscribes only to the context of the group it names
 * rather than to one shared context for all groups.
 *
 * Contexts are created with a default of `null`, meaning "no provider
 * above me". That lets `Hoverable.Item` tell a missing Root apart from a
 * Root that is simply not hovered (`false`).
 */
const contextMap = new Map<string, React.Context<boolean | null>>();

/**
 * Returns the context registered for `group`, creating and registering it
 * on first use.
 *
 * @param group - Name of the hover group.
 * @returns The context shared by every Root and Item using this group name.
 */
function getOrCreateContext(group: string): React.Context<boolean | null> {
  const existing = contextMap.get(group);
  if (existing) {
    return existing;
  }

  const created = createContext<boolean | null>(null);
  // Label the context so it is identifiable in React DevTools.
  created.displayName = `HoverableContext(${group})`;
  contextMap.set(group, created);
  return created;
}
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
interface HoverableRootProps
extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
children: React.ReactNode;
group: string;
}
type HoverableItemVariant = "opacity-on-hover";
interface HoverableItemProps
extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
children: React.ReactNode;
group?: string;
variant?: HoverableItemVariant;
}
// ---------------------------------------------------------------------------
// HoverableRoot
// ---------------------------------------------------------------------------
/**
 * Container that tracks hover for one named group.
 *
 * Renders a `<div>` around `children`, flips a local `hovered` flag on
 * mouse-enter / mouse-leave, and publishes that flag through the group's
 * context. Any `onMouseEnter` / `onMouseLeave` handlers passed by the
 * consumer are still invoked after the internal state update.
 *
 * Roots can be nested: each one renders its own provider, so an inner
 * `Hoverable.Item group="b"` reads the inner `group="b"` provider rather
 * than an outer `group="a"` one.
 *
 * @example
 * ```tsx
 * <Hoverable.Root group="card">
 *   <Card>
 *     <Hoverable.Item group="card" variant="opacity-on-hover">
 *       <TrashIcon />
 *     </Hoverable.Item>
 *   </Card>
 * </Hoverable.Root>
 * ```
 */
function HoverableRoot({
  group,
  children,
  onMouseEnter: forwardMouseEnter,
  onMouseLeave: forwardMouseLeave,
  ...divProps
}: HoverableRootProps) {
  const [isHovered, setIsHovered] = useState(false);
  const GroupContext = getOrCreateContext(group);

  const handleMouseEnter = useCallback(
    (event: React.MouseEvent<HTMLDivElement>) => {
      setIsHovered(true);
      forwardMouseEnter?.(event);
    },
    [forwardMouseEnter]
  );

  const handleMouseLeave = useCallback(
    (event: React.MouseEvent<HTMLDivElement>) => {
      setIsHovered(false);
      forwardMouseLeave?.(event);
    },
    [forwardMouseLeave]
  );

  return (
    <GroupContext.Provider value={isHovered}>
      <div
        {...divProps}
        onMouseEnter={handleMouseEnter}
        onMouseLeave={handleMouseLeave}
      >
        {children}
      </div>
    </GroupContext.Provider>
  );
}
// ---------------------------------------------------------------------------
// HoverableItem
// ---------------------------------------------------------------------------
/**
 * An element whose visibility is controlled by hover state.
 *
 * **Local mode** (`group` omitted): the item handles hover on its own
 * element via CSS `:hover`. This is the core abstraction.
 *
 * **Group mode** (`group` provided): visibility is driven by a matching
 * `Hoverable.Root` ancestor's hover state via React context. If no
 * matching Root is found, an error is thrown.
 *
 * The mode is decided solely by whether `group` is `undefined`. Any string,
 * including the empty string, selects group mode, so an item can never end
 * up in neither mode (which would leave it permanently hidden).
 *
 * Uses data-attributes for variant styling (see `styles.css`).
 *
 * @param group - Optional group name; when set, must match an ancestor
 *   `Hoverable.Root`.
 * @param variant - Visual effect to apply; defaults to `"opacity-on-hover"`.
 * @param children - Content to reveal.
 *
 * @example
 * ```tsx
 * // Local mode — hover on the item itself
 * <Hoverable.Item variant="opacity-on-hover">
 *   <TrashIcon />
 * </Hoverable.Item>
 *
 * // Group mode — hover on the Root reveals the item
 * <Hoverable.Root group="card">
 *   <Hoverable.Item group="card" variant="opacity-on-hover">
 *     <TrashIcon />
 *   </Hoverable.Item>
 * </Hoverable.Root>
 * ```
 *
 * @throws If `group` is specified but no matching `Hoverable.Root` ancestor exists.
 */
function HoverableItem({
  group,
  variant = "opacity-on-hover",
  children,
  ...props
}: HoverableItemProps) {
  // useContext must be called unconditionally, so local mode reads a
  // placeholder context whose value is always null.
  const contextValue = useContext(
    group === undefined ? NOOP_CONTEXT : getOrCreateContext(group)
  );

  // null is the context default, i.e. no Root for this group rendered a
  // provider above this item.
  if (group !== undefined && contextValue === null) {
    throw new Error(
      `Hoverable.Item group="${group}" has no matching Hoverable.Root ancestor. ` +
        `Either wrap it in <Hoverable.Root group="${group}"> or remove the group prop for local hover.`
    );
  }

  const isLocal = group === undefined;

  return (
    <div
      {...props}
      className={cn("hoverable-item")}
      data-hoverable-variant={variant}
      data-hoverable-active={!isLocal && contextValue ? "true" : undefined}
      data-hoverable-local={isLocal ? "true" : undefined}
    >
      {children}
    </div>
  );
}
/** Stable context used when no group is specified (local mode). */
const NOOP_CONTEXT = createContext<boolean | null>(null);
// ---------------------------------------------------------------------------
// Compound export
// ---------------------------------------------------------------------------
/**
* Hoverable compound component for hover-to-reveal patterns.
*
* Provides two sub-components:
*
* - `Hoverable.Root` — A container that tracks hover state for a named group
* and provides it via React context.
*
* - `Hoverable.Item` — The core abstraction. On its own (no `group`), it
* applies local CSS `:hover` for the variant effect. When `group` is
* specified, it reads hover state from the nearest matching
* `Hoverable.Root` — and throws if no matching Root is found.
*
* Supports nesting: a child `Hoverable.Root` shadows the parent's context,
* so each group's items only respond to their own root's hover.
*
* @example
* ```tsx
* import { Hoverable } from "@opal/core";
*
* // Group mode — hovering the card reveals the trash icon
* <Hoverable.Root group="card">
* <Card>
* <span>Card content</span>
* <Hoverable.Item group="card" variant="opacity-on-hover">
* <TrashIcon />
* </Hoverable.Item>
* </Card>
* </Hoverable.Root>
*
* // Local mode — hovering the item itself reveals it
* <Hoverable.Item variant="opacity-on-hover">
* <TrashIcon />
* </Hoverable.Item>
* ```
*/
const Hoverable = {
Root: HoverableRoot,
Item: HoverableItem,
};
export {
Hoverable,
type HoverableRootProps,
type HoverableItemProps,
type HoverableItemVariant,
};

View File

@@ -1,18 +0,0 @@
/* Hoverable — item transitions */
.hoverable-item {
transition: opacity 200ms ease-in-out;
}
/* opacity-on-hover variant — hidden by default until a more specific rule in this file sets opacity back to 1 */
.hoverable-item[data-hoverable-variant="opacity-on-hover"] {
opacity: 0;
}
/* Group mode — Root controls visibility via React context */
.hoverable-item[data-hoverable-variant="opacity-on-hover"][data-hoverable-active="true"] {
opacity: 1;
}
/* Local mode — item handles its own :hover */
.hoverable-item[data-hoverable-variant="opacity-on-hover"][data-hoverable-local="true"]:hover {
opacity: 1;
}

View File

@@ -1,11 +1,3 @@
/* Hoverable */
export {
Hoverable,
type HoverableRootProps,
type HoverableItemProps,
type HoverableItemVariant,
} from "@opal/core/hoverable/components";
/* Interactive */
export {
Interactive,

View File

@@ -12,7 +12,7 @@ const SvgOrganization = ({ size, ...props }: IconProps) => (
>
<path
d="M7.5 14H13.5C14.0523 14 14.5 13.5523 14.5 13V6C14.5 5.44772 14.0523 5 13.5 5H7.5M7.5 14V11M7.5 14H4.5M7.5 5V3C7.5 2.44772 7.05228 2 6.5 2H4.5M7.5 5H1.5M7.5 5V8M1.5 5V3C1.5 2.44772 1.94772 2 2.5 2H4.5M1.5 5V8M7.5 8V11M7.5 8H4.5M1.5 8V11M1.5 8H4.5M7.5 11H4.5M1.5 11V13C1.5 13.5523 1.94772 14 2.5 14H4.5M1.5 11H4.5M4.5 2V8M4.5 14V11M4.5 11V8M10 8H12M10 11H12"
strokeWidth={1.5}
strokeWidth={1}
strokeLinecap="round"
strokeLinejoin="round"
/>

View File

@@ -10,7 +10,7 @@ export default function Main() {
<SettingsLayouts.Header
icon={SvgMcp}
title="MCP Actions"
description="Connect MCP (Model Context Protocol) servers to add custom actions and tools for your agents."
description="Connect MCP (Model Context Protocol) servers to add custom actions and tools for your assistants."
separator
/>
<SettingsLayouts.Body>

View File

@@ -10,7 +10,7 @@ export default function Main() {
<SettingsLayouts.Header
icon={SvgActions}
title="OpenAPI Actions"
description="Connect OpenAPI servers to add custom actions and tools for your agents."
description="Connect OpenAPI servers to add custom actions and tools for your assistants."
separator
/>
<SettingsLayouts.Body>

View File

@@ -170,7 +170,7 @@ export function PersonasTable({
{deleteModalOpen && personaToDelete && (
<ConfirmationModalLayout
icon={SvgAlertCircle}
title="Delete Agent"
title="Delete Assistant"
onClose={closeDeleteModal}
submit={<Button onClick={handleDeletePersona}>Delete</Button>}
>
@@ -183,15 +183,15 @@ export function PersonasTable({
const isDefault = personaToToggleDefault.is_default_persona;
const title = isDefault
? "Remove Featured Agent"
: "Set Featured Agent";
? "Remove Featured Assistant"
: "Set Featured Assistant";
const buttonText = isDefault ? "Remove Feature" : "Set as Featured";
const text = isDefault
? `Are you sure you want to remove the featured status of ${personaToToggleDefault.name}?`
: `Are you sure you want to set the featured status of ${personaToToggleDefault.name}?`;
const additionalText = isDefault
? `Removing "${personaToToggleDefault.name}" as a featured agent will not affect its visibility or accessibility.`
: `Setting "${personaToToggleDefault.name}" as a featured agent will make it public and visible to all users. This action cannot be undone.`;
? `Removing "${personaToToggleDefault.name}" as a featured assistant will not affect its visibility or accessibility.`
: `Setting "${personaToToggleDefault.name}" as a featured assistant will make it public and visible to all users. This action cannot be undone.`;
return (
<ConfirmationModalLayout
@@ -217,7 +217,7 @@ export function PersonasTable({
"Name",
"Description",
"Type",
"Featured Agent",
"Featured Assistant",
"Is Visible",
"Delete",
]}

View File

@@ -47,8 +47,8 @@ function MainContent({
return (
<div>
<Text className="mb-2">
Agents are a way to build custom search/question-answering experiences
for different use cases.
Assistants are a way to build custom search/question-answering
experiences for different use cases.
</Text>
<Text className="mt-2">They allow you to customize:</Text>
<div className="text-sm">
@@ -63,21 +63,21 @@ function MainContent({
<div>
<Separator />
<Title>Create an Agent</Title>
<Title>Create an Assistant</Title>
<CreateButton href="/app/agents/create?admin=true">
New Agent
New Assistant
</CreateButton>
<Separator />
<Title>Existing Agents</Title>
<Title>Existing Assistants</Title>
{totalItems > 0 ? (
<>
<SubLabel>
Agents will be displayed as options on the Chat / Search
interfaces in the order they are displayed below. Agents marked as
hidden will not be displayed. Editable agents are shown at the
top.
Assistants will be displayed as options on the Chat / Search
interfaces in the order they are displayed below. Assistants
marked as hidden will not be displayed. Editable assistants are
shown at the top.
</SubLabel>
<PersonasTable
personas={customPersonas}
@@ -96,21 +96,21 @@ function MainContent({
) : (
<div className="mt-6 p-8 border border-border rounded-lg bg-background-weak text-center">
<Text className="text-lg font-medium mb-2">
No custom agents yet
No custom assistants yet
</Text>
<Text className="text-subtle mb-3">
Create your first agent to:
Create your first assistant to:
</Text>
<ul className="text-subtle text-sm list-disc text-left inline-block mb-3">
<li>Build department-specific knowledge bases</li>
<li>Create specialized research agents</li>
<li>Create specialized research assistants</li>
<li>Set up compliance and policy advisors</li>
</ul>
<Text className="text-subtle text-sm mb-4">
...and so much more!
</Text>
<CreateButton href="/app/agents/create?admin=true">
Create Your First Agent
Create Your First Assistant
</CreateButton>
</div>
)}
@@ -128,13 +128,13 @@ export default function Page() {
return (
<>
<AdminPageTitle icon={SvgOnyxOctagon} title="Agents" />
<AdminPageTitle icon={SvgOnyxOctagon} title="Assistants" />
{isLoading && <ThreeDotsLoader />}
{error && (
<ErrorCallout
errorTitle="Failed to load agents"
errorTitle="Failed to load assistants"
errorMsg={
error?.info?.message ||
error?.info?.detail ||

View File

@@ -17,7 +17,6 @@ import {
SvgPlus,
SvgWallet,
SvgFileText,
SvgOrganization,
} from "@opal/icons";
import { BillingInformation, LicenseStatus } from "@/lib/billing/interfaces";
import {
@@ -144,20 +143,17 @@ function SubscriptionCard({
license,
onViewPlans,
disabled,
isManualLicenseOnly,
onReconnect,
}: {
billing?: BillingInformation;
license?: LicenseStatus;
onViewPlans: () => void;
disabled?: boolean;
isManualLicenseOnly?: boolean;
onReconnect?: () => Promise<void>;
}) {
const [isReconnecting, setIsReconnecting] = useState(false);
const planName = isManualLicenseOnly ? "Enterprise Plan" : "Business Plan";
const PlanIcon = isManualLicenseOnly ? SvgOrganization : SvgUsers;
const planName = "Business Plan";
const expirationDate = billing?.current_period_end ?? license?.expires_at;
const formattedDate = formatDateShort(expirationDate);
@@ -215,7 +211,7 @@ function SubscriptionCard({
height="auto"
>
<Section gap={0.25} alignItems="start" height="auto" width="auto">
<PlanIcon className="w-5 h-5" />
<SvgUsers className="w-5 h-5 stroke-text-03" />
<Text headingH3Muted text04>
{planName}
</Text>
@@ -230,19 +226,7 @@ function SubscriptionCard({
height="auto"
width="fit"
>
{isManualLicenseOnly ? (
<Text secondaryBody text03 className="text-right">
Your plan is managed through sales.
<br />
<a
href="mailto:support@onyx.app?subject=Billing%20change%20request"
className="underline"
>
Contact billing
</a>{" "}
to make changes.
</Text>
) : disabled ? (
{disabled ? (
<Button
main
secondary
@@ -282,13 +266,11 @@ function SeatsCard({
license,
onRefresh,
disabled,
hideUpdateSeats,
}: {
billing?: BillingInformation;
license?: LicenseStatus;
onRefresh?: () => Promise<void>;
disabled?: boolean;
hideUpdateSeats?: boolean;
}) {
const [isEditing, setIsEditing] = useState(false);
const [isSubmitting, setIsSubmitting] = useState(false);
@@ -502,17 +484,15 @@ function SeatsCard({
<Button main tertiary href="/admin/users" leftIcon={SvgExternalLink}>
View Users
</Button>
{!hideUpdateSeats && (
<Button
main
secondary
onClick={handleStartEdit}
leftIcon={SvgPlus}
disabled={isLoadingUsers || disabled || !billing}
>
Update Seats
</Button>
)}
<Button
main
secondary
onClick={handleStartEdit}
leftIcon={SvgPlus}
disabled={isLoadingUsers || disabled || !billing}
>
Update Seats
</Button>
</Section>
</Section>
</Card>
@@ -613,9 +593,7 @@ interface BillingDetailsViewProps {
onViewPlans: () => void;
onRefresh?: () => Promise<void>;
isAirGapped?: boolean;
isManualLicenseOnly?: boolean;
hasStripeError?: boolean;
licenseCard?: React.ReactNode;
}
export default function BillingDetailsView({
@@ -624,13 +602,10 @@ export default function BillingDetailsView({
onViewPlans,
onRefresh,
isAirGapped,
isManualLicenseOnly,
hasStripeError,
licenseCard,
}: BillingDetailsViewProps) {
const expirationState = billing ? getExpirationState(billing, license) : null;
const disableBillingActions =
isAirGapped || hasStripeError || isManualLicenseOnly;
const disableBillingActions = isAirGapped || hasStripeError;
return (
<Section gap={1} height="auto" width="full">
@@ -647,7 +622,7 @@ export default function BillingDetailsView({
)}
{/* Air-gapped mode info banner */}
{isAirGapped && !hasStripeError && !isManualLicenseOnly && (
{isAirGapped && !hasStripeError && (
<Message
static
info
@@ -690,21 +665,16 @@ export default function BillingDetailsView({
license={license}
onViewPlans={onViewPlans}
disabled={disableBillingActions}
isManualLicenseOnly={isManualLicenseOnly}
onReconnect={onRefresh}
/>
)}
{/* License card (inline for manual license users) */}
{licenseCard}
{/* Seats card */}
<SeatsCard
billing={billing}
license={license}
onRefresh={onRefresh}
disabled={disableBillingActions}
hideUpdateSeats={isManualLicenseOnly}
/>
{/* Payment section */}

View File

@@ -19,7 +19,6 @@ interface LicenseActivationCardProps {
onClose: () => void;
onSuccess: () => void;
license?: LicenseStatus;
hideClose?: boolean;
}
export default function LicenseActivationCard({
@@ -27,7 +26,6 @@ export default function LicenseActivationCard({
onClose,
onSuccess,
license,
hideClose,
}: LicenseActivationCardProps) {
const [licenseKey, setLicenseKey] = useState("");
const [isActivating, setIsActivating] = useState(false);
@@ -122,11 +120,9 @@ export default function LicenseActivationCard({
<Button main secondary onClick={() => setShowInput(true)}>
Update Key
</Button>
{!hideClose && (
<Button main tertiary onClick={handleClose}>
Close
</Button>
)}
<Button main tertiary onClick={handleClose}>
Close
</Button>
</Section>
</Section>
</Card>

View File

@@ -121,12 +121,11 @@ export default function BillingPage() {
const billing = hasSubscription ? (billingData as BillingInformation) : null;
const isSelfHosted = !NEXT_PUBLIC_CLOUD_ENABLED;
// User is only air-gapped if they have a manual license AND Stripe is not connected
// Once Stripe connects successfully, they're no longer air-gapped
const hasManualLicense = licenseData?.source === "manual_upload";
// Air-gapped: billing endpoint is unreachable (manual license + connectivity error)
const isAirGapped = !!(hasManualLicense && billingError);
// Stripe error: auto-fetched license but billing endpoint is unreachable
const stripeConnected = billingData && !billingError;
const isAirGapped = hasManualLicense && !stripeConnected;
const hasStripeError = !!(
isSelfHosted &&
licenseData?.has_license &&
@@ -134,10 +133,6 @@ export default function BillingPage() {
!hasManualLicense
);
// Manual license without active Stripe subscription
// Stripe-dependent actions (manage plan, update seats) won't work
const isManualLicenseOnly = !!(hasManualLicense && !hasSubscription);
// Set initial view based on subscription status (only once when data first loads)
useEffect(() => {
if (!isLoading && view === null) {
@@ -248,10 +243,7 @@ export default function BillingPage() {
return {
icon: hasSubscription ? SvgWallet : SvgArrowUpCircle,
title: hasSubscription ? "View Plans" : "Upgrade Plan",
showBackButton: !!(
hasSubscription ||
(isSelfHosted && licenseData?.has_license)
),
showBackButton: !!hasSubscription,
};
case "details":
return {
@@ -279,11 +271,9 @@ export default function BillingPage() {
};
const handleBack = () => {
const hasEntitlement =
hasSubscription || (isSelfHosted && licenseData?.has_license);
if (view === "checkout") {
changeView(hasEntitlement ? "details" : "plans");
} else if (view === "plans" && hasEntitlement) {
changeView(hasSubscription ? "details" : "plans");
} else if (view === "plans" && hasSubscription) {
changeView("details");
}
};
@@ -315,19 +305,7 @@ export default function BillingPage() {
onViewPlans={() => changeView("plans")}
onRefresh={handleRefresh}
isAirGapped={isAirGapped}
isManualLicenseOnly={isManualLicenseOnly}
hasStripeError={hasStripeError}
licenseCard={
isManualLicenseOnly ? (
<LicenseActivationCard
isOpen
onSuccess={handleLicenseActivated}
license={licenseData ?? undefined}
onClose={() => {}}
hideClose
/>
) : undefined
}
/>
),
};
@@ -344,7 +322,7 @@ export default function BillingPage() {
if (isLoading || view === null) return null;
return (
<>
{showLicenseActivationInput && !isManualLicenseOnly && (
{showLicenseActivationInput && (
<div className="w-full billing-card-enter">
<LicenseActivationCard
isOpen={showLicenseActivationInput}
@@ -363,7 +341,6 @@ export default function BillingPage() {
isSelfHosted ? () => setShowLicenseActivationInput(true) : undefined
}
hideLicenseLink={
isManualLicenseOnly ||
showLicenseActivationInput ||
(view === "plans" &&
(!!hasSubscription || !!licenseData?.has_license))

View File

@@ -156,7 +156,7 @@ export const SlackChannelConfigCreationForm = ({
is: "assistant",
then: (schema) =>
schema.required(
"An agent is required when using the 'Agent' knowledge source"
"A persona is required when using the'Assistant' knowledge source"
),
}),
standard_answer_categories: Yup.array(),

View File

@@ -224,14 +224,14 @@ export function SlackChannelConfigFormFields({
<RadioGroupItemField
value="assistant"
id="assistant"
label="Search Agent"
label="Search Assistant"
sublabel="Control both the documents and the prompt to use for answering questions"
/>
<RadioGroupItemField
value="non_search_assistant"
id="non_search_assistant"
label="Non-Search Agent"
sublabel="Chat with an agent that does not use documents"
label="Non-Search Assistant"
sublabel="Chat with an assistant that does not use documents"
/>
</RadioGroup>
</div>
@@ -327,15 +327,15 @@ export function SlackChannelConfigFormFields({
<div className="mt-4">
<SubLabel>
<>
Select the search-enabled agent OnyxBot will use while answering
questions in Slack.
Select the search-enabled assistant OnyxBot will use while
answering questions in Slack.
{syncEnabledAssistants.length > 0 && (
<>
<br />
<span className="text-sm text-text-dark/80">
Note: Some of your agents have auto-synced connectors in
their document sets. You cannot select these agents as
they will not be able to answer questions in Slack.{" "}
Note: Some of your assistants have auto-synced connectors
in their document sets. You cannot select these assistants
as they will not be able to answer questions in Slack.{" "}
<button
type="button"
onClick={() =>
@@ -349,7 +349,7 @@ export function SlackChannelConfigFormFields({
{viewSyncEnabledAssistants
? "Hide un-selectable "
: "View all "}
agents
assistants
</button>
</span>
</>
@@ -367,7 +367,7 @@ export function SlackChannelConfigFormFields({
{viewSyncEnabledAssistants && syncEnabledAssistants.length > 0 && (
<div className="mt-4">
<p className="text-sm text-text-dark/80">
Un-selectable agents:
Un-selectable assistants:
</p>
<div className="mb-3 mt-2 flex gap-2 flex-wrap text-sm">
{syncEnabledAssistants.map(
@@ -394,15 +394,15 @@ export function SlackChannelConfigFormFields({
<div className="mt-4">
<SubLabel>
<>
Select the non-search agent OnyxBot will use while answering
Select the non-search assistant OnyxBot will use while answering
questions in Slack.
{syncEnabledAssistants.length > 0 && (
<>
<br />
<span className="text-sm text-text-dark/80">
Note: Some of your agents have auto-synced connectors in
their document sets. You cannot select these agents as
they will not be able to answer questions in Slack.{" "}
Note: Some of your assistants have auto-synced connectors
in their document sets. You cannot select these assistants
as they will not be able to answer questions in Slack.{" "}
<button
type="button"
onClick={() =>
@@ -416,7 +416,7 @@ export function SlackChannelConfigFormFields({
{viewSyncEnabledAssistants
? "Hide un-selectable "
: "View all "}
agents
assistants
</button>
</span>
</>
@@ -524,7 +524,7 @@ export function SlackChannelConfigFormFields({
name="is_ephemeral"
label="Respond to user in a private (ephemeral) message"
tooltip="If set, OnyxBot will respond only to the user in a private (ephemeral) message. If you also
chose 'Search' Agent above, selecting this option will make documents that are private to the user
chose 'Search' Assistant above, selecting this option will make documents that are private to the user
available for their queries."
/>

View File

@@ -1,7 +0,0 @@
"use client";
import CodeInterpreterPage from "@/refresh-pages/admin/CodeInterpreterPage";
/**
 * Default-exported page component; renders CodeInterpreterPage with no props.
 */
export default function Page() {
return <CodeInterpreterPage />;
}

View File

@@ -39,10 +39,10 @@ export function AdvancedOptions({
agents={agents}
isLoading={agentsLoading}
error={agentsError}
label="Agent Whitelist"
subtext="Restrict this provider to specific agents."
label="Assistant Whitelist"
subtext="Restrict this provider to specific assistants."
disabled={formikProps.values.is_public}
disabledMessage="This LLM Provider is public and available to all agents."
disabledMessage="This LLM Provider is public and available to all assistants."
/>
</div>
</>

View File

@@ -299,11 +299,11 @@ export default function Page({ params }: Props) {
});
refreshGuild();
toast.success(
personaId ? "Default agent updated" : "Default agent cleared"
personaId ? "Default assistant updated" : "Default assistant cleared"
);
} catch (err) {
toast.error(
err instanceof Error ? err.message : "Failed to update agent"
err instanceof Error ? err.message : "Failed to update assistant"
);
} finally {
setIsUpdating(false);
@@ -355,7 +355,7 @@ export default function Page({ params }: Props) {
<InputSelect.Trigger placeholder="Select agent" />
<InputSelect.Content>
<InputSelect.Item value="default">
Default Agent
Default Assistant
</InputSelect.Item>
{personas.map((persona) => (
<InputSelect.Item

View File

@@ -14,11 +14,6 @@ import {
TimelineRendererComponent,
TimelineRendererOutput,
} from "./TimelineRendererComponent";
import {
isReasoningPackets,
isDeepResearchPlanPackets,
isMemoryToolPackets,
} from "./packetHelpers";
import Tabs from "@/refresh-components/Tabs";
import { SvgBranch, SvgFold, SvgExpand } from "@opal/icons";
import { Button } from "@opal/components";
@@ -65,13 +60,6 @@ export function ParallelTimelineTabs({
[turnGroup.steps, activeTab]
);
// Determine if the active step needs full-width content (no right padding)
const noPaddingRight = activeStep
? isReasoningPackets(activeStep.packets) ||
isDeepResearchPlanPackets(activeStep.packets) ||
isMemoryToolPackets(activeStep.packets)
: false;
// Memoized loading states for each step
const loadingStates = useMemo(
() =>
@@ -94,10 +82,9 @@ export function ParallelTimelineTabs({
isFirstStep={false}
isSingleStep={false}
collapsible={true}
noPaddingRight={noPaddingRight}
/>
),
[isLastTurnGroup, noPaddingRight]
[isLastTurnGroup]
);
const hasActivePackets = Boolean(activeStep && activeStep.packets.length > 0);

View File

@@ -50,7 +50,7 @@ export function TimelineStepComposer({
header={result.status}
isExpanded={result.isExpanded}
onToggle={result.onToggle}
collapsible={collapsible && !isSingleStep}
collapsible={collapsible}
supportsCollapsible={result.supportsCollapsible}
isLastStep={index === results.length - 1 && isLastStep}
isFirstStep={index === 0 && isFirstStep}

View File

@@ -63,7 +63,7 @@ export const FetchToolRenderer: MessageRenderer<FetchToolPacket, {}> = ({
return children([
{
icon: SvgCircle,
status: "Reading",
status: null,
content: <div />,
supportsCollapsible: false,
timelineLayout: "timeline",

View File

@@ -46,7 +46,7 @@ export const MemoryToolRenderer: MessageRenderer<MemoryToolPacket, {}> = ({
return children([
{
icon: SvgEditBig,
status: "Memory",
status: null,
content: <div />,
supportsCollapsible: false,
timelineLayout: "timeline",

View File

@@ -169,9 +169,7 @@ export const ReasoningRenderer: MessageRenderer<
);
if (!hasStart && !hasEnd && content.length === 0) {
return children([
{ icon: SvgCircle, status: THINKING_STATUS, content: <></> },
]);
return children([{ icon: SvgCircle, status: null, content: <></> }]);
}
const reasoningContent = (

View File

@@ -61,7 +61,7 @@ export const InternalSearchToolRenderer: MessageRenderer<
children,
}) => {
const searchState = constructCurrentSearchState(packets);
const { queries, results, isComplete } = searchState;
const { queries, results } = searchState;
const isCompact = renderType === RenderType.COMPACT;
const isHighlight = renderType === RenderType.HIGHLIGHT;
@@ -75,7 +75,7 @@ export const InternalSearchToolRenderer: MessageRenderer<
return children([
{
icon: SvgSearchMenu,
status: queriesHeader,
status: null,
content: <></>,
supportsCollapsible: true,
timelineLayout: "timeline",
@@ -109,15 +109,7 @@ export const InternalSearchToolRenderer: MessageRenderer<
window.open(doc.link, "_blank", "noopener,noreferrer");
}
}}
emptyState={
!isComplete ? (
<BlinkingBar />
) : (
<Text as="p" text04 mainUiMuted>
No results found
</Text>
)
}
emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
/>
</div>
),
@@ -172,15 +164,7 @@ export const InternalSearchToolRenderer: MessageRenderer<
window.open(doc.link, "_blank", "noopener,noreferrer");
}
}}
emptyState={
!isComplete ? (
<BlinkingBar />
) : (
<Text as="p" text04 mainUiMuted>
No results found
</Text>
)
}
emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
/>
),
},
@@ -229,15 +213,7 @@ export const InternalSearchToolRenderer: MessageRenderer<
window.open(doc.link, "_blank", "noopener,noreferrer");
}
}}
emptyState={
!isComplete ? (
<BlinkingBar />
) : (
<Text as="p" text03 mainUiMuted>
No results found
</Text>
)
}
emptyState={!stopPacketSeen ? <BlinkingBar /> : undefined}
/>
</>
)}

View File

@@ -53,7 +53,7 @@ export const WebSearchToolRenderer: MessageRenderer<SearchToolPacket, {}> = ({
return children([
{
icon: SvgGlobe,
status: "Searching the web",
status: null,
content: <div />,
supportsCollapsible: false,
timelineLayout: "timeline",

View File

@@ -427,7 +427,7 @@ export const GroupDisplay = ({
<Separator />
<h2 className="text-xl font-bold mt-8 mb-2">Agents</h2>
<h2 className="text-xl font-bold mt-8 mb-2">Assistants</h2>
<div>
{userGroup.document_sets.length > 0 ? (
@@ -445,7 +445,7 @@ export const GroupDisplay = ({
</div>
) : (
<>
<Text>No Agents in this group...</Text>
<Text>No Assistants in this group...</Text>
</>
)}
</div>

View File

@@ -152,14 +152,14 @@ export function PersonaMessagesChart({
} else if (selectedPersonaId === undefined) {
content = (
<div className="h-80 text-text-500 flex flex-col">
<p className="m-auto">Select an agent to view analytics</p>
<p className="m-auto">Select an assistant to view analytics</p>
</div>
);
} else if (!personaMessagesData?.length) {
content = (
<div className="h-80 text-text-500 flex flex-col">
<p className="m-auto">
No data found for selected agent in the specified time range
No data found for selected assistant in the specified time range
</p>
</div>
);
@@ -178,9 +178,11 @@ export function PersonaMessagesChart({
return (
<CardSection className="mt-8">
<Title>Agent Analytics</Title>
<Title>Assistant Analytics</Title>
<div className="flex flex-col gap-4">
<Text>Messages and unique users per day for the selected agent</Text>
<Text>
Messages and unique users per day for the selected assistant
</Text>
<div className="flex items-center gap-4">
<Select
value={selectedPersonaId?.toString() ?? ""}
@@ -189,14 +191,14 @@ export function PersonaMessagesChart({
}}
>
<SelectTrigger className="flex w-full max-w-xs">
<SelectValue placeholder="Select an agent to display" />
<SelectValue placeholder="Select an assistant to display" />
</SelectTrigger>
<SelectContent>
<div className="flex items-center px-2 pb-2 sticky top-0 bg-background border-b">
<Search className="h-4 w-4 mr-2 shrink-0 opacity-50" />
<input
className="flex h-8 w-full rounded-sm bg-transparent py-3 text-sm outline-none placeholder:text-muted-foreground disabled:cursor-not-allowed disabled:opacity-50"
placeholder="Search agents..."
placeholder="Search assistants..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
onClick={(e) => e.stopPropagation()}

View File

@@ -146,7 +146,7 @@ export function AssistantStats({ assistantId }: { assistantId: number }) {
return (
<Card className="w-full">
<CardHeader className="flex flex-row items-center justify-between space-y-0 pb-2">
<p className="text-base font-normal text-2xl">Agent Analytics</p>
<p className="text-base font-normal text-2xl">Assistant Analytics</p>
<AdminDateRangeSelector
value={dateRange}
onValueChange={setDateRange}

View File

@@ -72,7 +72,6 @@ export function ClientLayout({
enableEnterpriseSS={enableEnterprise}
/>
<div
data-main-container
className={cn(
"flex flex-1 flex-col min-w-0 min-h-0 overflow-y-auto",
!hasOwnLayout && "py-10 px-4 md:px-12"

View File

@@ -41,14 +41,8 @@ export default function AccessRestricted() {
const [error, setError] = useState<string | null>(null);
const { data: license } = useLicense();
const hadPreviousLicense = license?.has_license === true;
const showRenewalMessage = NEXT_PUBLIC_CLOUD_ENABLED || hadPreviousLicense;
const initialModalMessage = showRenewalMessage
? NEXT_PUBLIC_CLOUD_ENABLED
? "Your access to Onyx has been temporarily suspended due to a lapse in your subscription."
: "Your access to Onyx has been temporarily suspended due to a lapse in your license."
: "An Enterprise license is required to use Onyx. Your data is protected and will be available once a license is activated.";
// Distinguish between "never had a license" vs "license lapsed"
const hasLicenseLapsed = license?.has_license === true;
const handleResubscribe = async () => {
setIsLoading(true);
@@ -78,7 +72,11 @@ export default function AccessRestricted() {
<SvgLock className="stroke-status-error-05 w-[1.5rem] h-[1.5rem]" />
</div>
<Text text03>{initialModalMessage}</Text>
<Text text03>
{hasLicenseLapsed
? "Your access to Onyx has been temporarily suspended due to a lapse in your subscription."
: "An Enterprise license is required to use Onyx. Your data is protected and will be available once a license is activated."}
</Text>
{NEXT_PUBLIC_CLOUD_ENABLED ? (
<>
@@ -113,7 +111,7 @@ export default function AccessRestricted() {
) : (
<>
<Text text03>
{hadPreviousLicense
{hasLicenseLapsed
? "To reinstate your access and continue using Onyx, please contact your system administrator to renew your license."
: "To get started, please contact your system administrator to obtain an Enterprise license."}
</Text>
@@ -123,8 +121,8 @@ export default function AccessRestricted() {
<Link className={linkClassName} href="/admin/billing">
Admin Billing
</Link>{" "}
page to {hadPreviousLicense ? "renew" : "activate"} your license,
sign up through Stripe or reach out to{" "}
page to {hasLicenseLapsed ? "renew" : "activate"} your license, sign
up through Stripe or reach out to{" "}
<a className={linkClassName} href="mailto:support@onyx.app">
support@onyx.app
</a>

View File

@@ -12,17 +12,17 @@ export default function NoAssistantModal() {
return (
<Modal open>
<Modal.Content width="sm" height="sm">
<Modal.Header icon={SvgUser} title="No Agent Available" />
<Modal.Header icon={SvgUser} title="No Assistant Available" />
<Modal.Body>
<Text as="p">
You currently have no agent configured. To use this feature, you
You currently have no assistant configured. To use this feature, you
need to take action.
</Text>
{isAdmin ? (
<>
<Text as="p">
As an administrator, you can create a new agent by visiting the
admin panel.
As an administrator, you can create a new assistant by visiting
the admin panel.
</Text>
<Button className="w-full" href="/admin/assistants">
Go to Admin Panel
@@ -30,7 +30,8 @@ export default function NoAssistantModal() {
</>
) : (
<Text as="p">
Please contact your administrator to configure an agent for you.
Please contact your administrator to configure an assistant for
you.
</Text>
)}
</Modal.Body>

View File

@@ -1,44 +0,0 @@
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
// Endpoint whose response useCodeInterpreter reads as CodeInterpreterHealth.
const HEALTH_ENDPOINT = "/api/admin/code-interpreter/health";
// Endpoint whose response useCodeInterpreter reads as CodeInterpreterStatus.
const STATUS_ENDPOINT = "/api/admin/code-interpreter";
/** Response shape expected from HEALTH_ENDPOINT. */
interface CodeInterpreterHealth {
healthy: boolean;
}
/** Response shape expected from STATUS_ENDPOINT. */
interface CodeInterpreterStatus {
enabled: boolean;
}
/**
 * Hook exposing the code interpreter's health and enabled state.
 *
 * Issues two SWR requests: one to HEALTH_ENDPOINT (configured with a
 * `refreshInterval` of 30000) and one to STATUS_ENDPOINT.
 *
 * @returns An object with:
 *   - `isHealthy` / `isEnabled`: the fetched flags, `false` while the
 *     corresponding data is missing
 *   - `isLoading`: true while either request is loading
 *   - `error`: the health request's error if set, otherwise the status one
 *   - `refetch`: calls both requests' `mutate` functions
 */
export default function useCodeInterpreter() {
  const healthQuery = useSWR<CodeInterpreterHealth>(
    HEALTH_ENDPOINT,
    errorHandlingFetcher,
    { refreshInterval: 30000 }
  );
  const statusQuery = useSWR<CodeInterpreterStatus>(
    STATUS_ENDPOINT,
    errorHandlingFetcher
  );

  // Pull every field out eagerly (rather than lazily inside `||` chains) so
  // the same set of SWR response fields is read on every render.
  const {
    data: health,
    error: healthFailure,
    isLoading: healthPending,
    mutate: reloadHealth,
  } = healthQuery;
  const {
    data: status,
    error: statusFailure,
    isLoading: statusPending,
    mutate: reloadStatus,
  } = statusQuery;

  const refetch = () => {
    reloadHealth();
    reloadStatus();
  };

  const isLoading = healthPending || statusPending;
  const error = healthFailure || statusFailure;

  return {
    isHealthy: health?.healthy ?? false,
    isEnabled: status?.enabled ?? false,
    isLoading,
    error,
    refetch,
  };
}

View File

@@ -1,15 +0,0 @@
// Endpoint that updateCodeInterpreter sends its PUT request to.
const UPDATE_ENDPOINT = "/api/admin/code-interpreter";
/** JSON body sent by updateCodeInterpreter. */
interface CodeInterpreterUpdateRequest {
enabled: boolean;
}
/**
 * Sends a PUT request to UPDATE_ENDPOINT with `request` serialized as JSON.
 *
 * @param request - The update payload
 * @returns The raw fetch Response; the status is not inspected here, so
 *   callers must check `response.ok` themselves
 */
export async function updateCodeInterpreter(
  request: CodeInterpreterUpdateRequest
): Promise<Response> {
  const payload = JSON.stringify(request);
  const init: RequestInit = {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: payload,
  };
  return fetch(UPDATE_ENDPOINT, init);
}

View File

@@ -131,8 +131,7 @@ export async function updateAgentSharedStatus(
userIds: string[],
groupIds: number[],
isPublic: boolean | undefined,
isPaidEnterpriseFeaturesEnabled: boolean,
labelIds?: number[]
isPaidEnterpriseFeaturesEnabled: boolean
): Promise<null | string> {
// MIT versions should not send group_ids - warn if caller provided non-empty groups
if (!isPaidEnterpriseFeaturesEnabled && groupIds.length > 0) {
@@ -153,7 +152,6 @@ export async function updateAgentSharedStatus(
// Only include group_ids for enterprise versions
group_ids: isPaidEnterpriseFeaturesEnabled ? groupIds : undefined,
is_public: isPublic,
label_ids: labelIds,
}),
});
@@ -168,63 +166,3 @@ export async function updateAgentSharedStatus(
return "Network error. Please check your connection and try again.";
}
}
/**
 * Updates the labels assigned to an agent via the share endpoint.
 *
 * Sends a PATCH to `/api/persona/{agentId}/share` with `{ label_ids }`.
 *
 * @param agentId - The ID of the agent to update
 * @param labelIds - Array of label IDs to assign to the agent
 * @returns null on success, or an error message string on failure: the
 *   response body's `detail` when present, "Unknown error" when the error
 *   body has no usable `detail` (including bodies that are not JSON), or a
 *   network error message when the request itself fails
 */
export async function updateAgentLabels(
  agentId: number,
  labelIds: number[]
): Promise<string | null> {
  try {
    const response = await fetch(`/api/persona/${agentId}/share`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ label_ids: labelIds }),
    });
    if (response.ok) {
      return null;
    }

    // Error bodies are not guaranteed to be JSON (e.g. an HTML error page from
    // a proxy). A parse failure must not be reported as a network error, so
    // only SyntaxError is absorbed here; anything else reaches the outer catch.
    let detail: string | undefined;
    try {
      detail = (await response.json())?.detail;
    } catch (parseError) {
      if (!(parseError instanceof SyntaxError)) {
        throw parseError;
      }
    }
    return detail || "Unknown error";
  } catch (error) {
    console.error("updateAgentLabels: Network error", error);
    return "Network error. Please check your connection and try again.";
  }
}
/**
 * Updates the featured (default) status of an agent.
 *
 * Sends a PATCH to `/api/admin/persona/{agentId}/default` with
 * `{ is_default_persona }`.
 *
 * @param agentId - The ID of the agent to update
 * @param isFeatured - Whether the agent should be featured
 * @returns null on success, or an error message string on failure: the
 *   response body's `detail` when present, "Unknown error" when the error
 *   body has no usable `detail` (including bodies that are not JSON), or a
 *   network error message when the request itself fails
 */
export async function updateAgentFeaturedStatus(
  agentId: number,
  isFeatured: boolean
): Promise<string | null> {
  try {
    const response = await fetch(`/api/admin/persona/${agentId}/default`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ is_default_persona: isFeatured }),
    });
    if (response.ok) {
      return null;
    }

    // The error body may not be JSON (e.g. an HTML page returned by a gateway).
    // Treat an unparseable body as "no detail" instead of letting the parse
    // failure surface as a misleading network error; other failures still
    // propagate to the outer catch.
    let detail: string | undefined;
    try {
      detail = (await response.json())?.detail;
    } catch (parseError) {
      if (!(parseError instanceof SyntaxError)) {
        throw parseError;
      }
    }
    return detail || "Unknown error";
  } catch (error) {
    console.error("updateAgentFeaturedStatus: Network error", error);
    return "Network error. Please check your connection and try again.";
  }
}

View File

@@ -257,27 +257,19 @@ export const useLabels = () => {
return mutate("/api/persona/labels");
};
const createLabel = async (name: string): Promise<PersonaLabel | null> => {
const createLabel = async (name: string) => {
const response = await fetch("/api/persona/labels", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ name }),
});
if (!response.ok) {
return null;
if (response.ok) {
const newLabel = await response.json();
mutate("/api/persona/labels", [...(labels || []), newLabel], false);
}
const newLabel: PersonaLabel = await response.json();
mutate(
"/api/persona/labels",
(currentLabels: PersonaLabel[] | undefined) => [
...(currentLabels || []),
newLabel,
],
false
);
return newLabel;
return response;
};
const updateLabel = async (id: number, name: string) => {

View File

@@ -1,51 +1,29 @@
import Text from "@/refresh-components/texts/Text";
import { SvgX } from "@opal/icons";
import { Button } from "@opal/components";
import type { IconProps } from "@opal/types";
export interface ChipProps {
children?: string;
icon?: React.FunctionComponent<IconProps>;
onRemove?: () => void;
smallLabel?: boolean;
}
/**
* A simple chip/tag component for displaying metadata.
* Supports an optional remove button via the `onRemove` prop.
*
* @example
* ```tsx
* <Chip>Tag Name</Chip>
* <Chip icon={SvgUser}>John Doe</Chip>
* <Chip onRemove={() => removeTag(id)}>Removable</Chip>
* ```
*/
export default function Chip({
children,
icon: Icon,
onRemove,
smallLabel = true,
}: ChipProps) {
export default function Chip({ children, icon: Icon }: ChipProps) {
return (
<div className="flex items-center gap-1 px-1.5 py-0.5 rounded-08 bg-background-tint-02">
{Icon && <Icon size={12} className="text-text-03" />}
{children && (
<Text figureSmallLabel={smallLabel} text03>
<Text figureSmallLabel text03>
{children}
</Text>
)}
{onRemove && (
<Button
onClick={(e) => {
e.stopPropagation();
onRemove();
}}
prominence="tertiary"
icon={SvgX}
size="xs"
/>
)}
</div>
);
}

View File

@@ -39,9 +39,9 @@ const useTabsContext = () => {
*
* Contained (default):
* ┌─────────────────────────────────────────────────┐
* │ ┌──────────┐ ╔══════════╗ ┌──────────┐
* │ │ Tab 1 │ ║ Tab 2 ║ │ Tab 3 │ │ ← gray background
* │ └──────────┘ ╚══════════╝ └──────────┘
* │ ┌──────────┐ ╔══════════╗ ┌──────────┐ │
* │ │ Tab 1 │ ║ Tab 2 ║ │ Tab 3 │ │ ← gray background
* │ └──────────┘ ╚══════════╝ └──────────┘ │
* └─────────────────────────────────────────────────┘
* ↑ active tab (white bg, shadow)
*
@@ -49,7 +49,7 @@ const useTabsContext = () => {
* Tab 1 Tab 2 Tab 3 [Action]
* ╔═════╗
* ║ ║ ↑ optional rightContent
* ────────────╨═════╨─────────────────────────────
* ────────────╨═════╨─────────────────────────────
* ↑ sliding indicator under active tab
*
* @example

View File

@@ -1,125 +0,0 @@
"use client";
import * as React from "react";
import { cn } from "@/lib/utils";
import Chip from "@/refresh-components/Chip";
import {
innerClasses,
textClasses,
Variants,
wrapperClasses,
} from "@/refresh-components/inputs/styles";
import type { IconProps } from "@opal/types";
/** A single chip entry rendered by `InputChipField`. */
export interface ChipItem {
/** Identifier used as the React `key` for the chip and passed to `onRemoveChip`. */
id: string;
/** Text rendered as the chip's content. */
label: string;
}
/** Props accepted by `InputChipField`. */
export interface InputChipFieldProps {
/** Chips rendered, in array order, before the text input. */
chips: ChipItem[];
/**
* Called with a chip's `id` when that chip's remove handler fires, or with
* the last chip's `id` when Backspace is pressed while `value` is empty.
*/
onRemoveChip: (id: string) => void;
/**
* Called with the trimmed `value` when Enter is pressed and the trimmed
* text is non-empty.
*/
onAdd: (value: string) => void;
/** Current text of the input (the input is controlled). */
value: string;
/** Called with the input's new text on every change event. */
onChange: (value: string) => void;
/** Placeholder for the input; only applied while `chips` is empty. */
placeholder?: string;
/**
* Disables the input, ignores key handling, and leaves chips without a
* remove handler. Defaults to `false`.
*/
disabled?: boolean;
/** Key used to look up wrapper/inner/text style classes. Defaults to `"primary"`. */
variant?: Variants;
/** Optional icon component rendered before the chips. */
icon?: React.FunctionComponent<IconProps>;
/** Extra class names merged onto the wrapper element. */
className?: string;
}
/**
 * A tag/chip input field that renders chips inline alongside a text input.
 *
 * Keyboard behavior (ignored while `disabled` or while an IME composition is
 * in progress):
 * - Enter: calls `onAdd` with the trimmed `value` if it is non-empty. The
 *   event's default action and propagation are always suppressed.
 * - Backspace on an empty input: calls `onRemoveChip` with the last chip's id.
 *
 * Each chip receives a remove handler unless the field is disabled. Clicking
 * anywhere on the wrapper focuses the text input. The component is fully
 * controlled: it never clears `value` itself after `onAdd`.
 *
 * @example
 * ```tsx
 * <InputChipField
 *   chips={[{ id: "1", label: "Search" }]}
 *   onRemoveChip={(id) => remove(id)}
 *   onAdd={(value) => add(value)}
 *   value={inputValue}
 *   onChange={setInputValue}
 *   placeholder="Add labels..."
 *   icon={SvgTag}
 * />
 * ```
 */
function InputChipField({
  chips,
  onRemoveChip,
  onAdd,
  value,
  onChange,
  placeholder,
  disabled = false,
  variant = "primary",
  icon: Icon,
  className,
}: InputChipFieldProps) {
  const inputRef = React.useRef<HTMLInputElement>(null);

  function handleKeyDown(e: React.KeyboardEvent<HTMLInputElement>) {
    if (disabled) {
      return;
    }

    // While an IME composition is active (e.g. CJK input), Enter confirms the
    // candidate and Backspace edits the composition buffer. Neither should
    // add or remove chips, so leave those keystrokes to the IME.
    if (e.nativeEvent.isComposing) {
      return;
    }

    if (e.key === "Enter") {
      // Keep Enter from submitting an enclosing form or reaching parent handlers.
      e.preventDefault();
      e.stopPropagation();
      const trimmed = value.trim();
      if (trimmed) {
        onAdd(trimmed);
      }
    }

    if (e.key === "Backspace" && value === "") {
      const lastChip = chips[chips.length - 1];
      if (lastChip) {
        onRemoveChip(lastChip.id);
      }
    }
  }

  return (
    <div
      className={cn(
        "flex flex-row items-center flex-wrap gap-1 p-1.5 rounded-08 cursor-text w-full",
        wrapperClasses[variant],
        className
      )}
      // Clicking the padding or the gaps between chips should still focus the input.
      onClick={() => inputRef.current?.focus()}
    >
      {Icon && <Icon size={16} className="text-text-04 shrink-0" />}
      {chips.map((chip) => (
        <Chip
          key={chip.id}
          onRemove={disabled ? undefined : () => onRemoveChip(chip.id)}
          smallLabel={false}
        >
          {chip.label}
        </Chip>
      ))}
      <input
        ref={inputRef}
        type="text"
        disabled={disabled}
        value={value}
        onChange={(e) => onChange(e.target.value)}
        onKeyDown={handleKeyDown}
        // Only show the placeholder while the field holds no chips.
        placeholder={chips.length === 0 ? placeholder : undefined}
        className={cn(
          "flex-1 min-w-[80px] h-[1.5rem] bg-transparent p-0.5 focus:outline-none",
          innerClasses[variant],
          textClasses[variant]
        )}
      />
    </div>
  );
}
export default InputChipField;

View File

@@ -12,8 +12,6 @@ import {
SvgX,
SvgXOctagon,
} from "@opal/icons";
import type { IconFunctionComponent } from "@opal/types";
const containerClasses = {
flash: {
default: {
@@ -220,7 +218,6 @@ export interface MessageProps extends React.HTMLAttributes<HTMLDivElement> {
// Features:
icon?: boolean;
iconComponent?: IconFunctionComponent;
actions?: boolean | string;
close?: boolean;
@@ -247,7 +244,6 @@ function MessageInner(
description,
icon = true,
iconComponent,
actions,
close = true,
@@ -283,9 +279,8 @@ function MessageInner(
const textClass = useMemo(() => textClasses[type].text, [type]);
const descriptionClass = useMemo(() => textClasses[type].description, [type]);
const IconComponent = iconComponent
? iconComponent
: level === "success"
const IconComponent =
level === "success"
? SvgCheckCircle
: level === "warning"
? SvgAlertTriangle

View File

@@ -64,7 +64,6 @@ function MemoryItem({
if (!shouldHighlight) return;
wrapperRef.current?.scrollIntoView({ block: "center", behavior: "smooth" });
textareaRef.current?.focus();
setIsHighlighting(true);
const timer = setTimeout(() => {

View File

@@ -115,9 +115,14 @@ export default function ActionLineItem({
<Section gap={0.25} flexDirection="row">
{!isUnavailable && tool?.oauth_config_id && toolAuthStatus && (
<Button
icon={SvgKey}
prominence="secondary"
size="sm"
icon={({ className }) => (
<SvgKey
className={cn(
className,
"stroke-yellow-500 hover:stroke-yellow-600"
)}
/>
)}
onClick={noProp(() => {
if (
!toolAuthStatus.hasToken ||
@@ -179,7 +184,7 @@ export default function ActionLineItem({
)}
{isSearchToolAndNotInProject && (
<Button
<IconButton
icon={
isSearchToolWithNoConnectors ? SvgSettings : SvgChevronRight
}
@@ -188,8 +193,11 @@ export default function ActionLineItem({
router.push("/admin/add-connector");
else onSourceManagementOpen?.();
})}
prominence="tertiary"
size="sm"
internal
className={cn(
isSearchToolWithNoConnectors &&
"invisible group-hover/LineItem:visible"
)}
tooltip={
isSearchToolWithNoConnectors
? "Add Connectors"

Some files were not shown because too many files have changed in this diff Show More