mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-02-27 04:35:50 +00:00
Compare commits
69 Commits
csv_render
...
experiment
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
43a59e4d74 | ||
|
|
dc8fc7eefc | ||
|
|
78f7914093 | ||
|
|
f4d777b80d | ||
|
|
da4d57b5e3 | ||
|
|
dcdcd067bd | ||
|
|
8b15a29723 | ||
|
|
763853674f | ||
|
|
429b6f3465 | ||
|
|
37d5be1b40 | ||
|
|
8ab99dbb06 | ||
|
|
52799e9c7a | ||
|
|
aef009cc97 | ||
|
|
18d1ea1770 | ||
|
|
f336ad00f4 | ||
|
|
0558e687d9 | ||
|
|
784a99e24a | ||
|
|
da1f5a11f4 | ||
|
|
5633805890 | ||
|
|
0817b45ae1 | ||
|
|
af0e4bdebc | ||
|
|
4cd2320732 | ||
|
|
90a361f0e1 | ||
|
|
194efde97b | ||
|
|
d922a42262 | ||
|
|
f00c3a486e | ||
|
|
192080c9e4 | ||
|
|
c5787dc073 | ||
|
|
d424d6462c | ||
|
|
ecea86deb6 | ||
|
|
a5c1f50a8a | ||
|
|
4a04cfd486 | ||
|
|
f22e9628db | ||
|
|
255ba10af6 | ||
|
|
563202a080 | ||
|
|
1062dc0743 | ||
|
|
0826348568 | ||
|
|
375079136d | ||
|
|
82aad5e253 | ||
|
|
beb1c49c69 | ||
|
|
c4556515be | ||
|
|
a4387f230b | ||
|
|
d91e452658 | ||
|
|
dd274f8667 | ||
|
|
2c82f0da16 | ||
|
|
26101636f2 | ||
|
|
5e2c0c6cf4 | ||
|
|
33b64db498 | ||
|
|
b925cc1a56 | ||
|
|
bac4b7c945 | ||
|
|
6f6ef1e657 | ||
|
|
885c69f460 | ||
|
|
4b837303ff | ||
|
|
d856a9befb | ||
|
|
adade353c5 | ||
|
|
3cb6ec2f85 | ||
|
|
691eebf00a | ||
|
|
905b6633e6 | ||
|
|
fd088196ff | ||
|
|
cafbf5b8be | ||
|
|
1235181559 | ||
|
|
caa2e45632 | ||
|
|
9c62e03120 | ||
|
|
0937305064 | ||
|
|
e4c06570e3 | ||
|
|
78fc7c86d7 | ||
|
|
84d3aea847 | ||
|
|
00a404d3cd | ||
|
|
787cf90d96 |
@@ -11,6 +11,11 @@ permissions:
|
||||
|
||||
jobs:
|
||||
cherry-pick-to-latest-release:
|
||||
outputs:
|
||||
should_cherrypick: ${{ steps.gate.outputs.should_cherrypick }}
|
||||
pr_number: ${{ steps.gate.outputs.pr_number }}
|
||||
cherry_pick_reason: ${{ steps.run_cherry_pick.outputs.reason }}
|
||||
cherry_pick_details: ${{ steps.run_cherry_pick.outputs.details }}
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
@@ -36,9 +41,13 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Read the PR body and check whether the helper checkbox is checked.
|
||||
pr_body="$(gh api "repos/${GITHUB_REPOSITORY}/pulls/${pr_number}" --jq '.body // ""')"
|
||||
# Read the PR once so we can gate behavior and infer preferred actor.
|
||||
pr_json="$(gh api "repos/${GITHUB_REPOSITORY}/pulls/${pr_number}")"
|
||||
pr_body="$(printf '%s' "$pr_json" | jq -r '.body // ""')"
|
||||
merged_by="$(printf '%s' "$pr_json" | jq -r '.merged_by.login // ""')"
|
||||
|
||||
echo "pr_number=$pr_number" >> "$GITHUB_OUTPUT"
|
||||
echo "merged_by=$merged_by" >> "$GITHUB_OUTPUT"
|
||||
|
||||
if echo "$pr_body" | grep -qiE "\\[x\\][[:space:]]*(\\[[^]]+\\][[:space:]]*)?Please cherry-pick this PR to the latest release version"; then
|
||||
echo "should_cherrypick=true" >> "$GITHUB_OUTPUT"
|
||||
@@ -71,9 +80,82 @@ jobs:
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
- name: Create cherry-pick PR to latest release
|
||||
id: run_cherry_pick
|
||||
if: steps.gate.outputs.should_cherrypick == 'true'
|
||||
continue-on-error: true
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
CHERRY_PICK_ASSIGNEE: ${{ steps.gate.outputs.merged_by }}
|
||||
run: |
|
||||
uv run --no-sync --with onyx-devtools ods cherry-pick "${GITHUB_SHA}" --yes --no-verify
|
||||
set -o pipefail
|
||||
output_file="$(mktemp)"
|
||||
uv run --no-sync --with onyx-devtools ods cherry-pick "${GITHUB_SHA}" --yes --no-verify 2>&1 | tee "$output_file"
|
||||
exit_code="${PIPESTATUS[0]}"
|
||||
|
||||
if [ "${exit_code}" -eq 0 ]; then
|
||||
echo "status=success" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "status=failure" >> "$GITHUB_OUTPUT"
|
||||
|
||||
reason="command-failed"
|
||||
if grep -qiE "merge conflict during cherry-pick|CONFLICT|could not apply|cherry-pick in progress with staged changes" "$output_file"; then
|
||||
reason="merge-conflict"
|
||||
fi
|
||||
echo "reason=${reason}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
{
|
||||
echo "details<<EOF"
|
||||
tail -n 40 "$output_file"
|
||||
echo "EOF"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Mark workflow as failed if cherry-pick failed
|
||||
if: steps.gate.outputs.should_cherrypick == 'true' && steps.run_cherry_pick.outputs.status == 'failure'
|
||||
run: |
|
||||
echo "::error::Automated cherry-pick failed (${{ steps.run_cherry_pick.outputs.reason }})."
|
||||
exit 1
|
||||
|
||||
notify-slack-on-cherry-pick-failure:
|
||||
needs:
|
||||
- cherry-pick-to-latest-release
|
||||
if: always() && needs.cherry-pick-to-latest-release.outputs.should_cherrypick == 'true' && needs.cherry-pick-to-latest-release.result != 'success'
|
||||
runs-on: ubuntu-slim
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Build cherry-pick failure summary
|
||||
id: failure-summary
|
||||
env:
|
||||
SOURCE_PR_NUMBER: ${{ needs.cherry-pick-to-latest-release.outputs.pr_number }}
|
||||
CHERRY_PICK_REASON: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_reason }}
|
||||
CHERRY_PICK_DETAILS: ${{ needs.cherry-pick-to-latest-release.outputs.cherry_pick_details }}
|
||||
run: |
|
||||
source_pr_url="https://github.com/${GITHUB_REPOSITORY}/pull/${SOURCE_PR_NUMBER}"
|
||||
|
||||
reason_text="cherry-pick command failed"
|
||||
if [ "${CHERRY_PICK_REASON}" = "merge-conflict" ]; then
|
||||
reason_text="merge conflict during cherry-pick"
|
||||
fi
|
||||
|
||||
details_excerpt="$(printf '%s' "${CHERRY_PICK_DETAILS}" | tail -n 8 | tr '\n' ' ' | sed "s/[[:space:]]\\+/ /g" | sed "s/\"/'/g" | cut -c1-350)"
|
||||
failed_jobs="• cherry-pick-to-latest-release\\n• source PR: ${source_pr_url}\\n• reason: ${reason_text}"
|
||||
if [ -n "${details_excerpt}" ]; then
|
||||
failed_jobs="${failed_jobs}\\n• excerpt: ${details_excerpt}"
|
||||
fi
|
||||
|
||||
echo "jobs=${failed_jobs}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Notify #cherry-pick-prs about cherry-pick failure
|
||||
uses: ./.github/actions/slack-notify
|
||||
with:
|
||||
webhook-url: ${{ secrets.CHERRY_PICK_PRS_WEBHOOK }}
|
||||
failed-jobs: ${{ steps.failure-summary.outputs.jobs }}
|
||||
title: "🚨 Automated Cherry-Pick Failed"
|
||||
ref-name: ${{ github.ref_name }}
|
||||
|
||||
@@ -116,7 +116,6 @@ jobs:
|
||||
run: |
|
||||
cat <<EOF > deployment/docker_compose/.env
|
||||
COMPOSE_PROFILES=s3-filestore,opensearch-enabled
|
||||
CODE_INTERPRETER_BETA_ENABLED=true
|
||||
DISABLE_TELEMETRY=true
|
||||
OPENSEARCH_FOR_ONYX_ENABLED=true
|
||||
EOF
|
||||
|
||||
4
.github/workflows/pr-integration-tests.yml
vendored
4
.github/workflows/pr-integration-tests.yml
vendored
@@ -20,6 +20,7 @@ env:
|
||||
# Test Environment Variables
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
SLACK_BOT_TOKEN_TEST_SPACE: ${{ secrets.SLACK_BOT_TOKEN_TEST_SPACE }}
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ vars.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_USER_NAME: ${{ vars.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
@@ -423,6 +424,7 @@ jobs:
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e EXA_API_KEY=${EXA_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e SLACK_BOT_TOKEN_TEST_SPACE=${SLACK_BOT_TOKEN_TEST_SPACE} \
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
|
||||
@@ -443,6 +445,7 @@ jobs:
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
|
||||
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
|
||||
-e ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${{ matrix.edition == 'ee' && 'true' || 'false' }} \
|
||||
${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
|
||||
/app/tests/integration/${{ matrix.test-dir.path }}
|
||||
|
||||
@@ -701,6 +704,7 @@ jobs:
|
||||
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
|
||||
-e EXA_API_KEY=${EXA_API_KEY} \
|
||||
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
|
||||
-e SLACK_BOT_TOKEN_TEST_SPACE=${SLACK_BOT_TOKEN_TEST_SPACE} \
|
||||
-e TEST_WEB_HOSTNAME=test-runner \
|
||||
-e AUTH_TYPE=cloud \
|
||||
-e MULTI_TENANT=true \
|
||||
|
||||
@@ -548,7 +548,7 @@ class in the utils over directly calling the APIs with a library like `requests`
|
||||
calling the utilities directly (e.g. do NOT create admin users with
|
||||
`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
|
||||
|
||||
A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
|
||||
A great example of this type of test is `backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py`.
|
||||
|
||||
To run them:
|
||||
|
||||
@@ -616,3 +616,9 @@ This is a minimal list - feel free to include more. Do NOT write code as part of
|
||||
Keep it high level. You can reference certain files or functions though.
|
||||
|
||||
Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
|
||||
|
||||
## Best Practices
|
||||
|
||||
In addition to the other content in this file, best practices for contributing
|
||||
to the codebase can be found at `contributing_guides/best_practices.md`.
|
||||
Understand its contents and follow them.
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
"""code interpreter seed
|
||||
|
||||
Revision ID: 07b98176f1de
|
||||
Revises: 7cb492013621
|
||||
Create Date: 2026-02-23 15:55:07.606784
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "07b98176f1de"
|
||||
down_revision = "7cb492013621"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Seed the single ``code_interpreter_server`` row.

    The insert is guarded so that it is a no-op when the table already holds
    a row; running it against a non-empty table therefore never produces a
    second row.
    """
    # NOTE: exactly one code_interpreter_server row should exist (never zero,
    # never more than one). The NOT EXISTS guard keeps the seed idempotent.
    op.execute(
        sa.text(
            "INSERT INTO code_interpreter_server (server_enabled) "
            "SELECT true "
            "WHERE NOT EXISTS (SELECT 1 FROM code_interpreter_server)"
        )
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Undo the seed by deleting every row of ``code_interpreter_server``."""
    clear_rows = sa.text("DELETE FROM code_interpreter_server")
    op.execute(clear_rows)
|
||||
@@ -0,0 +1,48 @@
|
||||
"""add enterprise and name fields to scim_user_mapping
|
||||
|
||||
Revision ID: 7616121f6e97
|
||||
Revises: 07b98176f1de
|
||||
Create Date: 2026-02-23 12:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "7616121f6e97"
|
||||
down_revision = "07b98176f1de"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable enterprise/name/email columns to ``scim_user_mapping``."""
    # (column name, column type) pairs, added in this order. Every column is
    # nullable, so rows that already exist need no backfill.
    new_columns = (
        ("department", sa.String()),
        ("manager", sa.String()),
        ("given_name", sa.String()),
        ("family_name", sa.String()),
        ("scim_emails_json", sa.Text()),
    )
    for column_name, column_type in new_columns:
        op.add_column(
            "scim_user_mapping",
            sa.Column(column_name, column_type, nullable=True),
        )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the columns added by ``upgrade``, in reverse order of creation."""
    dropped_columns = (
        "scim_emails_json",
        "family_name",
        "given_name",
        "manager",
        "department",
    )
    for column_name in dropped_columns:
        op.drop_column("scim_user_mapping", column_name)
|
||||
@@ -22,6 +22,7 @@ from onyx.document_index.vespa_constants import HIDDEN
|
||||
from onyx.document_index.vespa_constants import IMAGE_FILE_NAME
|
||||
from onyx.document_index.vespa_constants import METADATA_LIST
|
||||
from onyx.document_index.vespa_constants import METADATA_SUFFIX
|
||||
from onyx.document_index.vespa_constants import PERSONAS
|
||||
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
|
||||
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
|
||||
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
|
||||
@@ -58,6 +59,7 @@ FIELDS_NEEDED_FOR_TRANSFORMATION: list[str] = [
|
||||
METADATA_SUFFIX,
|
||||
DOCUMENT_SETS,
|
||||
USER_PROJECT,
|
||||
PERSONAS,
|
||||
PRIMARY_OWNERS,
|
||||
SECONDARY_OWNERS,
|
||||
ACCESS_CONTROL_LIST,
|
||||
@@ -276,6 +278,7 @@ def transform_vespa_chunks_to_opensearch_chunks(
|
||||
)
|
||||
)
|
||||
user_projects: list[int] | None = vespa_chunk.get(USER_PROJECT)
|
||||
personas: list[int] | None = vespa_chunk.get(PERSONAS)
|
||||
primary_owners: list[str] | None = vespa_chunk.get(PRIMARY_OWNERS)
|
||||
secondary_owners: list[str] | None = vespa_chunk.get(SECONDARY_OWNERS)
|
||||
|
||||
@@ -325,6 +328,7 @@ def transform_vespa_chunks_to_opensearch_chunks(
|
||||
metadata_suffix=metadata_suffix,
|
||||
document_sets=document_sets,
|
||||
user_projects=user_projects,
|
||||
personas=personas,
|
||||
primary_owners=primary_owners,
|
||||
secondary_owners=secondary_owners,
|
||||
tenant_id=tenant_state,
|
||||
|
||||
@@ -5,8 +5,10 @@ from uuid import UUID
|
||||
|
||||
import httpx
|
||||
import sqlalchemy as sa
|
||||
from celery import Celery
|
||||
from celery import shared_task
|
||||
from celery import Task
|
||||
from redis import Redis
|
||||
from redis.lock import Lock as RedisLock
|
||||
from retry import retry
|
||||
from sqlalchemy import select
|
||||
@@ -24,12 +26,14 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROCESSING_TASK_EXPIRES
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.configs.constants import USER_FILE_PROCESSING_MAX_QUEUE_DEPTH
|
||||
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
|
||||
from onyx.connectors.file.connector import LocalFileConnector
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
@@ -75,10 +79,58 @@ def _user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
|
||||
return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}"
|
||||
|
||||
|
||||
def _user_file_project_sync_queued_key(user_file_id: str | UUID) -> str:
    """Build the Redis key of the "already queued" guard for one user file."""
    prefix = OnyxRedisLocks.USER_FILE_PROJECT_SYNC_QUEUED_PREFIX
    return f"{prefix}:{user_file_id}"
|
||||
|
||||
|
||||
def _user_file_delete_lock_key(user_file_id: str | UUID) -> str:
    """Build the Redis key of the delete lock for one user file."""
    prefix = OnyxRedisLocks.USER_FILE_DELETE_LOCK_PREFIX
    return f"{prefix}:{user_file_id}"
|
||||
|
||||
|
||||
def get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:
    """Return the current length of the user-file project-sync queue.

    The length is whatever ``celery_get_queue_length`` reports for
    ``OnyxCeleryQueues.USER_FILE_PROJECT_SYNC`` on the broker's Redis client.
    """
    # Reach through the broker connection to the underlying Redis client.
    redis_celery: Redis = celery_app.broker_connection().channel().client  # type: ignore
    queue_name = OnyxCeleryQueues.USER_FILE_PROJECT_SYNC
    return celery_get_queue_length(queue_name, redis_celery)
|
||||
|
||||
|
||||
def enqueue_user_file_project_sync_task(
    *,
    celery_app: Celery,
    redis_client: Redis,
    user_file_id: str | UUID,
    tenant_id: str,
    priority: OnyxCeleryPriority = OnyxCeleryPriority.HIGH,
) -> bool:
    """Publish a project-sync task for one user file unless one is already queued.

    Returns:
        True when the task was published; False when the per-file "queued"
        guard key was already present and nothing was sent.

    Raises:
        Whatever ``celery_app.send_task`` raises. The guard key is removed
        before the exception propagates.
    """
    guard_key = _user_file_project_sync_queued_key(user_file_id)

    # SET with NX + EX claims the guard atomically and lets it expire on its
    # own, so a guard that is never cleared cannot block the file forever.
    guard_claimed = redis_client.set(
        guard_key,
        1,
        nx=True,
        ex=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,
    )
    if guard_claimed:
        try:
            celery_app.send_task(
                OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
                kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
                queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
                priority=priority,
                expires=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,
            )
        except Exception:
            # Publishing failed: release the guard so a later attempt can
            # enqueue this file, then surface the original error.
            redis_client.delete(guard_key)
            raise
        return True

    return False
|
||||
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2, jitter=(0.0, 1.0))
|
||||
def _visit_chunks(
|
||||
*,
|
||||
@@ -632,8 +684,8 @@ def process_single_user_file_delete(
|
||||
ignore_result=True,
|
||||
)
|
||||
def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
|
||||
"""Scan for user files with PROJECT_SYNC status and enqueue per-file tasks."""
|
||||
task_logger.info("check_for_user_file_project_sync - Starting")
|
||||
"""Scan for user files needing project sync and enqueue per-file tasks."""
|
||||
task_logger.info("Starting")
|
||||
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
lock: RedisLock = redis_client.lock(
|
||||
@@ -645,7 +697,16 @@ def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
|
||||
return None
|
||||
|
||||
enqueued = 0
|
||||
skipped_guard = 0
|
||||
try:
|
||||
queue_depth = get_user_file_project_sync_queue_depth(self.app)
|
||||
if queue_depth > USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH:
|
||||
task_logger.warning(
|
||||
f"Queue depth {queue_depth} exceeds "
|
||||
f"{USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH}, skipping enqueue for tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
user_file_ids = (
|
||||
db_session.execute(
|
||||
@@ -661,19 +722,23 @@ def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
|
||||
)
|
||||
|
||||
for user_file_id in user_file_ids:
|
||||
self.app.send_task(
|
||||
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
|
||||
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
|
||||
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
|
||||
if not enqueue_user_file_project_sync_task(
|
||||
celery_app=self.app,
|
||||
redis_client=redis_client,
|
||||
user_file_id=user_file_id,
|
||||
tenant_id=tenant_id,
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
)
|
||||
):
|
||||
skipped_guard += 1
|
||||
continue
|
||||
enqueued += 1
|
||||
finally:
|
||||
if lock.owned():
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"check_for_user_file_project_sync - Enqueued {enqueued} tasks for tenant={tenant_id}"
|
||||
f"Enqueued {enqueued} "
|
||||
f"Skipped guard {skipped_guard} tasks for tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -692,6 +757,8 @@ def process_single_user_file_project_sync(
|
||||
)
|
||||
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
redis_client.delete(_user_file_project_sync_queued_key(user_file_id))
|
||||
|
||||
file_lock: RedisLock = redis_client.lock(
|
||||
_user_file_project_sync_lock_key(user_file_id),
|
||||
timeout=CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT,
|
||||
|
||||
@@ -58,6 +58,8 @@ from onyx.file_store.document_batch_storage import DocumentBatchStorage
|
||||
from onyx.file_store.document_batch_storage import get_document_batch_storage
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
|
||||
from onyx.indexing.postgres_sanitization import sanitize_document_for_postgres
|
||||
from onyx.indexing.postgres_sanitization import sanitize_hierarchy_nodes_for_postgres
|
||||
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
|
||||
from onyx.redis.redis_hierarchy import ensure_source_node_exists
|
||||
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
|
||||
@@ -156,36 +158,7 @@ def strip_null_characters(doc_batch: list[Document]) -> list[Document]:
|
||||
logger.warning(
|
||||
f"doc {doc.id} too large, Document size: {sys.getsizeof(doc)}"
|
||||
)
|
||||
cleaned_doc = doc.model_copy()
|
||||
|
||||
# Postgres cannot handle NUL characters in text fields
|
||||
if "\x00" in cleaned_doc.id:
|
||||
logger.warning(f"NUL characters found in document ID: {cleaned_doc.id}")
|
||||
cleaned_doc.id = cleaned_doc.id.replace("\x00", "")
|
||||
|
||||
if cleaned_doc.title and "\x00" in cleaned_doc.title:
|
||||
logger.warning(
|
||||
f"NUL characters found in document title: {cleaned_doc.title}"
|
||||
)
|
||||
cleaned_doc.title = cleaned_doc.title.replace("\x00", "")
|
||||
|
||||
if "\x00" in cleaned_doc.semantic_identifier:
|
||||
logger.warning(
|
||||
f"NUL characters found in document semantic identifier: {cleaned_doc.semantic_identifier}"
|
||||
)
|
||||
cleaned_doc.semantic_identifier = cleaned_doc.semantic_identifier.replace(
|
||||
"\x00", ""
|
||||
)
|
||||
|
||||
for section in cleaned_doc.sections:
|
||||
if section.link is not None:
|
||||
section.link = section.link.replace("\x00", "")
|
||||
|
||||
# since text can be longer, just replace to avoid double scan
|
||||
if isinstance(section, TextSection) and section.text is not None:
|
||||
section.text = section.text.replace("\x00", "")
|
||||
|
||||
cleaned_batch.append(cleaned_doc)
|
||||
cleaned_batch.append(sanitize_document_for_postgres(doc))
|
||||
|
||||
return cleaned_batch
|
||||
|
||||
@@ -602,10 +575,13 @@ def connector_document_extraction(
|
||||
|
||||
# Process hierarchy nodes batch - upsert to Postgres and cache in Redis
|
||||
if hierarchy_node_batch:
|
||||
hierarchy_node_batch_cleaned = (
|
||||
sanitize_hierarchy_nodes_for_postgres(hierarchy_node_batch)
|
||||
)
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
upserted_nodes = upsert_hierarchy_nodes_batch(
|
||||
db_session=db_session,
|
||||
nodes=hierarchy_node_batch,
|
||||
nodes=hierarchy_node_batch_cleaned,
|
||||
source=db_connector.source,
|
||||
commit=True,
|
||||
is_connector_public=is_connector_public,
|
||||
@@ -624,7 +600,7 @@ def connector_document_extraction(
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Persisted and cached {len(hierarchy_node_batch)} hierarchy nodes "
|
||||
f"Persisted and cached {len(hierarchy_node_batch_cleaned)} hierarchy nodes "
|
||||
f"for attempt={index_attempt_id}"
|
||||
)
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.context.search.models import SearchDocsResponse
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.memory import add_memory
|
||||
from onyx.db.memory import update_memory_at_index
|
||||
from onyx.db.memory import UserMemoryContext
|
||||
@@ -656,7 +657,12 @@ def run_llm_loop(
|
||||
fallback_extraction_attempted: bool = False
|
||||
citation_mapping: dict[int, str] = {} # Maps citation_num -> document_id/URL
|
||||
|
||||
default_base_system_prompt: str = get_default_base_system_prompt(db_session)
|
||||
# Fetch this in a short-lived session so the long-running stream loop does
|
||||
# not pin a connection just to keep read state alive.
|
||||
with get_session_with_current_tenant() as prompt_db_session:
|
||||
default_base_system_prompt: str = get_default_base_system_prompt(
|
||||
prompt_db_session
|
||||
)
|
||||
system_prompt = None
|
||||
custom_agent_prompt_msg = None
|
||||
|
||||
|
||||
@@ -856,6 +856,11 @@ def handle_stream_message_objects(
|
||||
reserved_tokens=reserved_token_count,
|
||||
)
|
||||
|
||||
# Release any read transaction before entering the long-running LLM stream.
|
||||
# Without this, the request-scoped session can keep a connection checked out
|
||||
# for the full stream duration.
|
||||
db_session.commit()
|
||||
|
||||
# The stream generator can resume on a different worker thread after early yields.
|
||||
# Set this right before launching the LLM loop so run_in_background copies the right context.
|
||||
if new_msg_req.mock_llm_response is not None:
|
||||
|
||||
@@ -210,10 +210,10 @@ AUTH_COOKIE_EXPIRE_TIME_SECONDS = int(
|
||||
REQUIRE_EMAIL_VERIFICATION = (
|
||||
os.environ.get("REQUIRE_EMAIL_VERIFICATION", "").lower() == "true"
|
||||
)
|
||||
SMTP_SERVER = os.environ.get("SMTP_SERVER") or "smtp.gmail.com"
|
||||
SMTP_SERVER = os.environ.get("SMTP_SERVER") or ""
|
||||
SMTP_PORT = int(os.environ.get("SMTP_PORT") or "587")
|
||||
SMTP_USER = os.environ.get("SMTP_USER", "your-email@gmail.com")
|
||||
SMTP_PASS = os.environ.get("SMTP_PASS", "your-gmail-password")
|
||||
SMTP_USER = os.environ.get("SMTP_USER") or ""
|
||||
SMTP_PASS = os.environ.get("SMTP_PASS") or ""
|
||||
EMAIL_FROM = os.environ.get("EMAIL_FROM") or SMTP_USER
|
||||
|
||||
SENDGRID_API_KEY = os.environ.get("SENDGRID_API_KEY") or ""
|
||||
|
||||
@@ -167,6 +167,14 @@ CELERY_USER_FILE_PROCESSING_TASK_EXPIRES = 60 # 1 minute (in seconds)
|
||||
# beat generator stops adding more. Prevents unbounded queue growth when workers
|
||||
# fall behind.
|
||||
USER_FILE_PROCESSING_MAX_QUEUE_DEPTH = 500
|
||||
# How long a queued user-file-project-sync task remains valid.
|
||||
# Should be short enough to discard stale queue entries under load while still
|
||||
# allowing workers enough time to pick up new tasks.
|
||||
CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES = 60 # 1 minute (in seconds)
|
||||
|
||||
# Max queue depth before user-file-project-sync producers stop enqueuing.
|
||||
# This applies backpressure when workers are falling behind.
|
||||
USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH = 500
|
||||
|
||||
CELERY_USER_FILE_PROJECT_SYNC_LOCK_TIMEOUT = 5 * 60 # 5 minutes (in seconds)
|
||||
|
||||
@@ -459,6 +467,7 @@ class OnyxRedisLocks:
|
||||
USER_FILE_QUEUED_PREFIX = "da_lock:user_file_queued"
|
||||
USER_FILE_PROJECT_SYNC_BEAT_LOCK = "da_lock:check_user_file_project_sync_beat"
|
||||
USER_FILE_PROJECT_SYNC_LOCK_PREFIX = "da_lock:user_file_project_sync"
|
||||
USER_FILE_PROJECT_SYNC_QUEUED_PREFIX = "da_lock:user_file_project_sync_queued"
|
||||
USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
|
||||
USER_FILE_DELETE_LOCK_PREFIX = "da_lock:user_file_delete"
|
||||
|
||||
|
||||
@@ -16,6 +16,22 @@ from onyx.utils.retry_wrapper import retry_builder
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
_RATE_LIMIT_REASONS = {"userRateLimitExceeded", "rateLimitExceeded"}


def _is_rate_limit_error(error: HttpError) -> bool:
    """Return True when ``error`` is a rate-limit response.

    Google sometimes returns rate-limit errors as 403 with reason
    'userRateLimitExceeded' instead of 429. This helper detects both.

    A 429 always counts. A 403 counts only when one of
    ``_RATE_LIMIT_REASONS`` appears either as the ``reason`` of a dict entry
    in ``error.error_details`` or anywhere in ``str(error)``. Every other
    status returns False.
    """
    status = error.resp.status
    if status == 429:
        return True
    if status != 403:
        return False

    # Prefer the structured details when the error carries them; the
    # attribute may be missing or empty, hence the getattr/`or []`.
    details = getattr(error, "error_details", None) or []
    if any(
        isinstance(detail, dict) and detail.get("reason") in _RATE_LIMIT_REASONS
        for detail in details
    ):
        return True

    # Fall back to the rendered error text, checked against the same set of
    # reasons so the two code paths cannot drift apart.
    message = str(error)
    return any(reason in message for reason in _RATE_LIMIT_REASONS)
|
||||
|
||||
|
||||
# Google Drive APIs are quite flakey and may 500 for an
|
||||
# extended period of time. This is now addressed by checkpointing.
|
||||
@@ -57,7 +73,7 @@ def _execute_with_retry(request: Any) -> Any:
|
||||
except HttpError as error:
|
||||
attempt += 1
|
||||
|
||||
if error.resp.status == 429:
|
||||
if _is_rate_limit_error(error):
|
||||
# Attempt to get 'Retry-After' from headers
|
||||
retry_after = error.resp.get("Retry-After")
|
||||
if retry_after:
|
||||
@@ -140,16 +156,16 @@ def _execute_single_retrieval(
|
||||
)
|
||||
logger.error(f"Error executing request: {e}")
|
||||
raise e
|
||||
elif _is_rate_limit_error(e):
|
||||
results = _execute_with_retry(
|
||||
lambda: retrieval_function(**request_kwargs).execute()
|
||||
)
|
||||
elif e.resp.status == 404 or e.resp.status == 403:
|
||||
if continue_on_404_or_403:
|
||||
logger.debug(f"Error executing request: {e}")
|
||||
results = {}
|
||||
else:
|
||||
raise e
|
||||
elif e.resp.status == 429:
|
||||
results = _execute_with_retry(
|
||||
lambda: retrieval_function(**request_kwargs).execute()
|
||||
)
|
||||
else:
|
||||
logger.exception("Error executing request:")
|
||||
raise e
|
||||
|
||||
96
backend/onyx/connectors/microsoft_graph_env.py
Normal file
96
backend/onyx/connectors/microsoft_graph_env.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""Inverse mapping from user-facing Microsoft host URLs to the SDK's AzureEnvironment.
|
||||
|
||||
The office365 library's GraphClient requires an ``AzureEnvironment`` string
|
||||
(e.g. ``"Global"``, ``"GCC High"``) to route requests to the correct national
|
||||
cloud. Our connectors instead expose free-text ``authority_host`` and
|
||||
``graph_api_host`` fields so the frontend doesn't need to know about SDK
|
||||
internals.
|
||||
|
||||
This module bridges the gap: given the two host URLs the user configured, it
|
||||
resolves the matching ``AzureEnvironment`` value (and the implied SharePoint
|
||||
domain suffix) so callers can pass ``environment=…`` to ``GraphClient``.
|
||||
"""
|
||||
|
||||
from office365.graph_client import AzureEnvironment # type: ignore[import-untyped]
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.connectors.exceptions import ConnectorValidationError
|
||||
|
||||
|
||||
class MicrosoftGraphEnvironment(BaseModel):
    """One row of the inverse mapping.

    Ties an ``AzureEnvironment`` value to the host URLs that identify it.
    """

    # AzureEnvironment value to pass to the SDK for this cloud.
    environment: str
    # Microsoft Graph API base URL for this cloud, without a trailing slash.
    graph_host: str
    # Login/authority base URL for this cloud, without a trailing slash.
    authority_host: str
    # Domain suffix implied for SharePoint sites in this cloud.
    sharepoint_domain_suffix: str
|
||||
|
||||
|
||||
# Known clouds, one entry per AzureEnvironment value:
# (environment, graph host, authority host, SharePoint domain suffix).
# Two US Government entries share an authority host, which is why lookups
# are keyed on the graph host.
_ENVIRONMENTS: list[MicrosoftGraphEnvironment] = [
    MicrosoftGraphEnvironment(
        environment=environment,
        graph_host=graph_host,
        authority_host=authority_host,
        sharepoint_domain_suffix=sharepoint_domain_suffix,
    )
    for environment, graph_host, authority_host, sharepoint_domain_suffix in (
        (
            AzureEnvironment.Global,
            "https://graph.microsoft.com",
            "https://login.microsoftonline.com",
            "sharepoint.com",
        ),
        (
            AzureEnvironment.USGovernmentHigh,
            "https://graph.microsoft.us",
            "https://login.microsoftonline.us",
            "sharepoint.us",
        ),
        (
            AzureEnvironment.USGovernmentDoD,
            "https://dod-graph.microsoft.us",
            "https://login.microsoftonline.us",
            "sharepoint.us",
        ),
        (
            AzureEnvironment.China,
            "https://microsoftgraph.chinacloudapi.cn",
            "https://login.chinacloudapi.cn",
            "sharepoint.cn",
        ),
        (
            AzureEnvironment.Germany,
            "https://graph.microsoft.de",
            "https://login.microsoftonline.de",
            "sharepoint.de",
        ),
    )
]

# Lookup table from graph host URL to its environment row.
_GRAPH_HOST_INDEX: dict[str, MicrosoftGraphEnvironment] = {
    entry.graph_host: entry for entry in _ENVIRONMENTS
}
|
||||
|
||||
|
||||
def resolve_microsoft_environment(
    graph_api_host: str,
    authority_host: str,
) -> MicrosoftGraphEnvironment:
    """Return the ``MicrosoftGraphEnvironment`` that matches the supplied hosts.

    Both hosts are normalised before matching: surrounding whitespace and
    trailing slashes are dropped, and the comparison is case-insensitive
    because URL schemes and host names are case-insensitive while the lookup
    table stores them in lower case.

    Args:
        graph_api_host: Base URL of the Microsoft Graph API.
        authority_host: Base URL of the login/authority endpoint.

    Returns:
        The shared table row for the environment that owns ``graph_api_host``.

    Raises:
        ConnectorValidationError: when the combination is unknown or
            internally inconsistent (e.g. a GCC-High graph host paired with a
            commercial authority host).
    """
    graph_api_host = graph_api_host.strip().rstrip("/")
    authority_host = authority_host.strip().rstrip("/")

    # Look up with the lower-cased host, but keep the caller's spelling for
    # error messages so the reported value matches what was configured.
    env = _GRAPH_HOST_INDEX.get(graph_api_host.lower())
    if env is None:
        known = ", ".join(sorted(_GRAPH_HOST_INDEX))
        raise ConnectorValidationError(
            f"Unsupported Microsoft Graph API host '{graph_api_host}'. "
            f"Recognised hosts: {known}"
        )

    if env.authority_host != authority_host.lower():
        raise ConnectorValidationError(
            f"Authority host '{authority_host}' is inconsistent with "
            f"graph API host '{graph_api_host}'. "
            f"Expected authority host '{env.authority_host}' "
            f"for the {env.environment} environment."
        )

    return env
|
||||
@@ -6,6 +6,7 @@ from typing import cast
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
from pydantic import field_validator
|
||||
from pydantic import model_validator
|
||||
|
||||
from onyx.access.models import ExternalAccess
|
||||
@@ -167,6 +168,14 @@ class DocumentBase(BaseModel):
|
||||
# list of strings.
|
||||
metadata: dict[str, str | list[str]]
|
||||
|
||||
@field_validator("metadata", mode="before")
@classmethod
def _coerce_metadata_values(cls, v: Any) -> Any:
    """Stringify metadata values before pydantic validates the field.

    List values become lists of ``str``; every other value is passed through
    ``str``. Keys are left untouched.

    Because this runs in ``mode="before"`` it receives the raw, unvalidated
    input. Anything that is not a dict is returned unchanged so that
    pydantic's own type check reports a proper validation error instead of
    this hook failing with an ``AttributeError`` on ``.items()``.
    """
    if not isinstance(v, dict):
        return v
    return {
        key: [str(item) for item in val] if isinstance(val, list) else str(val)
        for key, val in v.items()
    }
|
||||
|
||||
# UTC time
|
||||
doc_updated_at: datetime | None = None
|
||||
chunk_count: int | None = None
|
||||
|
||||
@@ -47,6 +47,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import IndexingHeartbeatInterface
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnectorWithPermSync
|
||||
from onyx.connectors.microsoft_graph_env import resolve_microsoft_environment
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import ConnectorCheckpoint
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
@@ -146,7 +147,9 @@ class DriveItemData(BaseModel):
|
||||
self.id,
|
||||
ResourcePath("items", ResourcePath(self.drive_id, ResourcePath("drives"))),
|
||||
)
|
||||
return DriveItem(graph_client, path)
|
||||
item = DriveItem(graph_client, path)
|
||||
item.set_property("id", self.id)
|
||||
return item
|
||||
|
||||
|
||||
# The office365 library's ClientContext caches the access token from its
|
||||
@@ -837,10 +840,20 @@ class SharepointConnector(
|
||||
self._cached_rest_ctx: ClientContext | None = None
|
||||
self._cached_rest_ctx_url: str | None = None
|
||||
self._cached_rest_ctx_created_at: float = 0.0
|
||||
self.authority_host = authority_host.rstrip("/")
|
||||
self.graph_api_host = graph_api_host.rstrip("/")
|
||||
|
||||
resolved_env = resolve_microsoft_environment(graph_api_host, authority_host)
|
||||
self._azure_environment = resolved_env.environment
|
||||
self.authority_host = resolved_env.authority_host
|
||||
self.graph_api_host = resolved_env.graph_host
|
||||
self.graph_api_base = f"{self.graph_api_host}/v1.0"
|
||||
self.sharepoint_domain_suffix = sharepoint_domain_suffix
|
||||
self.sharepoint_domain_suffix = resolved_env.sharepoint_domain_suffix
|
||||
if sharepoint_domain_suffix != resolved_env.sharepoint_domain_suffix:
|
||||
logger.warning(
|
||||
f"Configured sharepoint_domain_suffix '{sharepoint_domain_suffix}' "
|
||||
f"differs from the expected suffix '{resolved_env.sharepoint_domain_suffix}' "
|
||||
f"for the {resolved_env.environment} environment. "
|
||||
f"Using '{resolved_env.sharepoint_domain_suffix}'."
|
||||
)
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
# Validate that at least one content type is enabled
|
||||
@@ -1592,6 +1605,7 @@ class SharepointConnector(
|
||||
if certificate_data is None:
|
||||
raise RuntimeError("Failed to load certificate")
|
||||
|
||||
logger.info(f"Creating MSAL app with authority url {authority_url}")
|
||||
self.msal_app = msal.ConfidentialClientApplication(
|
||||
authority=authority_url,
|
||||
client_id=sp_client_id,
|
||||
@@ -1623,7 +1637,9 @@ class SharepointConnector(
|
||||
raise ConnectorValidationError("Failed to acquire token for graph")
|
||||
return token
|
||||
|
||||
self._graph_client = GraphClient(_acquire_token_for_graph)
|
||||
self._graph_client = GraphClient(
|
||||
_acquire_token_for_graph, environment=self._azure_environment
|
||||
)
|
||||
if auth_method == SharepointAuthMethod.CERTIFICATE.value:
|
||||
org = self.graph_client.organization.get().execute_query()
|
||||
if not org or len(org) == 0:
|
||||
|
||||
@@ -11,6 +11,7 @@ from dateutil import parser
|
||||
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.exceptions import ConnectorValidationError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
@@ -258,3 +259,21 @@ class SlabConnector(LoadConnector, PollConnector, SlimConnectorWithPermSync):
|
||||
slim_doc_batch = []
|
||||
if slim_doc_batch:
|
||||
yield slim_doc_batch
|
||||
|
||||
def validate_connector_settings(self) -> None:
    """Run a very basic sanity check of the connector configuration.

    Checks that ``base_url`` carries an http(s) scheme and that
    ``get_all_post_ids`` succeeds with the configured bot token. More
    thorough validation could be added here.

    Raises:
        ConnectorValidationError: if the base URL has no http(s) scheme or
            fetching the post ids fails for any reason other than missing
            credentials.
        ConnectorMissingCredentialError: propagated unchanged.
    """
    if not self.base_url.startswith(("https://", "http://")):
        raise ConnectorValidationError("Base URL must start with https:// or http://")

    try:
        get_all_post_ids(self.slab_bot_token)
    except ConnectorMissingCredentialError:
        # Missing credentials have their own dedicated error type; do not
        # mask it as a generic validation failure.
        raise
    except Exception as e:
        # Chain explicitly so the original failure stays attached as the cause.
        raise ConnectorValidationError(f"Failed to fetch posts from Slab: {e}") from e
|
||||
|
||||
@@ -23,6 +23,7 @@ from onyx.connectors.interfaces import CheckpointOutput
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnectorWithPermSync
|
||||
from onyx.connectors.microsoft_graph_env import resolve_microsoft_environment
|
||||
from onyx.connectors.models import ConnectorCheckpoint
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
from onyx.connectors.models import ConnectorMissingCredentialError
|
||||
@@ -73,8 +74,11 @@ class TeamsConnector(
|
||||
self.msal_app: msal.ConfidentialClientApplication | None = None
|
||||
self.max_workers = max_workers
|
||||
self.requested_team_list: list[str] = teams
|
||||
self.authority_host = authority_host.rstrip("/")
|
||||
self.graph_api_host = graph_api_host.rstrip("/")
|
||||
|
||||
resolved_env = resolve_microsoft_environment(graph_api_host, authority_host)
|
||||
self._azure_environment = resolved_env.environment
|
||||
self.authority_host = resolved_env.authority_host
|
||||
self.graph_api_host = resolved_env.graph_host
|
||||
|
||||
# impls for BaseConnector
|
||||
|
||||
@@ -106,7 +110,9 @@ class TeamsConnector(
|
||||
|
||||
return token
|
||||
|
||||
self.graph_client = GraphClient(_acquire_token_func)
|
||||
self.graph_client = GraphClient(
|
||||
_acquire_token_func, environment=self._azure_environment
|
||||
)
|
||||
return None
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
|
||||
21
backend/onyx/db/code_interpreter.py
Normal file
21
backend/onyx/db/code_interpreter.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.models import CodeInterpreterServer
|
||||
|
||||
|
||||
def fetch_code_interpreter_server(
    db_session: Session,
) -> CodeInterpreterServer:
    """Load the one ``CodeInterpreterServer`` row.

    ``.one()`` is used deliberately: the query must yield exactly one row,
    and SQLAlchemy raises if it yields none or several.
    """
    all_servers_query = select(CodeInterpreterServer)
    return db_session.scalars(all_servers_query).one()
|
||||
|
||||
|
||||
def update_code_interpreter_server_enabled(
    db_session: Session,
    enabled: bool,
) -> CodeInterpreterServer:
    """Set ``server_enabled`` on the one ``CodeInterpreterServer`` row and commit.

    Returns the updated row. Raises (via ``.one()``) when the table does not
    hold exactly one row.
    """
    target = fetch_code_interpreter_server(db_session)
    target.server_enabled = enabled
    db_session.commit()
    return target
|
||||
@@ -4940,6 +4940,11 @@ class ScimUserMapping(Base):
|
||||
ForeignKey("user.id", ondelete="CASCADE"), unique=True, nullable=False
|
||||
)
|
||||
scim_username: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
department: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
manager: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
given_name: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
family_name: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
scim_emails_json: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), nullable=False
|
||||
|
||||
@@ -2,6 +2,7 @@ import random
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from logging import getLogger
|
||||
from uuid import UUID
|
||||
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.db.chat import create_chat_session
|
||||
@@ -13,18 +14,26 @@ from onyx.db.models import ChatSession
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
def seed_chat_history(num_sessions: int, num_messages: int, days: int) -> None:
|
||||
def seed_chat_history(
|
||||
num_sessions: int,
|
||||
num_messages: int,
|
||||
days: int,
|
||||
user_id: UUID | None = None,
|
||||
persona_id: int | None = None,
|
||||
) -> None:
|
||||
"""Utility function to seed chat history for testing.
|
||||
|
||||
num_sessions: the number of sessions to seed
|
||||
num_messages: the number of messages to seed per sessions
|
||||
days: the number of days looking backwards from the current time over which to randomize
|
||||
the times.
|
||||
user_id: optional user to associate with sessions
|
||||
persona_id: optional persona/assistant to associate with sessions
|
||||
"""
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
logger.info(f"Seeding {num_sessions} sessions.")
|
||||
for y in range(0, num_sessions):
|
||||
create_chat_session(db_session, f"pytest_session_{y}", None, None)
|
||||
create_chat_session(db_session, f"pytest_session_{y}", user_id, persona_id)
|
||||
|
||||
# randomize all session times
|
||||
logger.info(f"Seeding {num_messages} messages per session.")
|
||||
|
||||
@@ -121,6 +121,7 @@ class VespaDocumentUserFields:
|
||||
"""
|
||||
|
||||
user_projects: list[int] | None = None
|
||||
personas: list[int] | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -148,6 +148,7 @@ class MetadataUpdateRequest(BaseModel):
|
||||
hidden: bool | None = None
|
||||
secondary_index_updated: bool | None = None
|
||||
project_ids: set[int] | None = None
|
||||
persona_ids: set[int] | None = None
|
||||
|
||||
|
||||
class IndexRetrievalFilters(BaseModel):
|
||||
|
||||
@@ -50,6 +50,7 @@ from onyx.document_index.opensearch.schema import DocumentSchema
|
||||
from onyx.document_index.opensearch.schema import get_opensearch_doc_chunk_id
|
||||
from onyx.document_index.opensearch.schema import GLOBAL_BOOST_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import HIDDEN_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import PERSONAS_FIELD_NAME
|
||||
from onyx.document_index.opensearch.schema import USER_PROJECTS_FIELD_NAME
|
||||
from onyx.document_index.opensearch.search import DocumentQuery
|
||||
from onyx.document_index.opensearch.search import (
|
||||
@@ -215,6 +216,7 @@ def _convert_onyx_chunk_to_opensearch_document(
|
||||
# OpenSearch and it will not store any data at all for this field, which
|
||||
# is different from supplying an empty list.
|
||||
user_projects=chunk.user_project or None,
|
||||
personas=chunk.personas or None,
|
||||
primary_owners=get_experts_stores_representations(
|
||||
chunk.source_document.primary_owners
|
||||
),
|
||||
@@ -362,6 +364,11 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
|
||||
if user_fields and user_fields.user_projects
|
||||
else None
|
||||
),
|
||||
persona_ids=(
|
||||
set(user_fields.personas)
|
||||
if user_fields and user_fields.personas
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -709,6 +716,10 @@ class OpenSearchDocumentIndex(DocumentIndex):
|
||||
properties_to_update[USER_PROJECTS_FIELD_NAME] = list(
|
||||
update_request.project_ids
|
||||
)
|
||||
if update_request.persona_ids is not None:
|
||||
properties_to_update[PERSONAS_FIELD_NAME] = list(
|
||||
update_request.persona_ids
|
||||
)
|
||||
|
||||
if not properties_to_update:
|
||||
if len(update_request.document_ids) > 1:
|
||||
|
||||
@@ -41,6 +41,7 @@ IMAGE_FILE_ID_FIELD_NAME = "image_file_id"
|
||||
SOURCE_LINKS_FIELD_NAME = "source_links"
|
||||
DOCUMENT_SETS_FIELD_NAME = "document_sets"
|
||||
USER_PROJECTS_FIELD_NAME = "user_projects"
|
||||
PERSONAS_FIELD_NAME = "personas"
|
||||
DOCUMENT_ID_FIELD_NAME = "document_id"
|
||||
CHUNK_INDEX_FIELD_NAME = "chunk_index"
|
||||
MAX_CHUNK_SIZE_FIELD_NAME = "max_chunk_size"
|
||||
@@ -156,6 +157,7 @@ class DocumentChunk(BaseModel):
|
||||
|
||||
document_sets: list[str] | None = None
|
||||
user_projects: list[int] | None = None
|
||||
personas: list[int] | None = None
|
||||
primary_owners: list[str] | None = None
|
||||
secondary_owners: list[str] | None = None
|
||||
|
||||
@@ -485,6 +487,7 @@ class DocumentSchema:
|
||||
# Product-specific fields.
|
||||
DOCUMENT_SETS_FIELD_NAME: {"type": "keyword"},
|
||||
USER_PROJECTS_FIELD_NAME: {"type": "integer"},
|
||||
PERSONAS_FIELD_NAME: {"type": "integer"},
|
||||
PRIMARY_OWNERS_FIELD_NAME: {"type": "keyword"},
|
||||
SECONDARY_OWNERS_FIELD_NAME: {"type": "keyword"},
|
||||
# OpenSearch metadata fields.
|
||||
|
||||
@@ -181,6 +181,11 @@ schema {{ schema_name }} {
|
||||
rank: filter
|
||||
attribute: fast-search
|
||||
}
|
||||
field personas type array<int> {
|
||||
indexing: summary | attribute
|
||||
rank: filter
|
||||
attribute: fast-search
|
||||
}
|
||||
}
|
||||
|
||||
# If using different tokenization settings, the fieldset has to be removed, and the field must
|
||||
|
||||
@@ -689,6 +689,9 @@ class VespaIndex(DocumentIndex):
|
||||
project_ids: set[int] | None = None
|
||||
if user_fields is not None and user_fields.user_projects is not None:
|
||||
project_ids = set(user_fields.user_projects)
|
||||
persona_ids: set[int] | None = None
|
||||
if user_fields is not None and user_fields.personas is not None:
|
||||
persona_ids = set(user_fields.personas)
|
||||
update_request = MetadataUpdateRequest(
|
||||
document_ids=[doc_id],
|
||||
doc_id_to_chunk_cnt={
|
||||
@@ -699,6 +702,7 @@ class VespaIndex(DocumentIndex):
|
||||
boost=fields.boost if fields is not None else None,
|
||||
hidden=fields.hidden if fields is not None else None,
|
||||
project_ids=project_ids,
|
||||
persona_ids=persona_ids,
|
||||
)
|
||||
|
||||
vespa_document_index.update([update_request])
|
||||
|
||||
@@ -46,6 +46,7 @@ from onyx.document_index.vespa_constants import METADATA
|
||||
from onyx.document_index.vespa_constants import METADATA_LIST
|
||||
from onyx.document_index.vespa_constants import METADATA_SUFFIX
|
||||
from onyx.document_index.vespa_constants import NUM_THREADS
|
||||
from onyx.document_index.vespa_constants import PERSONAS
|
||||
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
|
||||
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
|
||||
from onyx.document_index.vespa_constants import SECTION_CONTINUATION
|
||||
@@ -218,6 +219,7 @@ def _index_vespa_chunk(
|
||||
# still called `image_file_name` in Vespa for backwards compatibility
|
||||
IMAGE_FILE_NAME: chunk.image_file_id,
|
||||
USER_PROJECT: chunk.user_project if chunk.user_project is not None else [],
|
||||
PERSONAS: chunk.personas if chunk.personas is not None else [],
|
||||
BOOST: chunk.boost,
|
||||
AGGREGATED_CHUNK_BOOST_FACTOR: chunk.aggregated_chunk_boost_factor,
|
||||
}
|
||||
|
||||
@@ -183,6 +183,10 @@ def _update_single_chunk(
|
||||
model_config = {"frozen": True}
|
||||
assign: list[int]
|
||||
|
||||
class _Personas(BaseModel):
|
||||
model_config = {"frozen": True}
|
||||
assign: list[int]
|
||||
|
||||
class _VespaPutFields(BaseModel):
|
||||
model_config = {"frozen": True}
|
||||
# The names of these fields are based the Vespa schema. Changes to the
|
||||
@@ -193,6 +197,7 @@ def _update_single_chunk(
|
||||
access_control_list: _AccessControl | None = None
|
||||
hidden: _Hidden | None = None
|
||||
user_project: _UserProjects | None = None
|
||||
personas: _Personas | None = None
|
||||
|
||||
class _VespaPutRequest(BaseModel):
|
||||
model_config = {"frozen": True}
|
||||
@@ -227,6 +232,11 @@ def _update_single_chunk(
|
||||
if update_request.project_ids is not None
|
||||
else None
|
||||
)
|
||||
personas_update: _Personas | None = (
|
||||
_Personas(assign=list(update_request.persona_ids))
|
||||
if update_request.persona_ids is not None
|
||||
else None
|
||||
)
|
||||
|
||||
vespa_put_fields = _VespaPutFields(
|
||||
boost=boost_update,
|
||||
@@ -234,6 +244,7 @@ def _update_single_chunk(
|
||||
access_control_list=access_update,
|
||||
hidden=hidden_update,
|
||||
user_project=user_projects_update,
|
||||
personas=personas_update,
|
||||
)
|
||||
|
||||
vespa_put_request = _VespaPutRequest(
|
||||
|
||||
@@ -58,6 +58,7 @@ DOCUMENT_SETS = "document_sets"
|
||||
USER_FILE = "user_file"
|
||||
USER_FOLDER = "user_folder"
|
||||
USER_PROJECT = "user_project"
|
||||
PERSONAS = "personas"
|
||||
LARGE_CHUNK_REFERENCE_IDS = "large_chunk_reference_ids"
|
||||
METADATA = "metadata"
|
||||
METADATA_LIST = "metadata_list"
|
||||
|
||||
@@ -12,6 +12,9 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
class AzureImageGenerationProvider(ImageGenerationProvider):
|
||||
_GPT_IMAGE_MODEL_PREFIX = "gpt-image-"
|
||||
_DALL_E_2_MODEL_NAME = "dall-e-2"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
@@ -53,6 +56,25 @@ class AzureImageGenerationProvider(ImageGenerationProvider):
|
||||
deployment_name=credentials.deployment_name,
|
||||
)
|
||||
|
||||
@property
def supports_reference_images(self) -> bool:
    """Always ``True``: this provider advertises support for reference images."""
    return True
|
||||
|
||||
@property
def max_reference_images(self) -> int:
    """Upper bound on the number of reference images per request."""
    # Azure GPT image models support up to 16 input images for edits.
    return 16
|
||||
|
||||
def _normalize_model_name(self, model: str) -> str:
|
||||
return model.rsplit("/", 1)[-1]
|
||||
|
||||
def _model_supports_image_edits(self, model: str) -> bool:
|
||||
normalized_model = self._normalize_model_name(model)
|
||||
return (
|
||||
normalized_model.startswith(self._GPT_IMAGE_MODEL_PREFIX)
|
||||
or normalized_model == self._DALL_E_2_MODEL_NAME
|
||||
)
|
||||
|
||||
def generate_image(
|
||||
self,
|
||||
prompt: str,
|
||||
@@ -60,14 +82,44 @@ class AzureImageGenerationProvider(ImageGenerationProvider):
|
||||
size: str,
|
||||
n: int,
|
||||
quality: str | None = None,
|
||||
reference_images: list[ReferenceImage] | None = None, # noqa: ARG002
|
||||
reference_images: list[ReferenceImage] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> ImageGenerationResponse:
|
||||
from litellm import image_generation
|
||||
|
||||
deployment = self._deployment_name or model
|
||||
model_name = f"azure/{deployment}"
|
||||
|
||||
if reference_images:
|
||||
if not self._model_supports_image_edits(model):
|
||||
raise ValueError(
|
||||
f"Model '{model}' does not support image edits with reference images."
|
||||
)
|
||||
|
||||
normalized_model = self._normalize_model_name(model)
|
||||
if (
|
||||
normalized_model == self._DALL_E_2_MODEL_NAME
|
||||
and len(reference_images) > 1
|
||||
):
|
||||
raise ValueError(
|
||||
"Model 'dall-e-2' only supports a single reference image for edits."
|
||||
)
|
||||
|
||||
from litellm import image_edit
|
||||
|
||||
return image_edit(
|
||||
image=[image.data for image in reference_images],
|
||||
prompt=prompt,
|
||||
model=model_name,
|
||||
api_key=self._api_key,
|
||||
api_base=self._api_base,
|
||||
api_version=self._api_version,
|
||||
size=size,
|
||||
n=n,
|
||||
quality=quality,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
from litellm import image_generation
|
||||
|
||||
return image_generation(
|
||||
prompt=prompt,
|
||||
model=model_name,
|
||||
|
||||
@@ -12,6 +12,9 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
class OpenAIImageGenerationProvider(ImageGenerationProvider):
|
||||
_GPT_IMAGE_MODEL_PREFIX = "gpt-image-"
|
||||
_DALL_E_2_MODEL_NAME = "dall-e-2"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
@@ -39,6 +42,25 @@ class OpenAIImageGenerationProvider(ImageGenerationProvider):
|
||||
api_base=credentials.api_base,
|
||||
)
|
||||
|
||||
@property
def supports_reference_images(self) -> bool:
    """Always ``True``: this provider advertises support for reference images."""
    return True
|
||||
|
||||
@property
def max_reference_images(self) -> int:
    """Upper bound on the number of reference images per request."""
    # GPT image models support up to 16 input images for edits.
    return 16
|
||||
|
||||
def _normalize_model_name(self, model: str) -> str:
|
||||
return model.rsplit("/", 1)[-1]
|
||||
|
||||
def _model_supports_image_edits(self, model: str) -> bool:
|
||||
normalized_model = self._normalize_model_name(model)
|
||||
return (
|
||||
normalized_model.startswith(self._GPT_IMAGE_MODEL_PREFIX)
|
||||
or normalized_model == self._DALL_E_2_MODEL_NAME
|
||||
)
|
||||
|
||||
def generate_image(
|
||||
self,
|
||||
prompt: str,
|
||||
@@ -46,9 +68,38 @@ class OpenAIImageGenerationProvider(ImageGenerationProvider):
|
||||
size: str,
|
||||
n: int,
|
||||
quality: str | None = None,
|
||||
reference_images: list[ReferenceImage] | None = None, # noqa: ARG002
|
||||
reference_images: list[ReferenceImage] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> ImageGenerationResponse:
|
||||
if reference_images:
|
||||
if not self._model_supports_image_edits(model):
|
||||
raise ValueError(
|
||||
f"Model '{model}' does not support image edits with reference images."
|
||||
)
|
||||
|
||||
normalized_model = self._normalize_model_name(model)
|
||||
if (
|
||||
normalized_model == self._DALL_E_2_MODEL_NAME
|
||||
and len(reference_images) > 1
|
||||
):
|
||||
raise ValueError(
|
||||
"Model 'dall-e-2' only supports a single reference image for edits."
|
||||
)
|
||||
|
||||
from litellm import image_edit
|
||||
|
||||
return image_edit(
|
||||
image=[image.data for image in reference_images],
|
||||
prompt=prompt,
|
||||
model=model,
|
||||
api_key=self._api_key,
|
||||
api_base=self._api_base,
|
||||
size=size,
|
||||
n=n,
|
||||
quality=quality,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
from litellm import image_generation
|
||||
|
||||
return image_generation(
|
||||
|
||||
@@ -146,6 +146,7 @@ class DocumentIndexingBatchAdapter:
|
||||
doc_id_to_document_set.get(chunk.source_document.id, [])
|
||||
),
|
||||
user_project=[],
|
||||
personas=[],
|
||||
boost=(
|
||||
context.id_to_boost_map[chunk.source_document.id]
|
||||
if chunk.source_document.id in context.id_to_boost_map
|
||||
|
||||
@@ -182,7 +182,7 @@ class UserFileIndexingAdapter:
|
||||
user_project=user_file_id_to_project_ids.get(
|
||||
chunk.source_document.id, []
|
||||
),
|
||||
# we are going to index userfiles only once, so we just set the boost to the default
|
||||
personas=[],
|
||||
boost=DEFAULT_BOOST,
|
||||
tenant_id=tenant_id,
|
||||
aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
|
||||
|
||||
@@ -49,6 +49,7 @@ from onyx.indexing.embedder import IndexingEmbedder
|
||||
from onyx.indexing.models import DocAwareChunk
|
||||
from onyx.indexing.models import IndexingBatchAdapter
|
||||
from onyx.indexing.models import UpdatableChunkData
|
||||
from onyx.indexing.postgres_sanitization import sanitize_documents_for_postgres
|
||||
from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
|
||||
from onyx.llm.factory import get_default_llm_with_vision
|
||||
from onyx.llm.factory import get_llm_for_contextual_rag
|
||||
@@ -228,6 +229,8 @@ def index_doc_batch_prepare(
|
||||
) -> DocumentBatchPrepareContext | None:
|
||||
"""Sets up the documents in the relational DB (source of truth) for permissions, metadata, etc.
|
||||
This preceeds indexing it into the actual document index."""
|
||||
documents = sanitize_documents_for_postgres(documents)
|
||||
|
||||
# Create a trimmed list of docs that don't have a newer updated at
|
||||
# Shortcuts the time-consuming flow on connector index retries
|
||||
document_ids: list[str] = [document.id for document in documents]
|
||||
|
||||
@@ -112,6 +112,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
|
||||
access: "DocumentAccess"
|
||||
document_sets: set[str]
|
||||
user_project: list[int]
|
||||
personas: list[int]
|
||||
boost: int
|
||||
aggregated_chunk_boost_factor: float
|
||||
# Full ancestor path from root hierarchy node to document's parent.
|
||||
@@ -126,6 +127,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
|
||||
access: "DocumentAccess",
|
||||
document_sets: set[str],
|
||||
user_project: list[int],
|
||||
personas: list[int],
|
||||
boost: int,
|
||||
aggregated_chunk_boost_factor: float,
|
||||
tenant_id: str,
|
||||
@@ -137,6 +139,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
|
||||
access=access,
|
||||
document_sets=document_sets,
|
||||
user_project=user_project,
|
||||
personas=personas,
|
||||
boost=boost,
|
||||
aggregated_chunk_boost_factor=aggregated_chunk_boost_factor,
|
||||
tenant_id=tenant_id,
|
||||
|
||||
150
backend/onyx/indexing/postgres_sanitization.py
Normal file
150
backend/onyx/indexing/postgres_sanitization.py
Normal file
@@ -0,0 +1,150 @@
|
||||
from typing import Any
|
||||
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
|
||||
|
||||
def _sanitize_string(value: str) -> str:
|
||||
return value.replace("\x00", "")
|
||||
|
||||
|
||||
def _sanitize_json_like(value: Any) -> Any:
    """Recursively strip NUL characters from a JSON-like structure.

    Strings are sanitised; lists, tuples and dicts are rebuilt with their
    contents sanitised (dict keys too, when they are strings). Any other
    value is returned unchanged.
    """
    if isinstance(value, dict):
        return {
            (_sanitize_string(raw_key) if isinstance(raw_key, str) else raw_key): (
                _sanitize_json_like(raw_item)
            )
            for raw_key, raw_item in value.items()
        }

    if isinstance(value, tuple):
        return tuple(_sanitize_json_like(element) for element in value)

    if isinstance(value, list):
        return [_sanitize_json_like(element) for element in value]

    if isinstance(value, str):
        return _sanitize_string(value)

    return value
|
||||
|
||||
|
||||
def _sanitize_expert_info(expert: BasicExpertInfo) -> BasicExpertInfo:
    """Return a copy of ``expert`` whose name and email fields are NUL-free.

    Fields that are ``None`` stay ``None``; the input object is not modified.
    """
    text_fields = (
        "display_name",
        "first_name",
        "middle_initial",
        "last_name",
        "email",
    )
    cleaned: dict[str, str | None] = {}
    for field_name in text_fields:
        current = getattr(expert, field_name)
        cleaned[field_name] = None if current is None else _sanitize_string(current)
    return expert.model_copy(update=cleaned)
|
||||
|
||||
|
||||
def _sanitize_external_access(external_access: ExternalAccess) -> ExternalAccess:
    """Build a new ``ExternalAccess`` with NUL characters stripped from every
    user email and group id; ``is_public`` is carried over as-is."""
    clean_emails = set(map(_sanitize_string, external_access.external_user_emails))
    clean_group_ids = set(
        map(_sanitize_string, external_access.external_user_group_ids)
    )
    return ExternalAccess(
        external_user_emails=clean_emails,
        external_user_group_ids=clean_group_ids,
        is_public=external_access.is_public,
    )
|
||||
|
||||
|
||||
def sanitize_document_for_postgres(document: Document) -> Document:
    """Return a deep copy of ``document`` with NUL characters removed from all
    of its textual content.

    Covers the id, semantic identifier, title, parent hierarchy node id,
    metadata keys and values, ``doc_metadata``, owners, external access and
    the link/text/image_file_id of every section. The input document is left
    untouched.
    """
    doc = document.model_copy(deep=True)

    # Required string fields.
    doc.id = _sanitize_string(doc.id)
    doc.semantic_identifier = _sanitize_string(doc.semantic_identifier)

    # Optional string fields.
    if doc.title is not None:
        doc.title = _sanitize_string(doc.title)
    parent_node_id = doc.parent_hierarchy_raw_node_id
    if parent_node_id is not None:
        doc.parent_hierarchy_raw_node_id = _sanitize_string(parent_node_id)

    # Metadata values are either a string or a list of strings.
    clean_metadata: dict[str, str | list[str]] = {}
    for raw_key, raw_value in doc.metadata.items():
        clean_key = _sanitize_string(raw_key)
        if isinstance(raw_value, list):
            clean_metadata[clean_key] = [_sanitize_string(item) for item in raw_value]
        else:
            clean_metadata[clean_key] = _sanitize_string(raw_value)
    doc.metadata = clean_metadata

    if doc.doc_metadata is not None:
        doc.doc_metadata = _sanitize_json_like(doc.doc_metadata)

    if doc.primary_owners is not None:
        doc.primary_owners = list(map(_sanitize_expert_info, doc.primary_owners))
    if doc.secondary_owners is not None:
        doc.secondary_owners = list(map(_sanitize_expert_info, doc.secondary_owners))

    if doc.external_access is not None:
        doc.external_access = _sanitize_external_access(doc.external_access)

    # Sections belong to the deep copy, so they can be edited in place.
    for section in doc.sections:
        for attr_name in ("link", "text", "image_file_id"):
            current = getattr(section, attr_name)
            if current is not None:
                setattr(section, attr_name, _sanitize_string(current))

    return doc
|
||||
|
||||
|
||||
def sanitize_documents_for_postgres(documents: list[Document]) -> list[Document]:
    """Apply ``sanitize_document_for_postgres`` to each document, in order."""
    return list(map(sanitize_document_for_postgres, documents))
|
||||
|
||||
|
||||
def sanitize_hierarchy_node_for_postgres(node: HierarchyNode) -> HierarchyNode:
    """Return a deep copy of ``node`` with NUL characters removed from its raw
    node id, display name, raw parent id, link and external access. The input
    node is left untouched."""
    clean = node.model_copy(deep=True)

    # Required string fields.
    clean.raw_node_id = _sanitize_string(clean.raw_node_id)
    clean.display_name = _sanitize_string(clean.display_name)

    # Optional string fields.
    for attr_name in ("raw_parent_id", "link"):
        current = getattr(clean, attr_name)
        if current is not None:
            setattr(clean, attr_name, _sanitize_string(current))

    access = clean.external_access
    if access is not None:
        clean.external_access = _sanitize_external_access(access)

    return clean
|
||||
|
||||
|
||||
def sanitize_hierarchy_nodes_for_postgres(
    nodes: list[HierarchyNode],
) -> list[HierarchyNode]:
    """Apply ``sanitize_hierarchy_node_for_postgres`` to each node, in order."""
    return list(map(sanitize_hierarchy_node_for_postgres, nodes))
|
||||
@@ -97,6 +97,9 @@ from onyx.server.features.web_search.api import router as web_search_router
|
||||
from onyx.server.federated.api import router as federated_router
|
||||
from onyx.server.kg.api import admin_router as kg_admin_router
|
||||
from onyx.server.manage.administrative import router as admin_router
|
||||
from onyx.server.manage.code_interpreter.api import (
|
||||
admin_router as code_interpreter_admin_router,
|
||||
)
|
||||
from onyx.server.manage.discord_bot.api import router as discord_bot_router
|
||||
from onyx.server.manage.embedding.api import admin_router as embedding_admin_router
|
||||
from onyx.server.manage.embedding.api import basic_router as embedding_router
|
||||
@@ -421,6 +424,9 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
|
||||
include_router_with_global_prefix_prepended(application, llm_admin_router)
|
||||
include_router_with_global_prefix_prepended(application, kg_admin_router)
|
||||
include_router_with_global_prefix_prepended(application, llm_router)
|
||||
include_router_with_global_prefix_prepended(
|
||||
application, code_interpreter_admin_router
|
||||
)
|
||||
include_router_with_global_prefix_prepended(
|
||||
application, image_generation_admin_router
|
||||
)
|
||||
|
||||
@@ -1,14 +1,68 @@
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from mistune import create_markdown
|
||||
from mistune import HTMLRenderer
|
||||
|
||||
_CITATION_LINK_PATTERN = re.compile(r"\[\[\d+\]\]\(")
|
||||
|
||||
|
||||
def _extract_link_destination(message: str, start_idx: int) -> tuple[str, int | None]:
|
||||
"""Extract markdown link destination, allowing nested parentheses in the URL."""
|
||||
depth = 0
|
||||
i = start_idx
|
||||
|
||||
while i < len(message):
|
||||
curr = message[i]
|
||||
if curr == "\\":
|
||||
i += 2
|
||||
continue
|
||||
|
||||
if curr == "(":
|
||||
depth += 1
|
||||
elif curr == ")":
|
||||
if depth == 0:
|
||||
return message[start_idx:i], i
|
||||
depth -= 1
|
||||
i += 1
|
||||
|
||||
return message[start_idx:], None
|
||||
|
||||
|
||||
def _normalize_citation_link_destinations(message: str) -> str:
    """Wrap citation link URLs in angle brackets so parentheses parse safely.

    Every ``[[N]](destination)`` citation link matched by
    ``_CITATION_LINK_PATTERN`` has its destination rewritten as
    ``<destination>``. Empty destinations and destinations that already start
    with ``<`` and end with ``>`` are left alone. If a citation link is never
    closed, the remainder of the message is emitted unchanged.
    """
    # Fast path: without "[[" there cannot be a citation link.
    if "[[" not in message:
        return message

    pieces: list[str] = []
    pos = 0

    while True:
        found = _CITATION_LINK_PATTERN.search(message, pos)
        if found is None:
            break

        dest_start = found.end()
        # Copy everything up to and including the link's opening "(".
        pieces.append(message[pos:dest_start])

        url, close_idx = _extract_link_destination(message, dest_start)
        if close_idx is None:
            # Unterminated link: keep the tail verbatim and stop.
            pieces.append(message[dest_start:])
            return "".join(pieces)

        is_wrapped = url.startswith("<") and url.endswith(">")
        if url and not is_wrapped:
            url = f"<{url}>"

        pieces.extend((url, ")"))
        pos = close_idx + 1

    pieces.append(message[pos:])
    return "".join(pieces)
|
||||
|
||||
|
||||
def format_slack_message(message: str | None) -> str:
    """Render a markdown message with ``SlackRenderer``.

    Citation link destinations are normalized first (wrapped in angle
    brackets) so that URLs containing parentheses are parsed as a single
    destination. The message is rendered exactly once, from the normalized
    text.

    Args:
        message: Markdown text, or ``None``.

    Returns:
        The rendered string, or ``""`` when ``message`` is ``None``.
    """
    if message is None:
        return ""
    md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough"])
    normalized_message = _normalize_citation_link_destinations(message)
    result = md(normalized_message)
    # With HTMLRenderer, result is always str (not AST list)
    assert isinstance(result, str)
    return result
|
||||
|
||||
@@ -762,6 +762,43 @@ def download_webapp(
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{session_id}/download-directory/{path:path}")
def download_directory(
    session_id: UUID,
    path: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
) -> Response:
    """
    Download a directory as a zip file.

    Returns the specified directory as a zip archive.

    Raises:
        HTTPException: 403 when the session manager reports a path traversal,
            400 for any other ValueError it raises, and 404 when it returns
            no result.
    """
    user_id: UUID = user.id
    session_manager = SessionManager(db_session)

    try:
        result = session_manager.download_directory(session_id, user_id, path)
    except ValueError as e:
        error_message = str(e)
        # Chain the original error so the root cause stays visible in tracebacks.
        if "path traversal" in error_message.lower():
            raise HTTPException(status_code=403, detail="Access denied") from e
        raise HTTPException(status_code=400, detail=error_message) from e

    if result is None:
        raise HTTPException(status_code=404, detail="Directory not found")

    zip_bytes, filename = result

    return Response(
        content=zip_bytes,
        media_type="application/zip",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
        },
    )
|
||||
|
||||
|
||||
@router.post("/{session_id}/upload", response_model=UploadResponse)
|
||||
def upload_file_endpoint(
|
||||
session_id: UUID,
|
||||
|
||||
@@ -107,27 +107,23 @@ def get_or_create_craft_connector(db_session: Session, user: User) -> tuple[int,
|
||||
)
|
||||
|
||||
for cc_pair in cc_pairs:
|
||||
if cc_pair.connector.source == DocumentSource.CRAFT_FILE:
|
||||
if (
|
||||
cc_pair.connector.source == DocumentSource.CRAFT_FILE
|
||||
and cc_pair.creator_id == user.id
|
||||
):
|
||||
return cc_pair.connector.id, cc_pair.credential.id
|
||||
|
||||
# Check for orphaned connector (created but cc_pair creation failed previously)
|
||||
# No cc_pair for this user — find or create the shared CRAFT_FILE connector
|
||||
existing_connectors = fetch_connectors(
|
||||
db_session, sources=[DocumentSource.CRAFT_FILE]
|
||||
)
|
||||
orphaned_connector = None
|
||||
connector_id: int | None = None
|
||||
for conn in existing_connectors:
|
||||
if conn.name != USER_LIBRARY_CONNECTOR_NAME:
|
||||
continue
|
||||
if not conn.credentials:
|
||||
orphaned_connector = conn
|
||||
if conn.name == USER_LIBRARY_CONNECTOR_NAME:
|
||||
connector_id = conn.id
|
||||
break
|
||||
|
||||
if orphaned_connector:
|
||||
connector_id = orphaned_connector.id
|
||||
logger.info(
|
||||
f"Found orphaned User Library connector {connector_id}, completing setup"
|
||||
)
|
||||
else:
|
||||
if connector_id is None:
|
||||
connector_data = ConnectorBase(
|
||||
name=USER_LIBRARY_CONNECTOR_NAME,
|
||||
source=DocumentSource.CRAFT_FILE,
|
||||
|
||||
Binary file not shown.
@@ -1,15 +1,19 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate AGENTS.md by scanning the files directory and populating the template.
|
||||
|
||||
This script runs at container startup, AFTER the init container has synced files
|
||||
from S3. It scans the /workspace/files directory to discover what knowledge sources
|
||||
are available and generates appropriate documentation.
|
||||
This script runs during session setup, AFTER files have been synced from S3
|
||||
and the files symlink has been created. It reads the template from stdin,
|
||||
replaces the {{KNOWLEDGE_SOURCES_SECTION}} placeholder by scanning the
|
||||
knowledge source directory, and writes the final AGENTS.md to the output path.
|
||||
|
||||
Environment variables:
|
||||
- AGENT_INSTRUCTIONS: The template content with placeholders to replace
|
||||
Usage:
|
||||
printf '%s' "$TEMPLATE" | python3 generate_agents_md.py <output_path> <files_path>
|
||||
|
||||
Arguments:
|
||||
output_path: Path to write the final AGENTS.md
|
||||
files_path: Path to the files directory to scan for knowledge sources
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
@@ -189,49 +193,39 @@ def build_knowledge_sources_section(files_path: Path) -> str:
|
||||
def main() -> None:
    """Main entry point for container startup script.

    Reads the template from stdin, replaces the {{KNOWLEDGE_SOURCES_SECTION}}
    placeholder by scanning the files directory, and writes the result.

    Exits with status 1 when the argument count is wrong or when stdin
    provides no template content.

    Usage:
        printf '%s' "$TEMPLATE" | python3 generate_agents_md.py <output_path> <files_path>
    """
    if len(sys.argv) != 3:
        print(
            f"Usage: {sys.argv[0]} <output_path> <files_path>",
            file=sys.stderr,
        )
        sys.exit(1)

    output_path = Path(sys.argv[1])
    files_path = Path(sys.argv[2])

    # Read template from stdin
    template = sys.stdin.read()
    if not template:
        print("Error: No template content provided on stdin", file=sys.stderr)
        sys.exit(1)

    # Resolve symlinks (handles both direct symlinks and dirs containing symlinks)
    resolved_files_path = files_path.resolve()

    knowledge_sources_section = build_knowledge_sources_section(resolved_files_path)

    # Replace placeholder and write final file
    content = template.replace(
        "{{KNOWLEDGE_SOURCES_SECTION}}", knowledge_sources_section
    )
    output_path.write_text(content)
    print(f"Generated {output_path} (scanned {resolved_files_path})")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1348,9 +1348,10 @@ if [ -d /workspace/skills ]; then
|
||||
echo "Linked skills to /workspace/skills"
|
||||
fi
|
||||
|
||||
# Write agent instructions
|
||||
# Write agent instructions (scans files dir to populate knowledge sources)
|
||||
echo "Writing AGENTS.md"
|
||||
printf '%s' '{agent_instructions_escaped}' > {session_path}/AGENTS.md
|
||||
printf '%s' '{agent_instructions_escaped}' \
|
||||
| python3 /usr/local/bin/generate_agents_md.py {session_path}/AGENTS.md {session_path}/files
|
||||
|
||||
# Write opencode config
|
||||
echo "Writing opencode.json"
|
||||
@@ -1776,9 +1777,11 @@ set -e
|
||||
echo "Creating files symlink to {symlink_target}"
|
||||
ln -sf {symlink_target} {session_path}/files
|
||||
|
||||
# Write agent instructions
|
||||
# Write agent instructions (scans files dir to populate knowledge sources)
|
||||
echo "Writing AGENTS.md"
|
||||
printf '%s' '{agent_instructions_escaped}' > {session_path}/AGENTS.md
|
||||
printf '%s' '{agent_instructions_escaped}' \
|
||||
| python3 /usr/local/bin/generate_agents_md.py \
|
||||
{session_path}/AGENTS.md {session_path}/files
|
||||
|
||||
# Write opencode config
|
||||
echo "Writing opencode.json"
|
||||
|
||||
@@ -68,6 +68,7 @@ from onyx.server.features.build.db.sandbox import create_sandbox__no_commit
|
||||
from onyx.server.features.build.db.sandbox import get_running_sandbox_count_by_tenant
|
||||
from onyx.server.features.build.db.sandbox import get_sandbox_by_session_id
|
||||
from onyx.server.features.build.db.sandbox import get_sandbox_by_user_id
|
||||
from onyx.server.features.build.db.sandbox import get_snapshots_for_session
|
||||
from onyx.server.features.build.db.sandbox import update_sandbox_heartbeat
|
||||
from onyx.server.features.build.db.sandbox import update_sandbox_status__no_commit
|
||||
from onyx.server.features.build.sandbox import get_sandbox_manager
|
||||
@@ -646,16 +647,30 @@ class SessionManager:
|
||||
|
||||
if sandbox and sandbox.status.is_active():
|
||||
# Quick health check to verify sandbox is actually responsive
|
||||
if self._sandbox_manager.health_check(sandbox.id, timeout=5.0):
|
||||
# AND verify the session workspace still exists on disk
|
||||
# (it may have been wiped if the sandbox was re-provisioned)
|
||||
is_healthy = self._sandbox_manager.health_check(sandbox.id, timeout=5.0)
|
||||
workspace_exists = (
|
||||
is_healthy
|
||||
and self._sandbox_manager.session_workspace_exists(
|
||||
sandbox.id, existing.id
|
||||
)
|
||||
)
|
||||
if is_healthy and workspace_exists:
|
||||
logger.info(
|
||||
f"Returning existing empty session {existing.id} for user {user_id}"
|
||||
)
|
||||
return existing
|
||||
else:
|
||||
elif not is_healthy:
|
||||
logger.warning(
|
||||
f"Empty session {existing.id} has unhealthy sandbox {sandbox.id}. "
|
||||
f"Deleting and creating fresh session."
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Empty session {existing.id} workspace missing in sandbox "
|
||||
f"{sandbox.id}. Deleting and creating fresh session."
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Empty session {existing.id} has no active sandbox "
|
||||
@@ -1035,6 +1050,23 @@ class SessionManager:
|
||||
# workspace cleanup fails (e.g., if pod is already terminated)
|
||||
logger.warning(f"Failed to cleanup session workspace {session_id}: {e}")
|
||||
|
||||
# Delete snapshot files from S3 before removing DB records
|
||||
snapshots = get_snapshots_for_session(self._db_session, session_id)
|
||||
if snapshots:
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.server.features.build.sandbox.manager.snapshot_manager import (
|
||||
SnapshotManager,
|
||||
)
|
||||
|
||||
snapshot_manager = SnapshotManager(get_default_file_store())
|
||||
for snapshot in snapshots:
|
||||
try:
|
||||
snapshot_manager.delete_snapshot(snapshot.storage_path)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to delete snapshot file {snapshot.storage_path}: {e}"
|
||||
)
|
||||
|
||||
# Delete session (uses flush, caller commits)
|
||||
return delete_build_session__no_commit(session_id, user_id, self._db_session)
|
||||
|
||||
@@ -1903,6 +1935,94 @@ class SessionManager:
|
||||
|
||||
return zip_buffer.getvalue(), filename
|
||||
|
||||
def download_directory(
    self,
    session_id: UUID,
    user_id: UUID,
    path: str,
) -> tuple[bytes, str] | None:
    """
    Create a zip file of an arbitrary directory in the session workspace.

    Args:
        session_id: The session UUID
        user_id: The user ID to verify ownership
        path: Relative path to the directory (within session workspace)

    Returns:
        Tuple of (zip_bytes, filename), or None if the session, the user's
        sandbox, or the directory cannot be found

    Raises:
        ValueError: If path traversal attempted
    """
    # Verify session ownership
    session = get_build_session(session_id, user_id, self._db_session)
    if session is None:
        return None

    sandbox = get_sandbox_by_user_id(self._db_session, user_id)
    if sandbox is None:
        return None

    # Check if directory exists. Path traversal errors must reach the caller
    # (the API layer maps them to 403); any other ValueError is treated as
    # "directory not found".
    try:
        self._sandbox_manager.list_directory(
            sandbox_id=sandbox.id,
            session_id=session_id,
            path=path,
        )
    except ValueError as e:
        if "path traversal" in str(e).lower():
            raise
        return None

    # Prefix stripped from entry paths to build archive names. Trailing
    # slashes on the requested path are ignored so that "dir" and "dir/"
    # produce the same archive layout.
    base_path = path.rstrip("/")
    prefix = f"{base_path}/" if base_path else ""

    # Recursively collect all files
    def collect_files(dir_path: str) -> list[tuple[str, str]]:
        """Collect all files recursively, returning (full_path, arcname) tuples."""
        try:
            entries = self._sandbox_manager.list_directory(
                sandbox_id=sandbox.id,
                session_id=session_id,
                path=dir_path,
            )
        except ValueError as e:
            # Best effort: skip directories that cannot be listed.
            logger.warning(
                f"Skipping directory {dir_path} while zipping session "
                f"{session_id}: {e}"
            )
            return []

        files: list[tuple[str, str]] = []
        for entry in entries:
            if entry.is_directory:
                files.extend(collect_files(entry.path))
                continue
            # arcname is relative to the target directory; entry paths are
            # expected to start with the requested path. If one does not,
            # keep its full path rather than slicing off arbitrary characters.
            if entry.path.startswith(prefix):
                relative = entry.path[len(prefix) :]
            else:
                relative = entry.path
            files.append((entry.path, relative.lstrip("/")))
        return files

    file_list = collect_files(path)

    # Create zip file in memory
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
        for full_path, arcname in file_list:
            try:
                content = self._sandbox_manager.read_file(
                    sandbox_id=sandbox.id,
                    session_id=session_id,
                    path=full_path,
                )
            except ValueError as e:
                # Best effort: leave unreadable files out of the archive.
                logger.warning(
                    f"Skipping file {full_path} while zipping session "
                    f"{session_id}: {e}"
                )
                continue
            zip_file.writestr(arcname, content)

    # Use the directory name for the zip filename; fall back to a fixed name
    # when the path has no final component (e.g. "" or ".").
    dir_name = Path(path).name or "directory"
    safe_name = "".join(
        c if c.isalnum() or c in ("-", "_", ".") else "_" for c in dir_name
    )
    filename = f"{safe_name}.zip"

    return zip_buffer.getvalue(), filename
|
||||
|
||||
# =========================================================================
|
||||
# File System Operations
|
||||
# =========================================================================
|
||||
@@ -1937,11 +2057,18 @@ class SessionManager:
|
||||
return None
|
||||
|
||||
# Use sandbox manager to list directory (works for both local and K8s)
|
||||
raw_entries = self._sandbox_manager.list_directory(
|
||||
sandbox_id=sandbox.id,
|
||||
session_id=session_id,
|
||||
path=path,
|
||||
)
|
||||
# If the directory doesn't exist (e.g., session workspace not yet loaded),
|
||||
# return an empty listing rather than erroring out.
|
||||
try:
|
||||
raw_entries = self._sandbox_manager.list_directory(
|
||||
sandbox_id=sandbox.id,
|
||||
session_id=session_id,
|
||||
path=path,
|
||||
)
|
||||
except ValueError as e:
|
||||
if "path traversal" in str(e).lower():
|
||||
raise
|
||||
return DirectoryListing(path=path, entries=[])
|
||||
|
||||
# Filter hidden files and directories
|
||||
entries: list[FileSystemEntry] = [
|
||||
|
||||
@@ -12,11 +12,18 @@ from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.users import current_user
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
enqueue_user_file_project_sync_task,
|
||||
)
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
get_user_file_project_sync_queue_depth,
|
||||
)
|
||||
from onyx.background.celery.versioned_apps.client import app as client_app
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import PUBLIC_API_TAGS
|
||||
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.enums import UserFileStatus
|
||||
from onyx.db.models import ChatSession
|
||||
@@ -27,6 +34,7 @@ from onyx.db.models import UserProject
|
||||
from onyx.db.persona import get_personas_by_ids
|
||||
from onyx.db.projects import get_project_token_count
|
||||
from onyx.db.projects import upload_files_to_user_files_with_indexing
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.server.features.projects.models import CategorizedFilesSnapshot
|
||||
from onyx.server.features.projects.models import ChatSessionRequest
|
||||
from onyx.server.features.projects.models import TokenCountResponse
|
||||
@@ -47,6 +55,33 @@ class UserFileDeleteResult(BaseModel):
|
||||
assistant_names: list[str] = []
|
||||
|
||||
|
||||
def _trigger_user_file_project_sync(user_file_id: UUID, tenant_id: str) -> None:
    """Enqueue a highest-priority project sync task for a user file.

    Nothing is enqueued when the project sync queue depth exceeds
    ``USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH``; per the log message, the file
    is then left for the periodic beat task. The enqueue helper may also
    decline (reported here as a duplicate), which is logged and otherwise
    ignored.
    """
    current_depth = get_user_file_project_sync_queue_depth(client_app)
    queue_is_saturated = current_depth > USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
    if queue_is_saturated:
        logger.warning(
            f"Skipping immediate project sync for user_file_id={user_file_id} due to "
            f"queue depth {current_depth}>{USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH}. "
            "It will be picked up by beat later."
        )
        return

    was_enqueued = enqueue_user_file_project_sync_task(
        celery_app=client_app,
        redis_client=get_redis_client(tenant_id=tenant_id),
        user_file_id=user_file_id,
        tenant_id=tenant_id,
        priority=OnyxCeleryPriority.HIGHEST,
    )

    if was_enqueued:
        logger.info(f"Triggered project sync for user_file_id={user_file_id}")
    else:
        logger.info(
            f"Skipped duplicate project sync enqueue for user_file_id={user_file_id}"
        )
|
||||
|
||||
|
||||
@router.get("", tags=PUBLIC_API_TAGS)
|
||||
def get_projects(
|
||||
user: User = Depends(current_user),
|
||||
@@ -189,15 +224,7 @@ def unlink_user_file_from_project(
|
||||
db_session.commit()
|
||||
|
||||
tenant_id = get_current_tenant_id()
|
||||
task = client_app.send_task(
|
||||
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
|
||||
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
|
||||
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
|
||||
priority=OnyxCeleryPriority.HIGHEST,
|
||||
)
|
||||
logger.info(
|
||||
f"Triggered project sync for user_file_id={user_file.id} with task_id={task.id}"
|
||||
)
|
||||
_trigger_user_file_project_sync(user_file.id, tenant_id)
|
||||
|
||||
return Response(status_code=204)
|
||||
|
||||
@@ -241,15 +268,7 @@ def link_user_file_to_project(
|
||||
db_session.commit()
|
||||
|
||||
tenant_id = get_current_tenant_id()
|
||||
task = client_app.send_task(
|
||||
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
|
||||
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
|
||||
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
|
||||
priority=OnyxCeleryPriority.HIGHEST,
|
||||
)
|
||||
logger.info(
|
||||
f"Triggered project sync for user_file_id={user_file.id} with task_id={task.id}"
|
||||
)
|
||||
_trigger_user_file_project_sync(user_file.id, tenant_id)
|
||||
|
||||
return UserFileSnapshot.from_model(user_file)
|
||||
|
||||
|
||||
47
backend/onyx/server/manage/code_interpreter/api.py
Normal file
47
backend/onyx/server/manage/code_interpreter/api.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.users import current_admin_user
|
||||
from onyx.db.code_interpreter import fetch_code_interpreter_server
|
||||
from onyx.db.code_interpreter import update_code_interpreter_server_enabled
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import User
|
||||
from onyx.server.manage.code_interpreter.models import CodeInterpreterServer
|
||||
from onyx.server.manage.code_interpreter.models import CodeInterpreterServerHealth
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import (
|
||||
CodeInterpreterClient,
|
||||
)
|
||||
|
||||
admin_router = APIRouter(prefix="/admin/code-interpreter")
|
||||
|
||||
|
||||
@admin_router.get("/health")
def get_code_interpreter_health(
    _: User = Depends(current_admin_user),
) -> CodeInterpreterServerHealth:
    """Report whether the code interpreter service answers its health check.

    Admin-only. A ``ValueError`` raised while creating or querying the client
    is reported as unhealthy rather than as a request error.
    """
    try:
        is_healthy = CodeInterpreterClient().health()
    except ValueError:
        # NOTE(review): presumably raised when the client cannot be
        # constructed (e.g. missing configuration) - confirm.
        is_healthy = False
    return CodeInterpreterServerHealth(healthy=is_healthy)
|
||||
|
||||
|
||||
@admin_router.get("")
def get_code_interpreter(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> CodeInterpreterServer:
    """Return whether the code interpreter server is enabled (admin-only).

    The flag is read from the record returned by
    ``fetch_code_interpreter_server``.
    """
    server_record = fetch_code_interpreter_server(db_session)
    is_enabled = server_record.server_enabled
    return CodeInterpreterServer(enabled=is_enabled)
|
||||
|
||||
|
||||
@admin_router.put("")
def update_code_interpreter(
    update: CodeInterpreterServer,
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Set the code interpreter server's enabled flag (admin-only).

    Args:
        update: Request body carrying the desired ``enabled`` state.
    """
    requested_state = update.enabled
    update_code_interpreter_server_enabled(
        db_session=db_session,
        enabled=requested_state,
    )
|
||||
9
backend/onyx/server/manage/code_interpreter/models.py
Normal file
9
backend/onyx/server/manage/code_interpreter/models.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class CodeInterpreterServer(BaseModel):
    """API model carrying the code interpreter server's enabled flag."""

    # True when the code interpreter server is enabled.
    enabled: bool
|
||||
|
||||
|
||||
class CodeInterpreterServerHealth(BaseModel):
    """API model carrying the result of a code interpreter health check."""

    # True when the health check succeeded.
    healthy: bool
|
||||
@@ -35,6 +35,18 @@ if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class EmailInviteStatus(str, Enum):
    """Outcome of the email-sending step of a bulk user invite.

    Members are plain strings, so they serialize as their names.
    """

    # Invite emails were sent without error.
    SENT = "SENT"
    # Email invites are enabled but email is not configured.
    NOT_CONFIGURED = "NOT_CONFIGURED"
    # Sending raised an error.
    SEND_FAILED = "SEND_FAILED"
    # Email invites are turned off.
    DISABLED = "DISABLED"
|
||||
|
||||
|
||||
class BulkInviteResponse(BaseModel):
    """Response body of the bulk user invite endpoint."""

    # Number of invited users reported by the invite write.
    invited_count: int
    # What happened when invite emails were (or were not) sent.
    email_invite_status: EmailInviteStatus
|
||||
|
||||
|
||||
class VersionResponse(BaseModel):
|
||||
backend_version: str
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ from onyx.configs.app_configs import AUTH_BACKEND
|
||||
from onyx.configs.app_configs import AUTH_TYPE
|
||||
from onyx.configs.app_configs import AuthBackend
|
||||
from onyx.configs.app_configs import DEV_MODE
|
||||
from onyx.configs.app_configs import EMAIL_CONFIGURED
|
||||
from onyx.configs.app_configs import ENABLE_EMAIL_INVITES
|
||||
from onyx.configs.app_configs import NUM_FREE_TRIAL_USER_INVITES
|
||||
from onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX
|
||||
@@ -78,8 +79,10 @@ from onyx.server.documents.models import PaginatedReturn
|
||||
from onyx.server.features.projects.models import UserFileSnapshot
|
||||
from onyx.server.manage.models import AllUsersResponse
|
||||
from onyx.server.manage.models import AutoScrollRequest
|
||||
from onyx.server.manage.models import BulkInviteResponse
|
||||
from onyx.server.manage.models import ChatBackgroundRequest
|
||||
from onyx.server.manage.models import DefaultAppModeRequest
|
||||
from onyx.server.manage.models import EmailInviteStatus
|
||||
from onyx.server.manage.models import MemoryItem
|
||||
from onyx.server.manage.models import PersonalizationUpdateRequest
|
||||
from onyx.server.manage.models import TenantInfo
|
||||
@@ -368,7 +371,7 @@ def bulk_invite_users(
|
||||
emails: list[str] = Body(..., embed=True),
|
||||
current_user: User = Depends(current_admin_user),
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> int:
|
||||
) -> BulkInviteResponse:
|
||||
"""emails are string validated. If any email fails validation, no emails are
|
||||
invited and an exception is raised."""
|
||||
tenant_id = get_current_tenant_id()
|
||||
@@ -427,34 +430,41 @@ def bulk_invite_users(
|
||||
number_of_invited_users = write_invited_users(all_emails)
|
||||
|
||||
# send out email invitations only to new users (not already invited or existing)
|
||||
if ENABLE_EMAIL_INVITES:
|
||||
if not ENABLE_EMAIL_INVITES:
|
||||
email_invite_status = EmailInviteStatus.DISABLED
|
||||
elif not EMAIL_CONFIGURED:
|
||||
email_invite_status = EmailInviteStatus.NOT_CONFIGURED
|
||||
else:
|
||||
try:
|
||||
for email in emails_needing_seats:
|
||||
send_user_email_invite(email, current_user, AUTH_TYPE)
|
||||
email_invite_status = EmailInviteStatus.SENT
|
||||
except Exception as e:
|
||||
logger.error(f"Error sending email invite to invited users: {e}")
|
||||
email_invite_status = EmailInviteStatus.SEND_FAILED
|
||||
|
||||
if not MULTI_TENANT or DEV_MODE:
|
||||
return number_of_invited_users
|
||||
if MULTI_TENANT and not DEV_MODE:
|
||||
# for billing purposes, write to the control plane about the number of new users
|
||||
try:
|
||||
logger.info("Registering tenant users")
|
||||
fetch_ee_implementation_or_noop(
|
||||
"onyx.server.tenants.billing", "register_tenant_users", None
|
||||
)(tenant_id, get_live_users_count(db_session))
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to register tenant users: {str(e)}")
|
||||
logger.info(
|
||||
"Reverting changes: removing users from tenant and resetting invited users"
|
||||
)
|
||||
write_invited_users(initial_invited_users) # Reset to original state
|
||||
fetch_ee_implementation_or_noop(
|
||||
"onyx.server.tenants.user_mapping", "remove_users_from_tenant", None
|
||||
)(new_invited_emails, tenant_id)
|
||||
raise e
|
||||
|
||||
# for billing purposes, write to the control plane about the number of new users
|
||||
try:
|
||||
logger.info("Registering tenant users")
|
||||
fetch_ee_implementation_or_noop(
|
||||
"onyx.server.tenants.billing", "register_tenant_users", None
|
||||
)(tenant_id, get_live_users_count(db_session))
|
||||
|
||||
return number_of_invited_users
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to register tenant users: {str(e)}")
|
||||
logger.info(
|
||||
"Reverting changes: removing users from tenant and resetting invited users"
|
||||
)
|
||||
write_invited_users(initial_invited_users) # Reset to original state
|
||||
fetch_ee_implementation_or_noop(
|
||||
"onyx.server.tenants.user_mapping", "remove_users_from_tenant", None
|
||||
)(new_invited_emails, tenant_id)
|
||||
raise e
|
||||
return BulkInviteResponse(
|
||||
invited_count=number_of_invited_users,
|
||||
email_invite_status=email_invite_status,
|
||||
)
|
||||
|
||||
|
||||
@router.patch("/manage/admin/remove-invited-user", tags=PUBLIC_API_TAGS)
|
||||
|
||||
@@ -587,6 +587,7 @@ def handle_send_chat_message(
|
||||
request.headers
|
||||
),
|
||||
mcp_headers=chat_message_req.mcp_headers,
|
||||
additional_context=chat_message_req.additional_context,
|
||||
external_state_container=state_container,
|
||||
)
|
||||
result = gather_stream_full(packets, state_container)
|
||||
@@ -609,6 +610,7 @@ def handle_send_chat_message(
|
||||
request.headers
|
||||
),
|
||||
mcp_headers=chat_message_req.mcp_headers,
|
||||
additional_context=chat_message_req.additional_context,
|
||||
external_state_container=state_container,
|
||||
):
|
||||
yield get_json_line(obj.model_dump())
|
||||
|
||||
@@ -125,6 +125,11 @@ class SendMessageRequest(BaseModel):
|
||||
# - No CitationInfo packets are emitted during streaming
|
||||
include_citations: bool = True
|
||||
|
||||
# Additional context injected into the LLM call but NOT stored in the DB
|
||||
# (not shown in chat history). Used e.g. by the Chrome extension to pass
|
||||
# the current tab URL when "Read this tab" is enabled.
|
||||
additional_context: str | None = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def check_chat_session_id_or_info(self) -> "SendMessageRequest":
|
||||
# If neither is provided, default to creating a new chat session using the
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import json
|
||||
from collections.abc import Generator
|
||||
from typing import Literal
|
||||
from typing import TypedDict
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
from pydantic import BaseModel
|
||||
@@ -36,6 +39,39 @@ class ExecuteResponse(BaseModel):
|
||||
files: list[WorkspaceFile]
|
||||
|
||||
|
||||
class StreamOutputEvent(BaseModel):
    """Payload of an SSE 'output' event: one chunk of stdout or stderr."""

    # Which output stream the chunk belongs to.
    stream: Literal["stdout", "stderr"]
    # The text of the chunk.
    data: str
|
||||
|
||||
|
||||
class StreamResultEvent(BaseModel):
    """Payload of an SSE 'result' event: the final execution result."""

    # Process exit code; may be None.
    # NOTE(review): presumably None when no exit status exists - confirm.
    exit_code: int | None
    # Whether the execution timed out.
    timed_out: bool
    # Execution duration in milliseconds.
    duration_ms: int
    # Workspace files reported with the result.
    files: list[WorkspaceFile]
|
||||
|
||||
|
||||
class StreamErrorEvent(BaseModel):
    """Payload of an SSE 'error' event: an execution-level error."""

    # Error description.
    message: str
|
||||
|
||||
|
||||
StreamEvent = Union[StreamOutputEvent, StreamResultEvent, StreamErrorEvent]
|
||||
|
||||
_SSE_EVENT_MAP: dict[
|
||||
str, type[StreamOutputEvent | StreamResultEvent | StreamErrorEvent]
|
||||
] = {
|
||||
"output": StreamOutputEvent,
|
||||
"result": StreamResultEvent,
|
||||
"error": StreamErrorEvent,
|
||||
}
|
||||
|
||||
|
||||
class CodeInterpreterClient:
|
||||
"""Client for Code Interpreter service"""
|
||||
|
||||
@@ -45,6 +81,34 @@ class CodeInterpreterClient:
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.session = requests.Session()
|
||||
|
||||
def _build_payload(
    self,
    code: str,
    stdin: str | None,
    timeout_ms: int,
    files: list[FileInput] | None,
) -> dict:
    """Assemble the JSON request body shared by the execute endpoints.

    ``code`` and ``timeout_ms`` are always present. ``stdin`` is added
    only when it is not None (so an empty string is still sent), and
    ``files`` only when the list is non-empty.
    """
    request_body: dict = dict(code=code, timeout_ms=timeout_ms)

    # Optional fields are omitted entirely rather than sent as null.
    if stdin is not None:
        request_body.update(stdin=stdin)
    if files:
        request_body.update(files=files)

    return request_body
|
||||
|
||||
def health(self) -> bool:
    """Report whether the Code Interpreter service answers as healthy.

    Issues a GET to ``<base_url>/health`` with a 5 second timeout and
    returns True only when the JSON reply has ``status`` equal to
    ``"ok"``. Any exception (connection failure, HTTP error status,
    unparsable body) is logged as a warning and reported as False.
    """
    url = f"{self.base_url}/health"
    try:
        reply = self.session.get(url, timeout=5)
        reply.raise_for_status()
        reported_status = reply.json().get("status")
    except Exception as e:
        # Deliberately broad: a health probe must never raise.
        logger.warning(f"Exception caught when checking health, e={e}")
        return False
    return reported_status == "ok"
|
||||
|
||||
def execute(
|
||||
self,
|
||||
code: str,
|
||||
@@ -52,25 +116,110 @@ class CodeInterpreterClient:
|
||||
timeout_ms: int = 30000,
|
||||
files: list[FileInput] | None = None,
|
||||
) -> ExecuteResponse:
|
||||
"""Execute Python code"""
|
||||
"""Execute Python code (batch)"""
|
||||
url = f"{self.base_url}/v1/execute"
|
||||
|
||||
payload = {
|
||||
"code": code,
|
||||
"timeout_ms": timeout_ms,
|
||||
}
|
||||
|
||||
if stdin is not None:
|
||||
payload["stdin"] = stdin
|
||||
|
||||
if files:
|
||||
payload["files"] = files
|
||||
payload = self._build_payload(code, stdin, timeout_ms, files)
|
||||
|
||||
response = self.session.post(url, json=payload, timeout=timeout_ms / 1000 + 10)
|
||||
response.raise_for_status()
|
||||
|
||||
return ExecuteResponse(**response.json())
|
||||
|
||||
def execute_streaming(
    self,
    code: str,
    stdin: str | None = None,
    timeout_ms: int = 30000,
    files: list[FileInput] | None = None,
) -> Generator[StreamEvent, None, None]:
    """Execute Python code with streaming SSE output.

    Posts to ``/v1/execute/stream`` and yields StreamEvent objects
    (StreamOutputEvent, StreamResultEvent, StreamErrorEvent) as they
    arrive. If the endpoint answers 404 (older code-interpreter
    versions), the same request is replayed through the batch endpoint
    and its result is yielded as stream events instead.

    The streamed HTTP response is always closed when this generator
    finishes, fails, or is closed early by the consumer, so the
    underlying connection is returned to the session pool.

    Raises:
        requests.HTTPError: for any non-404 error status.
    """
    url = f"{self.base_url}/v1/execute/stream"
    payload = self._build_payload(code, stdin, timeout_ms, files)

    response = self.session.post(
        url,
        json=payload,
        stream=True,
        timeout=timeout_ms / 1000 + 10,
    )

    # With stream=True the connection stays checked out until the body is
    # fully read or the response is closed. The finally clause guarantees
    # the release on every exit path, including GeneratorExit.
    try:
        streaming_supported = response.status_code != 404
        if streaming_supported:
            response.raise_for_status()
            yield from self._parse_sse(response)
    finally:
        response.close()

    if not streaming_supported:
        logger.info("Streaming endpoint not available, falling back to batch execution")
        yield from self._batch_as_stream(code, stdin, timeout_ms, files)
|
||||
|
||||
def _parse_sse(
    self, response: requests.Response
) -> Generator[StreamEvent, None, None]:
    """Parse an SSE streaming response into StreamEvent objects.

    Expected format per event:
        event: <type>
        data: <json>
        <blank line>

    Several ``data:`` lines belonging to one event are joined with
    newlines before JSON decoding. Events whose type is not in
    ``_SSE_EVENT_MAP`` are logged and skipped, lines that are neither
    ``event:`` nor ``data:`` are ignored, and a trailing event that is
    not terminated by a blank line is logged and dropped.

    Raises:
        json.JSONDecodeError: if an event's data is not valid JSON.
    """
    # Server-sent events are UTF-8 by specification. Without an explicit
    # charset, requests falls back to ISO-8859-1 for text/* content types
    # (mangling non-ASCII output) or, when it detects no encoding at all,
    # yields raw bytes that never match the str comparisons below.
    response.encoding = "utf-8"

    event_type: str | None = None
    data_lines: list[str] = []

    for line in response.iter_lines(decode_unicode=True):
        if line is None:
            continue

        if line == "":
            # Blank line marks end of an SSE event
            if event_type is not None and data_lines:
                data = "\n".join(data_lines)
                model_cls = _SSE_EVENT_MAP.get(event_type)
                if model_cls is not None:
                    yield model_cls(**json.loads(data))
                else:
                    logger.warning(f"Unknown SSE event type: {event_type}")
            # Reset the accumulator even when the event was incomplete.
            event_type = None
            data_lines = []
        elif line.startswith("event:"):
            event_type = line[len("event:") :].strip()
        elif line.startswith("data:"):
            # Surrounding whitespace is insignificant for a JSON payload.
            data_lines.append(line[len("data:") :].strip())

    if event_type is not None or data_lines:
        logger.warning(
            f"SSE stream ended with incomplete event: "
            f"event_type={event_type}, data_lines={data_lines}"
        )
|
||||
|
||||
def _batch_as_stream(
    self,
    code: str,
    stdin: str | None,
    timeout_ms: int,
    files: list[FileInput] | None,
) -> Generator[StreamEvent, None, None]:
    """Run the code through the batch endpoint and replay it as stream events.

    Yields at most one StreamOutputEvent for stdout and one for stderr
    (each only when non-empty, stdout first), followed by exactly one
    StreamResultEvent built from the batch response.
    """
    batch = self.execute(code, stdin, timeout_ms, files)

    captured: tuple[tuple[Literal["stdout", "stderr"], str], ...] = (
        ("stdout", batch.stdout),
        ("stderr", batch.stderr),
    )
    for stream_name, text in captured:
        if text:
            yield StreamOutputEvent(stream=stream_name, data=text)

    yield StreamResultEvent(
        exit_code=batch.exit_code,
        timed_out=batch.timed_out,
        duration_ms=batch.duration_ms,
        files=batch.files,
    )
|
||||
|
||||
def upload_file(self, file_content: bytes, filename: str) -> str:
|
||||
"""Upload file to Code Interpreter and return file_id"""
|
||||
url = f"{self.base_url}/v1/files"
|
||||
|
||||
@@ -12,6 +12,7 @@ from onyx.configs.app_configs import CODE_INTERPRETER_BASE_URL
|
||||
from onyx.configs.app_configs import CODE_INTERPRETER_DEFAULT_TIMEOUT_MS
|
||||
from onyx.configs.app_configs import CODE_INTERPRETER_MAX_OUTPUT_LENGTH
|
||||
from onyx.configs.constants import FileOrigin
|
||||
from onyx.db.code_interpreter import fetch_code_interpreter_server
|
||||
from onyx.file_store.utils import build_full_frontend_file_url
|
||||
from onyx.file_store.utils import get_default_file_store
|
||||
from onyx.server.query_and_chat.placement import Placement
|
||||
@@ -28,6 +29,15 @@ from onyx.tools.tool_implementations.python.code_interpreter_client import (
|
||||
CodeInterpreterClient,
|
||||
)
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import FileInput
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import (
|
||||
StreamErrorEvent,
|
||||
)
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import (
|
||||
StreamOutputEvent,
|
||||
)
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import (
|
||||
StreamResultEvent,
|
||||
)
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
|
||||
@@ -94,8 +104,10 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
|
||||
@override
|
||||
@classmethod
|
||||
def is_available(cls, db_session: Session) -> bool:
|
||||
is_available = bool(CODE_INTERPRETER_BASE_URL)
|
||||
return is_available
|
||||
if not CODE_INTERPRETER_BASE_URL:
|
||||
return False
|
||||
server = fetch_code_interpreter_server(db_session)
|
||||
return server.server_enabled
|
||||
|
||||
def tool_definition(self) -> dict:
|
||||
return {
|
||||
@@ -181,19 +193,50 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
|
||||
try:
|
||||
logger.debug(f"Executing code: {code}")
|
||||
|
||||
# Execute code with timeout
|
||||
response = client.execute(
|
||||
# Execute code with streaming (falls back to batch if unavailable)
|
||||
stdout_parts: list[str] = []
|
||||
stderr_parts: list[str] = []
|
||||
result_event: StreamResultEvent | None = None
|
||||
|
||||
for event in client.execute_streaming(
|
||||
code=code,
|
||||
timeout_ms=CODE_INTERPRETER_DEFAULT_TIMEOUT_MS,
|
||||
files=files_to_stage or None,
|
||||
)
|
||||
):
|
||||
if isinstance(event, StreamOutputEvent):
|
||||
if event.stream == "stdout":
|
||||
stdout_parts.append(event.data)
|
||||
else:
|
||||
stderr_parts.append(event.data)
|
||||
# Emit incremental delta to frontend
|
||||
self.emitter.emit(
|
||||
Packet(
|
||||
placement=placement,
|
||||
obj=PythonToolDelta(
|
||||
stdout=event.data if event.stream == "stdout" else "",
|
||||
stderr=event.data if event.stream == "stderr" else "",
|
||||
),
|
||||
)
|
||||
)
|
||||
elif isinstance(event, StreamResultEvent):
|
||||
result_event = event
|
||||
elif isinstance(event, StreamErrorEvent):
|
||||
raise RuntimeError(f"Code interpreter error: {event.message}")
|
||||
|
||||
if result_event is None:
|
||||
raise RuntimeError(
|
||||
"Code interpreter stream ended without a result event"
|
||||
)
|
||||
|
||||
full_stdout = "".join(stdout_parts)
|
||||
full_stderr = "".join(stderr_parts)
|
||||
|
||||
# Truncate output for LLM consumption
|
||||
truncated_stdout = _truncate_output(
|
||||
response.stdout, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stdout"
|
||||
full_stdout, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stdout"
|
||||
)
|
||||
truncated_stderr = _truncate_output(
|
||||
response.stderr, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stderr"
|
||||
full_stderr, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stderr"
|
||||
)
|
||||
|
||||
# Handle generated files
|
||||
@@ -202,7 +245,7 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
|
||||
file_ids_to_cleanup: list[str] = []
|
||||
file_store = get_default_file_store()
|
||||
|
||||
for workspace_file in response.files:
|
||||
for workspace_file in result_event.files:
|
||||
if workspace_file.kind != "file" or not workspace_file.file_id:
|
||||
continue
|
||||
|
||||
@@ -258,26 +301,23 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
|
||||
f"Failed to delete Code Interpreter staged file {file_mapping['file_id']}: {e}"
|
||||
)
|
||||
|
||||
# Emit delta with stdout/stderr and generated files
|
||||
self.emitter.emit(
|
||||
Packet(
|
||||
placement=placement,
|
||||
obj=PythonToolDelta(
|
||||
stdout=truncated_stdout,
|
||||
stderr=truncated_stderr,
|
||||
file_ids=generated_file_ids,
|
||||
),
|
||||
# Emit file_ids once files are processed
|
||||
if generated_file_ids:
|
||||
self.emitter.emit(
|
||||
Packet(
|
||||
placement=placement,
|
||||
obj=PythonToolDelta(file_ids=generated_file_ids),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Build result
|
||||
result = LlmPythonExecutionResult(
|
||||
stdout=truncated_stdout,
|
||||
stderr=truncated_stderr,
|
||||
exit_code=response.exit_code,
|
||||
timed_out=response.timed_out,
|
||||
exit_code=result_event.exit_code,
|
||||
timed_out=result_event.timed_out,
|
||||
generated_files=generated_files,
|
||||
error=None if response.exit_code == 0 else truncated_stderr,
|
||||
error=None if result_event.exit_code == 0 else truncated_stderr,
|
||||
)
|
||||
|
||||
# Serialize result for LLM
|
||||
|
||||
@@ -6,6 +6,8 @@ aioboto3==15.1.0
|
||||
# via onyx
|
||||
aiobotocore==2.24.0
|
||||
# via aioboto3
|
||||
aiofile==3.9.0
|
||||
# via py-key-value-aio
|
||||
aiofiles==25.1.0
|
||||
# via
|
||||
# aioboto3
|
||||
@@ -40,8 +42,10 @@ anyio==4.11.0
|
||||
# httpx
|
||||
# mcp
|
||||
# openai
|
||||
# py-key-value-aio
|
||||
# sse-starlette
|
||||
# starlette
|
||||
# watchfiles
|
||||
argon2-cffi==23.1.0
|
||||
# via pwdlib
|
||||
argon2-cffi-bindings==25.1.0
|
||||
@@ -74,9 +78,7 @@ backports-tarfile==1.2.0 ; python_full_version < '3.12'
|
||||
bcrypt==4.3.0
|
||||
# via pwdlib
|
||||
beartype==0.22.6
|
||||
# via
|
||||
# py-key-value-aio
|
||||
# py-key-value-shared
|
||||
# via py-key-value-aio
|
||||
beautifulsoup4==4.12.3
|
||||
# via
|
||||
# atlassian-python-api
|
||||
@@ -110,6 +112,8 @@ cachetools==6.2.2
|
||||
# via
|
||||
# google-auth
|
||||
# py-key-value-aio
|
||||
caio==0.9.25
|
||||
# via aiofile
|
||||
celery==5.5.1
|
||||
# via onyx
|
||||
certifi==2025.11.12
|
||||
@@ -170,7 +174,6 @@ cloudpickle==3.1.2
|
||||
# via
|
||||
# dask
|
||||
# distributed
|
||||
# pydocket
|
||||
cobble==0.1.4
|
||||
# via mammoth
|
||||
cohere==5.6.1
|
||||
@@ -218,8 +221,6 @@ deprecated==1.3.1
|
||||
# pygithub
|
||||
discord-py==2.4.0
|
||||
# via onyx
|
||||
diskcache==5.6.3
|
||||
# via py-key-value-aio
|
||||
distributed==2026.1.1
|
||||
# via onyx
|
||||
distro==1.9.0
|
||||
@@ -256,8 +257,6 @@ exceptiongroup==1.3.0
|
||||
# via
|
||||
# braintrust
|
||||
# fastmcp
|
||||
fakeredis==2.33.0
|
||||
# via pydocket
|
||||
fastapi==0.128.0
|
||||
# via
|
||||
# fastapi-limiter
|
||||
@@ -273,7 +272,7 @@ fastapi-users-db-sqlalchemy==7.0.0
|
||||
# via onyx
|
||||
fastavro==1.12.1
|
||||
# via cohere
|
||||
fastmcp==2.14.2
|
||||
fastmcp==3.0.2
|
||||
# via onyx
|
||||
fastuuid==0.14.0
|
||||
# via litellm
|
||||
@@ -478,7 +477,9 @@ jsonpatch==1.33
|
||||
jsonpointer==3.0.0
|
||||
# via jsonpatch
|
||||
jsonref==1.1.0
|
||||
# via onyx
|
||||
# via
|
||||
# fastmcp
|
||||
# onyx
|
||||
jsonschema==4.25.1
|
||||
# via
|
||||
# litellm
|
||||
@@ -513,8 +514,6 @@ locket==1.0.0
|
||||
# via
|
||||
# distributed
|
||||
# partd
|
||||
lupa==2.6
|
||||
# via fakeredis
|
||||
lxml==5.3.0
|
||||
# via
|
||||
# htmldate
|
||||
@@ -556,7 +555,7 @@ marshmallow==3.26.2
|
||||
# via dataclasses-json
|
||||
matrix-client==0.3.2
|
||||
# via zulip
|
||||
mcp==1.25.0
|
||||
mcp==1.26.0
|
||||
# via
|
||||
# claude-agent-sdk
|
||||
# fastmcp
|
||||
@@ -613,7 +612,7 @@ oauthlib==3.2.2
|
||||
# kubernetes
|
||||
# onyx
|
||||
# requests-oauthlib
|
||||
office365-rest-python-client==2.5.9
|
||||
office365-rest-python-client==2.6.2
|
||||
# via onyx
|
||||
olefile==0.47
|
||||
# via
|
||||
@@ -642,22 +641,16 @@ opensearch-py==3.0.0
|
||||
opentelemetry-api==1.39.1
|
||||
# via
|
||||
# ddtrace
|
||||
# fastmcp
|
||||
# langfuse
|
||||
# openinference-instrumentation
|
||||
# opentelemetry-exporter-otlp-proto-http
|
||||
# opentelemetry-exporter-prometheus
|
||||
# opentelemetry-instrumentation
|
||||
# opentelemetry-sdk
|
||||
# opentelemetry-semantic-conventions
|
||||
# pydocket
|
||||
opentelemetry-exporter-otlp-proto-common==1.39.1
|
||||
# via opentelemetry-exporter-otlp-proto-http
|
||||
opentelemetry-exporter-otlp-proto-http==1.39.1
|
||||
# via langfuse
|
||||
opentelemetry-exporter-prometheus==0.60b1
|
||||
# via pydocket
|
||||
opentelemetry-instrumentation==0.60b1
|
||||
# via pydocket
|
||||
opentelemetry-proto==1.39.1
|
||||
# via
|
||||
# onyx
|
||||
@@ -668,17 +661,15 @@ opentelemetry-sdk==1.39.1
|
||||
# langfuse
|
||||
# openinference-instrumentation
|
||||
# opentelemetry-exporter-otlp-proto-http
|
||||
# opentelemetry-exporter-prometheus
|
||||
opentelemetry-semantic-conventions==0.60b1
|
||||
# via
|
||||
# opentelemetry-instrumentation
|
||||
# opentelemetry-sdk
|
||||
# via opentelemetry-sdk
|
||||
orjson==3.11.4 ; platform_python_implementation != 'PyPy'
|
||||
# via langsmith
|
||||
packaging==24.2
|
||||
# via
|
||||
# dask
|
||||
# distributed
|
||||
# fastmcp
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# huggingface-hub
|
||||
@@ -689,7 +680,6 @@ packaging==24.2
|
||||
# langsmith
|
||||
# marshmallow
|
||||
# onnxruntime
|
||||
# opentelemetry-instrumentation
|
||||
# pytest
|
||||
# pywikibot
|
||||
pandas==2.3.3
|
||||
@@ -702,8 +692,6 @@ passlib==1.7.4
|
||||
# via onyx
|
||||
pathable==0.4.4
|
||||
# via jsonschema-path
|
||||
pathvalidate==3.3.1
|
||||
# via py-key-value-aio
|
||||
pdfminer-six==20251107
|
||||
# via markitdown
|
||||
pillow==12.1.1
|
||||
@@ -723,9 +711,7 @@ ply==3.11
|
||||
prometheus-client==0.23.1
|
||||
# via
|
||||
# onyx
|
||||
# opentelemetry-exporter-prometheus
|
||||
# prometheus-fastapi-instrumentator
|
||||
# pydocket
|
||||
prometheus-fastapi-instrumentator==7.1.0
|
||||
# via onyx
|
||||
prompt-toolkit==3.0.52
|
||||
@@ -764,12 +750,8 @@ pwdlib==0.3.0
|
||||
# via fastapi-users
|
||||
py==1.11.0
|
||||
# via retry
|
||||
py-key-value-aio==0.3.0
|
||||
# via
|
||||
# fastmcp
|
||||
# pydocket
|
||||
py-key-value-shared==0.3.0
|
||||
# via py-key-value-aio
|
||||
py-key-value-aio==0.4.4
|
||||
# via fastmcp
|
||||
pyairtable==3.0.1
|
||||
# via onyx
|
||||
pyasn1==0.6.2
|
||||
@@ -806,8 +788,6 @@ pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.12.0
|
||||
# via mcp
|
||||
pydocket==0.16.3
|
||||
# via fastmcp
|
||||
pyee==13.0.0
|
||||
# via playwright
|
||||
pygithub==2.5.0
|
||||
@@ -879,8 +859,6 @@ python-http-client==3.3.7
|
||||
# via sendgrid
|
||||
python-iso639==2025.11.16
|
||||
# via unstructured
|
||||
python-json-logger==4.0.0
|
||||
# via pydocket
|
||||
python-magic==0.4.27
|
||||
# via unstructured
|
||||
python-multipart==0.0.22
|
||||
@@ -918,6 +896,7 @@ pyyaml==6.0.3
|
||||
# via
|
||||
# dask
|
||||
# distributed
|
||||
# fastmcp
|
||||
# huggingface-hub
|
||||
# jsonschema-path
|
||||
# kubernetes
|
||||
@@ -928,11 +907,8 @@ rapidfuzz==3.13.0
|
||||
# unstructured
|
||||
redis==5.0.8
|
||||
# via
|
||||
# fakeredis
|
||||
# fastapi-limiter
|
||||
# onyx
|
||||
# py-key-value-aio
|
||||
# pydocket
|
||||
referencing==0.36.2
|
||||
# via
|
||||
# jsonschema
|
||||
@@ -1007,7 +983,6 @@ rich==14.2.0
|
||||
# via
|
||||
# cyclopts
|
||||
# fastmcp
|
||||
# pydocket
|
||||
# rich-rst
|
||||
# typer
|
||||
rich-rst==1.3.2
|
||||
@@ -1056,9 +1031,7 @@ sniffio==1.3.1
|
||||
# anyio
|
||||
# openai
|
||||
sortedcontainers==2.4.0
|
||||
# via
|
||||
# distributed
|
||||
# fakeredis
|
||||
# via distributed
|
||||
soupsieve==2.8
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.15
|
||||
@@ -1124,9 +1097,7 @@ tqdm==4.67.1
|
||||
trafilatura==1.12.2
|
||||
# via onyx
|
||||
typer==0.20.0
|
||||
# via
|
||||
# mcp
|
||||
# pydocket
|
||||
# via mcp
|
||||
types-awscrt==0.28.4
|
||||
# via botocore-stubs
|
||||
types-openpyxl==3.0.4.7
|
||||
@@ -1162,11 +1133,10 @@ typing-extensions==4.15.0
|
||||
# opentelemetry-exporter-otlp-proto-http
|
||||
# opentelemetry-sdk
|
||||
# opentelemetry-semantic-conventions
|
||||
# py-key-value-shared
|
||||
# py-key-value-aio
|
||||
# pyairtable
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# pydocket
|
||||
# pyee
|
||||
# pygithub
|
||||
# python-docx
|
||||
@@ -1234,6 +1204,8 @@ vine==5.1.0
|
||||
# kombu
|
||||
voyageai==0.2.3
|
||||
# via onyx
|
||||
watchfiles==1.1.1
|
||||
# via fastmcp
|
||||
wcwidth==0.2.14
|
||||
# via prompt-toolkit
|
||||
webencodings==0.5.1
|
||||
@@ -1254,7 +1226,6 @@ wrapt==1.17.3
|
||||
# deprecated
|
||||
# langfuse
|
||||
# openinference-instrumentation
|
||||
# opentelemetry-instrumentation
|
||||
# unstructured
|
||||
xlrd==2.0.2
|
||||
# via markitdown
|
||||
|
||||
@@ -288,7 +288,7 @@ matplotlib-inline==0.2.1
|
||||
# via
|
||||
# ipykernel
|
||||
# ipython
|
||||
mcp==1.25.0
|
||||
mcp==1.26.0
|
||||
# via claude-agent-sdk
|
||||
multidict==6.7.0
|
||||
# via
|
||||
@@ -317,7 +317,7 @@ oauthlib==3.2.2
|
||||
# via
|
||||
# kubernetes
|
||||
# requests-oauthlib
|
||||
onyx-devtools==0.6.0
|
||||
onyx-devtools==0.6.1
|
||||
# via onyx
|
||||
openai==2.14.0
|
||||
# via
|
||||
|
||||
@@ -211,7 +211,7 @@ litellm==1.81.6
|
||||
# via onyx
|
||||
markupsafe==3.0.3
|
||||
# via jinja2
|
||||
mcp==1.25.0
|
||||
mcp==1.26.0
|
||||
# via claude-agent-sdk
|
||||
monotonic==1.6
|
||||
# via posthog
|
||||
|
||||
@@ -246,7 +246,7 @@ litellm==1.81.6
|
||||
# via onyx
|
||||
markupsafe==3.0.3
|
||||
# via jinja2
|
||||
mcp==1.25.0
|
||||
mcp==1.26.0
|
||||
# via claude-agent-sdk
|
||||
mpmath==1.3.0
|
||||
# via sympy
|
||||
|
||||
@@ -95,6 +95,7 @@ def generate_dummy_chunk(
|
||||
return DocMetadataAwareIndexChunk.from_index_chunk(
|
||||
index_chunk=chunk,
|
||||
user_project=[],
|
||||
personas=[],
|
||||
access=DocumentAccess.build(
|
||||
user_emails=user_emails,
|
||||
user_groups=user_groups,
|
||||
|
||||
@@ -3,8 +3,8 @@ set -e
|
||||
|
||||
cleanup() {
|
||||
echo "Error occurred. Cleaning up..."
|
||||
docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio 2>/dev/null || true
|
||||
docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio 2>/dev/null || true
|
||||
docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
|
||||
docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
|
||||
}
|
||||
|
||||
# Trap errors and output a message, then cleanup
|
||||
@@ -20,8 +20,8 @@ MINIO_VOLUME=${4:-""} # Default is empty if not provided
|
||||
|
||||
# Stop and remove the existing containers
|
||||
echo "Stopping and removing existing containers..."
|
||||
docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio 2>/dev/null || true
|
||||
docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio 2>/dev/null || true
|
||||
docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
|
||||
docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
|
||||
|
||||
# Start the PostgreSQL container with optional volume
|
||||
echo "Starting PostgreSQL container..."
|
||||
@@ -55,6 +55,10 @@ else
|
||||
docker run --detach --name onyx_minio --publish 9004:9000 --publish 9005:9001 -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin minio/minio server /data --console-address ":9001"
|
||||
fi
|
||||
|
||||
# Start the Code Interpreter container
|
||||
echo "Starting Code Interpreter container..."
|
||||
docker run --detach --name onyx_code_interpreter --publish 8000:8000 --user root -v /var/run/docker.sock:/var/run/docker.sock onyxdotapp/code-interpreter:latest bash ./entrypoint.sh code-interpreter-api
|
||||
|
||||
# Ensure alembic runs in the correct directory (backend/)
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
PARENT_DIR="$(dirname "$SCRIPT_DIR")"
|
||||
|
||||
@@ -9,6 +9,7 @@ from collections.abc import AsyncGenerator
|
||||
from collections.abc import Generator
|
||||
from contextlib import asynccontextmanager
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from dotenv import load_dotenv
|
||||
@@ -46,11 +47,15 @@ def mock_current_admin_user() -> MagicMock:
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def client() -> Generator[TestClient, None, None]:
|
||||
# Initialize TestClient with the FastAPI app using a no-op test lifespan
|
||||
# Initialize TestClient with the FastAPI app using a no-op test lifespan.
|
||||
# Patch out prometheus metrics setup to avoid "Duplicated timeseries in
|
||||
# CollectorRegistry" errors when multiple tests each create a new app
|
||||
# (prometheus registers metrics globally and rejects duplicate names).
|
||||
get_app = fetch_versioned_implementation(
|
||||
module="onyx.main", attribute="get_application"
|
||||
)
|
||||
app: FastAPI = get_app(lifespan_override=test_lifespan)
|
||||
with patch("onyx.main.setup_prometheus_metrics"):
|
||||
app: FastAPI = get_app(lifespan_override=test_lifespan)
|
||||
|
||||
# Override the database session dependency with a mock
|
||||
# (these tests don't actually need DB access)
|
||||
|
||||
@@ -990,6 +990,27 @@ class _MockCIHandler(BaseHTTPRequestHandler):
|
||||
self._respond_json(
|
||||
200, {"file_id": f"mock-ci-file-{self.server._file_counter}"}
|
||||
)
|
||||
elif self.path == "/v1/execute/stream":
|
||||
if self.server.streaming_enabled:
|
||||
self._respond_sse(
|
||||
[
|
||||
(
|
||||
"output",
|
||||
{"stream": "stdout", "data": "mock output\n"},
|
||||
),
|
||||
(
|
||||
"result",
|
||||
{
|
||||
"exit_code": 0,
|
||||
"timed_out": False,
|
||||
"duration_ms": 50,
|
||||
"files": [],
|
||||
},
|
||||
),
|
||||
]
|
||||
)
|
||||
else:
|
||||
self._respond_json(404, {"error": "not found"})
|
||||
elif self.path == "/v1/execute":
|
||||
self._respond_json(
|
||||
200,
|
||||
@@ -1027,6 +1048,17 @@ class _MockCIHandler(BaseHTTPRequestHandler):
|
||||
self.end_headers()
|
||||
self.wfile.write(payload)
|
||||
|
||||
def _respond_sse(self, events: list[tuple[str, dict[str, Any]]]) -> None:
    """Send a 200 ``text/event-stream`` response containing ``events``.

    Each ``(event_type, data)`` pair becomes one SSE frame: an
    ``event:`` line, a ``data:`` line holding the JSON-encoded dict,
    and a terminating blank line. The whole body is written in one go
    with a matching Content-Length.
    """
    body = "".join(
        f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
        for event_type, data in events
    ).encode()

    self.send_response(200)
    response_headers = (
        ("Content-Type", "text/event-stream"),
        ("Content-Length", str(len(body))),
    )
    for header_name, header_value in response_headers:
        self.send_header(header_name, header_value)
    self.end_headers()
    self.wfile.write(body)
|
||||
|
||||
def log_message(self, format: str, *args: Any) -> None:  # noqa: A002
    """Intentionally do nothing so the mock handler emits no log lines."""
    return None
|
||||
|
||||
@@ -1038,6 +1070,7 @@ class MockCodeInterpreterServer(HTTPServer):
|
||||
super().__init__(("localhost", 0), _MockCIHandler)
|
||||
self.captured_requests: list[CapturedRequest] = []
|
||||
self._file_counter = 0
|
||||
self.streaming_enabled: bool = True
|
||||
|
||||
@property
|
||||
def url(self) -> str:
|
||||
@@ -1168,17 +1201,19 @@ def test_code_interpreter_receives_chat_files(
|
||||
finally:
|
||||
ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults
|
||||
|
||||
# Verify: file uploaded, code executed, staged file cleaned up
|
||||
# Verify: file uploaded, code executed via streaming, staged file cleaned up
|
||||
assert len(mock_ci_server.get_requests(method="POST", path="/v1/files")) == 1
|
||||
assert len(mock_ci_server.get_requests(method="POST", path="/v1/execute")) == 1
|
||||
assert (
|
||||
len(mock_ci_server.get_requests(method="POST", path="/v1/execute/stream")) == 1
|
||||
)
|
||||
|
||||
delete_requests = mock_ci_server.get_requests(method="DELETE")
|
||||
assert len(delete_requests) == 1
|
||||
assert delete_requests[0].path.startswith("/v1/files/")
|
||||
|
||||
execute_body = mock_ci_server.get_requests(method="POST", path="/v1/execute")[
|
||||
0
|
||||
].json_body()
|
||||
execute_body = mock_ci_server.get_requests(
|
||||
method="POST", path="/v1/execute/stream"
|
||||
)[0].json_body()
|
||||
assert execute_body["code"] == code
|
||||
assert len(execute_body["files"]) == 1
|
||||
assert execute_body["files"][0]["path"] == "data.csv"
|
||||
@@ -1284,7 +1319,9 @@ def test_code_interpreter_replay_packets_include_code_and_output(
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
assert len(mock_ci_server.get_requests(method="POST", path="/v1/execute")) == 1
|
||||
assert (
|
||||
len(mock_ci_server.get_requests(method="POST", path="/v1/execute/stream")) == 1
|
||||
)
|
||||
|
||||
# The response contains `packets` — a list of packet-lists, one per
|
||||
# assistant message. We should have exactly one assistant message.
|
||||
@@ -1313,3 +1350,76 @@ def test_code_interpreter_replay_packets_include_code_and_output(
|
||||
delta_obj = delta_packets[0].obj
|
||||
assert isinstance(delta_obj, PythonToolDelta)
|
||||
assert "mock output" in delta_obj.stdout
|
||||
|
||||
|
||||
def test_code_interpreter_streaming_fallback_to_batch(
    db_session: Session,
    mock_ci_server: MockCodeInterpreterServer,
    _attach_python_tool_to_default_persona: None,
    initialize_file_store: None,  # noqa: ARG001
) -> None:
    """When the streaming endpoint is not available (older code-interpreter),
    execute_streaming should fall back to the batch /v1/execute endpoint.

    The mock server's ``streaming_enabled`` flag is only flipped inside the
    guarded ``try`` below, so a failure during setup cannot leave the mock
    server with streaming disabled for other tests that use it.
    """
    mock_ci_server.captured_requests.clear()
    mock_ci_server._file_counter = 0
    mock_url = mock_ci_server.url

    user = create_test_user(db_session, "ci_fallback_test")
    chat_session = create_chat_session(db_session=db_session, user=user)

    code = 'print("fallback test")'
    msg_req = SendMessageRequest(
        message="Print fallback test",
        chat_session_id=chat_session.id,
        stream=True,
    )

    original_defaults = ci_mod.CodeInterpreterClient.__init__.__defaults__
    with (
        use_mock_llm() as mock_llm,
        patch(
            "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
            mock_url,
        ),
        patch(
            "onyx.tools.tool_implementations.python.code_interpreter_client.CODE_INTERPRETER_BASE_URL",
            mock_url,
        ),
    ):
        mock_llm.add_response(
            LLMToolCallResponse(
                tool_name="python",
                tool_call_id="call_fallback",
                tool_call_argument_tokens=[json.dumps({"code": code})],
            )
        )
        mock_llm.forward_till_end()

        try:
            # Both pieces of shared state are mutated inside the try so the
            # finally clause always restores them.
            mock_ci_server.streaming_enabled = False
            ci_mod.CodeInterpreterClient.__init__.__defaults__ = (mock_url,)
            packets = list(
                handle_stream_message_objects(
                    new_msg_req=msg_req, user=user, db_session=db_session
                )
            )
        finally:
            ci_mod.CodeInterpreterClient.__init__.__defaults__ = original_defaults
            mock_ci_server.streaming_enabled = True

    # Streaming was attempted first (returned 404), then fell back to batch
    assert (
        len(mock_ci_server.get_requests(method="POST", path="/v1/execute/stream")) == 1
    )
    assert len(mock_ci_server.get_requests(method="POST", path="/v1/execute")) == 1

    # Verify output still made it through
    delta_packets = [
        p
        for p in packets
        if isinstance(p, Packet) and isinstance(p.obj, PythonToolDelta)
    ]
    assert len(delta_packets) >= 1
    first_delta = delta_packets[0].obj
    assert isinstance(first_delta, PythonToolDelta)
    assert "mock output" in first_delta.stdout
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Tests that PythonTool.is_available() respects the server_enabled DB flag.
|
||||
|
||||
Uses a real DB session with CODE_INTERPRETER_BASE_URL mocked so the
|
||||
environment-variable check passes and the DB flag is the deciding factor.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.code_interpreter import fetch_code_interpreter_server
|
||||
from onyx.db.code_interpreter import update_code_interpreter_server_enabled
|
||||
from onyx.tools.tool_implementations.python.python_tool import PythonTool
|
||||
|
||||
|
||||
def test_python_tool_unavailable_when_server_disabled(
    db_session: Session,
) -> None:
    """PythonTool must report unavailable while ``server_enabled`` is False.

    The base URL is patched to a non-empty value so that the DB flag is the
    only thing deciding the outcome. The flag's initial value is restored
    afterwards regardless of the assertion result.
    """
    base_url_target = (
        "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL"
    )
    initial_enabled = fetch_code_interpreter_server(db_session).server_enabled

    try:
        update_code_interpreter_server_enabled(db_session, enabled=False)
        with patch(base_url_target, "http://fake:8888"):
            tool_available = PythonTool.is_available(db_session)
            assert tool_available is False
    finally:
        update_code_interpreter_server_enabled(db_session, enabled=initial_enabled)
|
||||
|
||||
|
||||
def test_python_tool_available_when_server_enabled(
    db_session: Session,
) -> None:
    """PythonTool must report available while ``server_enabled`` is True.

    The base URL is patched to a non-empty value so that the DB flag is the
    only thing deciding the outcome. The flag's initial value is restored
    afterwards regardless of the assertion result.
    """
    base_url_target = (
        "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL"
    )
    initial_enabled = fetch_code_interpreter_server(db_session).server_enabled

    try:
        update_code_interpreter_server_enabled(db_session, enabled=True)
        with patch(base_url_target, "http://fake:8888"):
            tool_available = PythonTool.is_available(db_session)
            assert tool_available is True
    finally:
        update_code_interpreter_server_enabled(db_session, enabled=initial_enabled)
|
||||
@@ -38,5 +38,5 @@ COPY --from=openapi-client /local/onyx_openapi_client /app/generated/onyx_openap
|
||||
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
ENTRYPOINT ["pytest", "-s"]
|
||||
ENTRYPOINT ["pytest", "-s", "-rs"]
|
||||
CMD ["/app/tests/integration", "--ignore=/app/tests/integration/multitenant_tests"]
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import time
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlencode
|
||||
from uuid import UUID
|
||||
@@ -8,8 +9,10 @@ from requests.models import CaseInsensitiveDict
|
||||
from ee.onyx.server.query_history.models import ChatSessionMinimal
|
||||
from ee.onyx.server.query_history.models import ChatSessionSnapshot
|
||||
from onyx.configs.constants import QAFeedbackType
|
||||
from onyx.db.enums import TaskStatus
|
||||
from onyx.server.documents.models import PaginatedReturn
|
||||
from tests.integration.common_utils.constants import API_SERVER_URL
|
||||
from tests.integration.common_utils.constants import MAX_DELAY
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
|
||||
|
||||
@@ -69,9 +72,42 @@ class QueryHistoryManager:
|
||||
if end_time:
|
||||
query_params["end"] = end_time.isoformat()
|
||||
|
||||
response = requests.get(
|
||||
url=f"{API_SERVER_URL}/admin/query-history-csv?{urlencode(query_params, doseq=True)}",
|
||||
start_response = requests.post(
|
||||
url=f"{API_SERVER_URL}/admin/query-history/start-export?{urlencode(query_params, doseq=True)}",
|
||||
headers=user_performing_action.headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.headers, response.content.decode()
|
||||
start_response.raise_for_status()
|
||||
request_id = start_response.json()["request_id"]
|
||||
|
||||
deadline = time.time() + MAX_DELAY
|
||||
while time.time() < deadline:
|
||||
status_response = requests.get(
|
||||
url=f"{API_SERVER_URL}/admin/query-history/export-status",
|
||||
params={"request_id": request_id},
|
||||
headers=user_performing_action.headers,
|
||||
)
|
||||
status_response.raise_for_status()
|
||||
status = status_response.json()["status"]
|
||||
if status == TaskStatus.SUCCESS:
|
||||
break
|
||||
if status == TaskStatus.FAILURE:
|
||||
raise RuntimeError("Query history export task failed")
|
||||
time.sleep(2)
|
||||
else:
|
||||
raise TimeoutError(
|
||||
f"Query history export not completed within {MAX_DELAY} seconds"
|
||||
)
|
||||
|
||||
download_response = requests.get(
|
||||
url=f"{API_SERVER_URL}/admin/query-history/download",
|
||||
params={"request_id": request_id},
|
||||
headers=user_performing_action.headers,
|
||||
)
|
||||
download_response.raise_for_status()
|
||||
|
||||
if not download_response.content:
|
||||
raise RuntimeError(
|
||||
"Query history CSV download returned zero-length content"
|
||||
)
|
||||
|
||||
return download_response.headers, download_response.content.decode()
|
||||
|
||||
@@ -6,16 +6,26 @@ import pytest
|
||||
from onyx.connectors.slack.models import ChannelType
|
||||
from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager
|
||||
|
||||
# from tests.load_env_vars import load_env_vars
|
||||
|
||||
# load_env_vars()
|
||||
SLACK_ADMIN_EMAIL = os.environ.get("SLACK_ADMIN_EMAIL", "evan@onyx.app")
|
||||
SLACK_TEST_USER_1_EMAIL = os.environ.get("SLACK_TEST_USER_1_EMAIL", "evan+1@onyx.app")
|
||||
SLACK_TEST_USER_2_EMAIL = os.environ.get("SLACK_TEST_USER_2_EMAIL", "justin@onyx.app")
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def slack_test_setup() -> Generator[tuple[ChannelType, ChannelType], None, None]:
|
||||
slack_client = SlackManager.get_slack_client(os.environ["SLACK_BOT_TOKEN"])
|
||||
def _provision_slack_channels(
|
||||
bot_token: str,
|
||||
) -> Generator[tuple[ChannelType, ChannelType], None, None]:
|
||||
slack_client = SlackManager.get_slack_client(bot_token)
|
||||
|
||||
auth_info = slack_client.auth_test()
|
||||
print(f"\nSlack workspace: {auth_info.get('team')} ({auth_info.get('url')})")
|
||||
|
||||
user_map = SlackManager.build_slack_user_email_id_map(slack_client)
|
||||
admin_user_id = user_map["admin@example.com"]
|
||||
if SLACK_ADMIN_EMAIL not in user_map:
|
||||
raise KeyError(
|
||||
f"'{SLACK_ADMIN_EMAIL}' not found in Slack workspace. "
|
||||
f"Available emails: {sorted(user_map.keys())}"
|
||||
)
|
||||
admin_user_id = user_map[SLACK_ADMIN_EMAIL]
|
||||
|
||||
(
|
||||
public_channel,
|
||||
@@ -27,5 +37,16 @@ def slack_test_setup() -> Generator[tuple[ChannelType, ChannelType], None, None]
|
||||
|
||||
yield public_channel, private_channel
|
||||
|
||||
# This part will always run after the test, even if it fails
|
||||
SlackManager.cleanup_after_test(slack_client=slack_client, test_id=run_id)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def slack_test_setup() -> Generator[tuple[ChannelType, ChannelType], None, None]:
|
||||
yield from _provision_slack_channels(os.environ["SLACK_BOT_TOKEN"])
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def slack_perm_sync_test_setup() -> (
|
||||
Generator[tuple[ChannelType, ChannelType], None, None]
|
||||
):
|
||||
yield from _provision_slack_channels(os.environ["SLACK_BOT_TOKEN_TEST_SPACE"])
|
||||
|
||||
@@ -22,6 +22,9 @@ from tests.integration.common_utils.test_models import DATestConnector
|
||||
from tests.integration.common_utils.test_models import DATestCredential
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
from tests.integration.common_utils.vespa import vespa_fixture
|
||||
from tests.integration.connector_job_tests.slack.conftest import SLACK_ADMIN_EMAIL
|
||||
from tests.integration.connector_job_tests.slack.conftest import SLACK_TEST_USER_1_EMAIL
|
||||
from tests.integration.connector_job_tests.slack.conftest import SLACK_TEST_USER_2_EMAIL
|
||||
from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager
|
||||
|
||||
|
||||
@@ -34,26 +37,24 @@ from tests.integration.connector_job_tests.slack.slack_api_utils import SlackMan
|
||||
def test_slack_permission_sync(
|
||||
reset: None, # noqa: ARG001
|
||||
vespa_client: vespa_fixture, # noqa: ARG001
|
||||
slack_test_setup: tuple[ChannelType, ChannelType],
|
||||
slack_perm_sync_test_setup: tuple[ChannelType, ChannelType],
|
||||
) -> None:
|
||||
public_channel, private_channel = slack_test_setup
|
||||
public_channel, private_channel = slack_perm_sync_test_setup
|
||||
|
||||
# Creating an admin user (first user created is automatically an admin)
|
||||
admin_user: DATestUser = UserManager.create(
|
||||
email="admin@example.com",
|
||||
email=SLACK_ADMIN_EMAIL,
|
||||
)
|
||||
|
||||
# Creating a non-admin user
|
||||
test_user_1: DATestUser = UserManager.create(
|
||||
email="test_user_1@example.com",
|
||||
email=SLACK_TEST_USER_1_EMAIL,
|
||||
)
|
||||
|
||||
# Creating a non-admin user
|
||||
test_user_2: DATestUser = UserManager.create(
|
||||
email="test_user_2@example.com",
|
||||
email=SLACK_TEST_USER_2_EMAIL,
|
||||
)
|
||||
|
||||
slack_client = SlackManager.get_slack_client(os.environ["SLACK_BOT_TOKEN"])
|
||||
bot_token = os.environ["SLACK_BOT_TOKEN_TEST_SPACE"]
|
||||
slack_client = SlackManager.get_slack_client(bot_token)
|
||||
email_id_map = SlackManager.build_slack_user_email_id_map(slack_client)
|
||||
admin_user_id = email_id_map[admin_user.email]
|
||||
|
||||
@@ -63,7 +64,7 @@ def test_slack_permission_sync(
|
||||
credential: DATestCredential = CredentialManager.create(
|
||||
source=DocumentSource.SLACK,
|
||||
credential_json={
|
||||
"slack_bot_token": os.environ["SLACK_BOT_TOKEN"],
|
||||
"slack_bot_token": bot_token,
|
||||
},
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
@@ -73,6 +74,7 @@ def test_slack_permission_sync(
|
||||
source=DocumentSource.SLACK,
|
||||
connector_specific_config={
|
||||
"channels": [public_channel["name"], private_channel["name"]],
|
||||
"include_bot_messages": True,
|
||||
},
|
||||
access_type=AccessType.SYNC,
|
||||
groups=[],
|
||||
@@ -102,14 +104,11 @@ def test_slack_permission_sync(
|
||||
public_message = "Steve's favorite number is 809752"
|
||||
private_message = "Sara's favorite number is 346794"
|
||||
|
||||
# Add messages to channels
|
||||
print(f"\n Adding public message to channel: {public_message}")
|
||||
SlackManager.add_message_to_channel(
|
||||
slack_client=slack_client,
|
||||
channel=public_channel,
|
||||
message=public_message,
|
||||
)
|
||||
print(f"\n Adding private message to channel: {private_message}")
|
||||
SlackManager.add_message_to_channel(
|
||||
slack_client=slack_client,
|
||||
channel=private_channel,
|
||||
@@ -127,7 +126,9 @@ def test_slack_permission_sync(
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Run permission sync
|
||||
# Run permission sync. Since initial_index_should_sync=True for Slack,
|
||||
# permissions were already set during indexing above — the explicit sync
|
||||
# should find no changes to apply.
|
||||
CCPairManager.sync(
|
||||
cc_pair=cc_pair,
|
||||
user_performing_action=admin_user,
|
||||
@@ -135,59 +136,38 @@ def test_slack_permission_sync(
|
||||
CCPairManager.wait_for_sync(
|
||||
cc_pair=cc_pair,
|
||||
after=before,
|
||||
number_of_updated_docs=2,
|
||||
number_of_updated_docs=0,
|
||||
user_performing_action=admin_user,
|
||||
should_wait_for_group_sync=False,
|
||||
should_wait_for_vespa_sync=False,
|
||||
)
|
||||
|
||||
# Search as admin with access to both channels
|
||||
print("\nSearching as admin user")
|
||||
onyx_doc_message_strings = DocumentSearchManager.search_documents(
|
||||
# Verify admin can see messages from both channels
|
||||
admin_docs = DocumentSearchManager.search_documents(
|
||||
query="favorite number",
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
print(
|
||||
"\n documents retrieved by admin user: ",
|
||||
onyx_doc_message_strings,
|
||||
)
|
||||
assert public_message in admin_docs
|
||||
assert private_message in admin_docs
|
||||
|
||||
# Ensure admin user can see messages from both channels
|
||||
assert public_message in onyx_doc_message_strings
|
||||
assert private_message in onyx_doc_message_strings
|
||||
|
||||
# Search as test_user_2 with access to only the public channel
|
||||
print("\n Searching as test_user_2")
|
||||
onyx_doc_message_strings = DocumentSearchManager.search_documents(
|
||||
# Verify test_user_2 can only see public channel messages
|
||||
user_2_docs = DocumentSearchManager.search_documents(
|
||||
query="favorite number",
|
||||
user_performing_action=test_user_2,
|
||||
)
|
||||
print(
|
||||
"\n documents retrieved by test_user_2: ",
|
||||
onyx_doc_message_strings,
|
||||
)
|
||||
assert public_message in user_2_docs
|
||||
assert private_message not in user_2_docs
|
||||
|
||||
# Ensure test_user_2 can only see messages from the public channel
|
||||
assert public_message in onyx_doc_message_strings
|
||||
assert private_message not in onyx_doc_message_strings
|
||||
|
||||
# Search as test_user_1 with access to both channels
|
||||
print("\n Searching as test_user_1")
|
||||
onyx_doc_message_strings = DocumentSearchManager.search_documents(
|
||||
# Verify test_user_1 can see both channels (member of private channel)
|
||||
user_1_docs = DocumentSearchManager.search_documents(
|
||||
query="favorite number",
|
||||
user_performing_action=test_user_1,
|
||||
)
|
||||
print(
|
||||
"\n documents retrieved by test_user_1 before being removed from private channel: ",
|
||||
onyx_doc_message_strings,
|
||||
)
|
||||
assert public_message in user_1_docs
|
||||
assert private_message in user_1_docs
|
||||
|
||||
# Ensure test_user_1 can see messages from both channels
|
||||
assert public_message in onyx_doc_message_strings
|
||||
assert private_message in onyx_doc_message_strings
|
||||
|
||||
# ----------------------MAKE THE CHANGES--------------------------
|
||||
print("\n Removing test_user_1 from the private channel")
|
||||
before = datetime.now(timezone.utc)
|
||||
# Remove test_user_1 from the private channel
|
||||
before = datetime.now(timezone.utc)
|
||||
desired_channel_members = [admin_user]
|
||||
SlackManager.set_channel_members(
|
||||
slack_client=slack_client,
|
||||
@@ -206,24 +186,16 @@ def test_slack_permission_sync(
|
||||
after=before,
|
||||
number_of_updated_docs=1,
|
||||
user_performing_action=admin_user,
|
||||
should_wait_for_group_sync=False,
|
||||
)
|
||||
|
||||
# ----------------------------VERIFY THE CHANGES---------------------------
|
||||
# Ensure test_user_1 can no longer see messages from the private channel
|
||||
# Search as test_user_1 with access to only the public channel
|
||||
|
||||
onyx_doc_message_strings = DocumentSearchManager.search_documents(
|
||||
# Verify test_user_1 can no longer see private channel after removal
|
||||
user_1_docs = DocumentSearchManager.search_documents(
|
||||
query="favorite number",
|
||||
user_performing_action=test_user_1,
|
||||
)
|
||||
print(
|
||||
"\n documents retrieved by test_user_1 after being removed from private channel: ",
|
||||
onyx_doc_message_strings,
|
||||
)
|
||||
|
||||
# Ensure test_user_1 can only see messages from the public channel
|
||||
assert public_message in onyx_doc_message_strings
|
||||
assert private_message not in onyx_doc_message_strings
|
||||
assert public_message in user_1_docs
|
||||
assert private_message not in user_1_docs
|
||||
|
||||
|
||||
# NOTE(rkuo): it isn't yet clear if the reason these were previously xfail'd
|
||||
@@ -235,21 +207,19 @@ def test_slack_permission_sync(
|
||||
def test_slack_group_permission_sync(
|
||||
reset: None, # noqa: ARG001
|
||||
vespa_client: vespa_fixture, # noqa: ARG001
|
||||
slack_test_setup: tuple[ChannelType, ChannelType],
|
||||
slack_perm_sync_test_setup: tuple[ChannelType, ChannelType],
|
||||
) -> None:
|
||||
"""
|
||||
This test ensures that permission sync overrides onyx group access.
|
||||
"""
|
||||
public_channel, private_channel = slack_test_setup
|
||||
public_channel, private_channel = slack_perm_sync_test_setup
|
||||
|
||||
# Creating an admin user (first user created is automatically an admin)
|
||||
admin_user: DATestUser = UserManager.create(
|
||||
email="admin@example.com",
|
||||
email=SLACK_ADMIN_EMAIL,
|
||||
)
|
||||
|
||||
# Creating a non-admin user
|
||||
test_user_1: DATestUser = UserManager.create(
|
||||
email="test_user_1@example.com",
|
||||
email=SLACK_TEST_USER_1_EMAIL,
|
||||
)
|
||||
|
||||
# Create a user group and adding the non-admin user to it
|
||||
@@ -264,7 +234,8 @@ def test_slack_group_permission_sync(
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
slack_client = SlackManager.get_slack_client(os.environ["SLACK_BOT_TOKEN"])
|
||||
bot_token = os.environ["SLACK_BOT_TOKEN_TEST_SPACE"]
|
||||
slack_client = SlackManager.get_slack_client(bot_token)
|
||||
email_id_map = SlackManager.build_slack_user_email_id_map(slack_client)
|
||||
admin_user_id = email_id_map[admin_user.email]
|
||||
|
||||
@@ -282,7 +253,7 @@ def test_slack_group_permission_sync(
|
||||
credential = CredentialManager.create(
|
||||
source=DocumentSource.SLACK,
|
||||
credential_json={
|
||||
"slack_bot_token": os.environ["SLACK_BOT_TOKEN"],
|
||||
"slack_bot_token": bot_token,
|
||||
},
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
@@ -294,6 +265,7 @@ def test_slack_group_permission_sync(
|
||||
source=DocumentSource.SLACK,
|
||||
connector_specific_config={
|
||||
"channels": [private_channel["name"]],
|
||||
"include_bot_messages": True,
|
||||
},
|
||||
access_type=AccessType.SYNC,
|
||||
groups=[user_group.id],
|
||||
@@ -326,7 +298,8 @@ def test_slack_group_permission_sync(
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Run permission sync
|
||||
# Run permission sync. Since initial_index_should_sync=True for Slack,
|
||||
# permissions were already set during indexing — no changes expected.
|
||||
CCPairManager.sync(
|
||||
cc_pair=cc_pair,
|
||||
user_performing_action=admin_user,
|
||||
@@ -334,8 +307,10 @@ def test_slack_group_permission_sync(
|
||||
CCPairManager.wait_for_sync(
|
||||
cc_pair=cc_pair,
|
||||
after=before,
|
||||
number_of_updated_docs=1,
|
||||
number_of_updated_docs=0,
|
||||
user_performing_action=admin_user,
|
||||
should_wait_for_group_sync=False,
|
||||
should_wait_for_vespa_sync=False,
|
||||
)
|
||||
|
||||
# Verify admin can see the message
|
||||
|
||||
@@ -5,22 +5,17 @@ from fastapi import FastAPI
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from fastmcp import FastMCP
|
||||
from fastmcp.server.auth import StaticTokenVerifier
|
||||
from fastmcp.server.server import FunctionTool
|
||||
|
||||
|
||||
def make_many_tools(mcp: FastMCP) -> list[FunctionTool]:
|
||||
def make_tool(i: int) -> FunctionTool:
|
||||
def make_many_tools(mcp: FastMCP) -> None:
|
||||
def make_tool(i: int) -> None:
|
||||
@mcp.tool(name=f"tool_{i}", description=f"Get secret value {i}")
|
||||
def tool_name(name: str) -> str: # noqa: ARG001
|
||||
"""Get secret value."""
|
||||
return f"Secret value {200 - i}!"
|
||||
|
||||
return tool_name
|
||||
|
||||
tools = []
|
||||
for i in range(100):
|
||||
tools.append(make_tool(i))
|
||||
return tools
|
||||
make_tool(i)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -28,7 +28,6 @@ from fastmcp import FastMCP
|
||||
from fastmcp.server.auth import AccessToken
|
||||
from fastmcp.server.auth import TokenVerifier
|
||||
from fastmcp.server.dependencies import get_access_token
|
||||
from fastmcp.server.server import FunctionTool
|
||||
|
||||
# Google's tokeninfo endpoint for validating access tokens
|
||||
GOOGLE_TOKENINFO_URL = "https://oauth2.googleapis.com/tokeninfo"
|
||||
@@ -148,24 +147,19 @@ class GoogleOAuthTokenVerifier(TokenVerifier):
|
||||
await self._http_client.aclose()
|
||||
|
||||
|
||||
def make_tools(mcp: FastMCP) -> list[FunctionTool]:
|
||||
def make_tools(mcp: FastMCP) -> None:
|
||||
"""Create test tools for the MCP server."""
|
||||
tools: list[FunctionTool] = []
|
||||
|
||||
@mcp.tool(name="echo", description="Echo back the input message")
|
||||
def echo(message: str) -> str:
|
||||
"""Echo the message back to the caller."""
|
||||
return f"You said: {message}"
|
||||
|
||||
tools.append(echo)
|
||||
|
||||
@mcp.tool(name="get_secret", description="Get a secret value (requires auth)")
|
||||
def get_secret(secret_name: str) -> str:
|
||||
"""Get a secret value. This proves the token was validated."""
|
||||
return f"Secret value for '{secret_name}': super-secret-value-12345"
|
||||
|
||||
tools.append(get_secret)
|
||||
|
||||
@mcp.tool(name="whoami", description="Get information about the authenticated user")
|
||||
async def whoami() -> dict[str, Any]:
|
||||
"""Get information about the authenticated user from their Google token."""
|
||||
@@ -182,9 +176,6 @@ def make_tools(mcp: FastMCP) -> list[FunctionTool]:
|
||||
"access_type": tok.claims.get("access_type"),
|
||||
}
|
||||
|
||||
tools.append(whoami)
|
||||
|
||||
# Add some numbered tools for testing tool discovery
|
||||
for i in range(5):
|
||||
|
||||
@mcp.tool(name=f"oauth_tool_{i}", description=f"Test tool number {i}")
|
||||
@@ -192,10 +183,6 @@ def make_tools(mcp: FastMCP) -> list[FunctionTool]:
|
||||
"""A numbered test tool."""
|
||||
return f"Tool {_i} says hello to {name}!"
|
||||
|
||||
tools.append(numbered_tool)
|
||||
|
||||
return tools
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
port = int(sys.argv[1] if len(sys.argv) > 1 else "8006")
|
||||
|
||||
@@ -2,7 +2,6 @@ import os
|
||||
import sys
|
||||
|
||||
from fastmcp import FastMCP
|
||||
from fastmcp.server.server import FunctionTool
|
||||
|
||||
mcp = FastMCP("My HTTP MCP")
|
||||
|
||||
@@ -13,19 +12,15 @@ def hello(name: str) -> str:
|
||||
return f"Hello, {name}!"
|
||||
|
||||
|
||||
def make_many_tools() -> list[FunctionTool]:
|
||||
def make_tool(i: int) -> FunctionTool:
|
||||
def make_many_tools() -> None:
|
||||
def make_tool(i: int) -> None:
|
||||
@mcp.tool(name=f"tool_{i}", description=f"Get secret value {i}")
|
||||
def tool_name(name: str) -> str: # noqa: ARG001
|
||||
"""Get secret value."""
|
||||
return f"Secret value {100 - i}!"
|
||||
|
||||
return tool_name
|
||||
|
||||
tools = []
|
||||
for i in range(100):
|
||||
tools.append(make_tool(i))
|
||||
return tools
|
||||
make_tool(i)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -15,7 +15,6 @@ from fastapi.responses import Response
|
||||
from fastmcp import FastMCP
|
||||
from fastmcp.server.auth.providers.jwt import JWTVerifier
|
||||
from fastmcp.server.dependencies import get_access_token
|
||||
from fastmcp.server.server import FunctionTool
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
|
||||
# uncomment for debug logs
|
||||
@@ -37,18 +36,15 @@ Enable authorization code and store the client id and secret.
|
||||
"""
|
||||
|
||||
|
||||
def make_many_tools(mcp: FastMCP) -> list[FunctionTool]:
|
||||
def make_tool(i: int) -> FunctionTool:
|
||||
def make_many_tools(mcp: FastMCP) -> None:
|
||||
def make_tool(i: int) -> None:
|
||||
@mcp.tool(name=f"tool_{i}", description=f"Get secret value {i}")
|
||||
def tool_name(name: str) -> str: # noqa: ARG001
|
||||
"""Get secret value."""
|
||||
return f"Secret value {500 - i}!"
|
||||
|
||||
return tool_name
|
||||
|
||||
tools = []
|
||||
for i in range(100):
|
||||
tools.append(make_tool(i))
|
||||
make_tool(i)
|
||||
|
||||
@mcp.tool
|
||||
async def whoami() -> dict[str, Any]:
|
||||
@@ -59,9 +55,6 @@ def make_many_tools(mcp: FastMCP) -> list[FunctionTool]:
|
||||
"claims": tok.claims if tok else {},
|
||||
}
|
||||
|
||||
tools.append(whoami)
|
||||
return tools
|
||||
|
||||
|
||||
# ---------- FASTAPI APP ----------
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ from fastmcp import FastMCP
|
||||
from fastmcp.server.auth.auth import AccessToken
|
||||
from fastmcp.server.auth.auth import TokenVerifier
|
||||
from fastmcp.server.dependencies import get_access_token
|
||||
from fastmcp.server.server import FunctionTool
|
||||
|
||||
# pip install fastmcp bcrypt
|
||||
|
||||
@@ -93,19 +92,15 @@ class ApiKeyVerifier(TokenVerifier):
|
||||
# ---- server -----------------------------------------------------------------
|
||||
|
||||
|
||||
def make_many_tools(mcp: FastMCP) -> list[FunctionTool]:
|
||||
def make_tool(i: int) -> FunctionTool:
|
||||
def make_many_tools(mcp: FastMCP) -> None:
|
||||
def make_tool(i: int) -> None:
|
||||
@mcp.tool(name=f"tool_{i}", description=f"Get secret value {i}")
|
||||
def tool_name(name: str) -> str: # noqa: ARG001
|
||||
"""Get secret value."""
|
||||
return f"Secret value {400 - i}!"
|
||||
|
||||
return tool_name
|
||||
|
||||
tools = []
|
||||
for i in range(100):
|
||||
tools.append(make_tool(i))
|
||||
return tools
|
||||
make_tool(i)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -4,75 +4,84 @@ import time
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from onyx.db.chat import delete_chat_session
|
||||
from onyx.db.chat import get_chat_sessions_older_than
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from tests.integration.common_utils.managers.chat import ChatSessionManager
|
||||
from tests.integration.common_utils.managers.settings import SettingsManager
|
||||
from tests.integration.common_utils.test_models import DATestLLMProvider
|
||||
from tests.integration.common_utils.test_models import DATestSettings
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
|
||||
RETENTION_SECONDS = 10
|
||||
|
||||
|
||||
def _run_ttl_cleanup(retention_days: int) -> None:
|
||||
"""Directly execute TTL cleanup logic, bypassing Celery task infrastructure."""
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
old_chat_sessions = get_chat_sessions_older_than(retention_days, db_session)
|
||||
|
||||
for user_id, session_id in old_chat_sessions:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
delete_chat_session(
|
||||
user_id,
|
||||
session_id,
|
||||
db_session,
|
||||
include_deleted=True,
|
||||
hard_delete=True,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
|
||||
reason="Chat retention tests are enterprise only",
|
||||
)
|
||||
def test_chat_retention(reset: None, admin_user: DATestUser) -> None: # noqa: ARG001
|
||||
def test_chat_retention(
|
||||
reset: None, admin_user: DATestUser, llm_provider: DATestLLMProvider # noqa: ARG001
|
||||
) -> None: # noqa: ARG001
|
||||
"""Test that chat sessions are deleted after the retention period expires."""
|
||||
|
||||
# Set chat retention period to 10 seconds
|
||||
retention_days = 10 / 86400 # 10 seconds in days (10 / 24 / 60 / 60)
|
||||
retention_days = RETENTION_SECONDS // 86400
|
||||
settings = DATestSettings(maximum_chat_retention_days=retention_days)
|
||||
SettingsManager.update_settings(settings, user_performing_action=admin_user)
|
||||
|
||||
# Create a chat session
|
||||
chat_session = ChatSessionManager.create(
|
||||
persona_id=0,
|
||||
description="Test chat retention",
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Send a message
|
||||
ChatSessionManager.send_message(
|
||||
response = ChatSessionManager.send_message(
|
||||
chat_session_id=chat_session.id,
|
||||
message="This message should be deleted soon",
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
assert (
|
||||
response.error is None
|
||||
), f"Chat response should not have an error: {response.error}"
|
||||
|
||||
# Verify the chat session exists
|
||||
chat_history = ChatSessionManager.get_chat_history(
|
||||
chat_session=chat_session,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
assert len(chat_history) > 0, "Chat session should have messages"
|
||||
|
||||
# Wait for TTL task to run (give it ~60 seconds)
|
||||
print("Waiting for chat retention TTL task to run...")
|
||||
max_wait_time = 60 # maximum time to wait in seconds
|
||||
start_time = time.time()
|
||||
# Wait for the retention period to elapse, then directly run TTL cleanup
|
||||
time.sleep(RETENTION_SECONDS + 2)
|
||||
_run_ttl_cleanup(retention_days)
|
||||
|
||||
# Verify the chat session was deleted
|
||||
session_deleted = False
|
||||
try:
|
||||
chat_history = ChatSessionManager.get_chat_history(
|
||||
chat_session=chat_session,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
session_deleted = len(chat_history) == 0
|
||||
except requests.exceptions.HTTPError as e:
|
||||
if e.response.status_code in (404, 400):
|
||||
session_deleted = True
|
||||
else:
|
||||
raise
|
||||
|
||||
while not session_deleted and (time.time() - start_time < max_wait_time):
|
||||
# Check if chat session is deleted
|
||||
try:
|
||||
# Attempt to get chat history - this should 404
|
||||
chat_history = ChatSessionManager.get_chat_history(
|
||||
chat_session=chat_session,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# If we got no messages or an empty response, session might be deleted
|
||||
if not chat_history:
|
||||
session_deleted = True
|
||||
break
|
||||
|
||||
except requests.exceptions.HTTPError as e:
|
||||
# If we get a 404 or other error, the session is gone
|
||||
if e.response.status_code in (404, 400):
|
||||
session_deleted = True
|
||||
break
|
||||
raise # Re-raise other errors
|
||||
|
||||
# Wait a bit before checking again
|
||||
time.sleep(5)
|
||||
print(f"Waited {time.time() - start_time:.1f} seconds for chat deletion...")
|
||||
|
||||
# Assert that the chat session was deleted
|
||||
assert session_deleted, "Chat session was not deleted within the expected time"
|
||||
assert session_deleted, "Chat session was not deleted after retention period"
|
||||
|
||||
32
backend/tests/integration/tests/code_interpreter/conftest.py
Normal file
32
backend/tests/integration/tests/code_interpreter/conftest.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from collections.abc import Generator
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from tests.integration.common_utils.constants import API_SERVER_URL
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
|
||||
CODE_INTERPRETER_URL = f"{API_SERVER_URL}/admin/code-interpreter"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def preserve_code_interpreter_state(
|
||||
admin_user: DATestUser,
|
||||
) -> Generator[None, None, None]:
|
||||
"""Capture the code interpreter enabled state before a test and restore it
|
||||
afterwards, so that tests that toggle the setting cannot leak state."""
|
||||
response = requests.get(
|
||||
CODE_INTERPRETER_URL,
|
||||
headers=admin_user.headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
initial_enabled = response.json()["enabled"]
|
||||
|
||||
yield
|
||||
|
||||
restore = requests.put(
|
||||
CODE_INTERPRETER_URL,
|
||||
json={"enabled": initial_enabled},
|
||||
headers=admin_user.headers,
|
||||
)
|
||||
restore.raise_for_status()
|
||||
@@ -0,0 +1,97 @@
|
||||
import requests
|
||||
|
||||
from tests.integration.common_utils.constants import API_SERVER_URL
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
|
||||
CODE_INTERPRETER_URL = f"{API_SERVER_URL}/admin/code-interpreter"
|
||||
CODE_INTERPRETER_HEALTH_URL = f"{CODE_INTERPRETER_URL}/health"
|
||||
|
||||
|
||||
def test_get_code_interpreter_health_as_admin(
|
||||
admin_user: DATestUser,
|
||||
) -> None:
|
||||
"""Health endpoint should return a JSON object with a 'healthy' boolean."""
|
||||
response = requests.get(
|
||||
CODE_INTERPRETER_HEALTH_URL,
|
||||
headers=admin_user.headers,
|
||||
)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "healthy" in data
|
||||
assert isinstance(data["healthy"], bool)
|
||||
|
||||
|
||||
def test_get_code_interpreter_status_as_admin(
|
||||
admin_user: DATestUser,
|
||||
) -> None:
|
||||
"""GET endpoint should return a JSON object with an 'enabled' boolean."""
|
||||
response = requests.get(
|
||||
CODE_INTERPRETER_URL,
|
||||
headers=admin_user.headers,
|
||||
)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "enabled" in data
|
||||
assert isinstance(data["enabled"], bool)
|
||||
|
||||
|
||||
def test_update_code_interpreter_disable_and_enable(
|
||||
admin_user: DATestUser,
|
||||
preserve_code_interpreter_state: None, # noqa: ARG001
|
||||
) -> None:
|
||||
"""PUT endpoint should update the enabled flag and persist across reads."""
|
||||
# Disable
|
||||
response = requests.put(
|
||||
CODE_INTERPRETER_URL,
|
||||
json={"enabled": False},
|
||||
headers=admin_user.headers,
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
# Verify disabled
|
||||
response = requests.get(
|
||||
CODE_INTERPRETER_URL,
|
||||
headers=admin_user.headers,
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.json()["enabled"] is False
|
||||
|
||||
# Re-enable
|
||||
response = requests.put(
|
||||
CODE_INTERPRETER_URL,
|
||||
json={"enabled": True},
|
||||
headers=admin_user.headers,
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
# Verify enabled
|
||||
response = requests.get(
|
||||
CODE_INTERPRETER_URL,
|
||||
headers=admin_user.headers,
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.json()["enabled"] is True
|
||||
|
||||
|
||||
def test_code_interpreter_endpoints_require_admin(
|
||||
basic_user: DATestUser,
|
||||
) -> None:
|
||||
"""All code interpreter endpoints should reject non-admin users."""
|
||||
health_response = requests.get(
|
||||
CODE_INTERPRETER_HEALTH_URL,
|
||||
headers=basic_user.headers,
|
||||
)
|
||||
assert health_response.status_code == 403
|
||||
|
||||
get_response = requests.get(
|
||||
CODE_INTERPRETER_URL,
|
||||
headers=basic_user.headers,
|
||||
)
|
||||
assert get_response.status_code == 403
|
||||
|
||||
put_response = requests.put(
|
||||
CODE_INTERPRETER_URL,
|
||||
json={"enabled": True},
|
||||
headers=basic_user.headers,
|
||||
)
|
||||
assert put_response.status_code == 403
|
||||
@@ -1,195 +0,0 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from onyx.configs.constants import MessageType
|
||||
from tests.integration.common_utils.constants import API_SERVER_URL
|
||||
from tests.integration.common_utils.managers.api_key import APIKeyManager
|
||||
from tests.integration.common_utils.managers.cc_pair import CCPairManager
|
||||
from tests.integration.common_utils.managers.document import DocumentManager
|
||||
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
|
||||
from tests.integration.common_utils.managers.user import UserManager
|
||||
from tests.integration.common_utils.test_models import DATestAPIKey
|
||||
from tests.integration.common_utils.test_models import DATestCCPair
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
|
||||
|
||||
def _ask_with_history(
    history: list[dict[str, str]],
    user: DATestUser,
    expected_keyword: str,
    doc_count: int,
) -> str:
    """Post a conversation to the simple-with-history endpoint and validate the reply.

    Args:
        history: Messages exchanged so far, oldest first, ending with the user
            question that should be answered.
        user: User whose headers are sent with the request.
        expected_keyword: Lowercase word that must appear in the answer.
        doc_count: Number of seeded documents; indices ``0..doc_count - 1``
            must all appear in ``final_context_doc_indices``.

    Returns:
        The answer text, so the caller can append it to the history.
    """
    response = requests.post(
        f"{API_SERVER_URL}/chat/send-message-simple-with-history",
        json={"messages": history, "persona_id": 0},
        headers=user.headers,
    )
    assert response.status_code == 200
    response_json = response.json()

    # check that the answer is correct
    answer = response_json["answer"]
    assert expected_keyword in answer.lower()

    # The final context should contain all documents because there aren't
    # enough of them to exclude any.
    final_context_doc_indices = response_json["final_context_doc_indices"]
    for doc_index in range(doc_count):
        assert doc_index in final_context_doc_indices

    # NOTE: checks on "llm_selected_doc_indices", "cited_documents" and
    # "top_documents" were previously disabled as FLAKY (likely due to
    # non-deterministic rephrasing) and are intentionally not asserted here.
    return answer


@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="/chat/send-message-simple-with-history is enterprise only",
)
def test_all_stream_chat_message_objects_outputs(reset: None) -> None:  # noqa: ARG001
    """Ask three follow-up questions and check each answer and its final context.

    Three single-fact documents are seeded, then one question per fact is sent
    with the full preceding conversation as history.
    """
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")

    # create connector
    cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch(
        user_performing_action=admin_user,
    )
    api_key: DATestAPIKey = APIKeyManager.create(
        user_performing_action=admin_user,
    )
    LLMProviderManager.create(user_performing_action=admin_user)

    # (person, favorite color) pairs; one document and one question each.
    favorite_colors = [("Pablo", "blue"), ("Chris", "red"), ("Pika", "green")]

    # SEEDING DOCUMENTS
    cc_pair_1.documents = []
    for name, color in favorite_colors:
        cc_pair_1.documents.append(
            DocumentManager.seed_doc_with_content(
                cc_pair=cc_pair_1,
                content=f"{name}'s favorite color is {color}",
                api_key=api_key,
            )
        )

    # Each round appends the next question, sends the whole conversation and
    # then records the assistant's answer for the following round.
    history: list[dict[str, str]] = []
    total = len(favorite_colors)
    for position, (name, color) in enumerate(favorite_colors, start=1):
        history.append(
            {
                "message": f"What is {name}'s favorite color?",
                "role": MessageType.USER.value,
            }
        )
        answer = _ask_with_history(history, admin_user, color, doc_count=total)
        print(f"response {position}/{total} passed")
        history.append(
            {
                "message": answer,
                "role": MessageType.ASSISTANT.value,
            }
        )
|
||||
@@ -1,250 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from onyx.configs.constants import MessageType
|
||||
from tests.integration.common_utils.constants import API_SERVER_URL
|
||||
from tests.integration.common_utils.constants import NUM_DOCS
|
||||
from tests.integration.common_utils.test_models import DATestLLMProvider
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
from tests.integration.conftest import DocumentBuilderType
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="/chat/send-message-simple-with-history tests are enterprise only",
)
def test_send_message_simple_with_history(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
    document_builder: DocumentBuilderType,
) -> None:
    """Query with a seeded document's own content and inspect the returned documents."""
    # Seed NUM_DOCS documents with dummy content.
    docs = document_builder([f"Document {i} content" for i in range(NUM_DOCS)])

    payload = {
        "messages": [
            {
                "message": docs[0].content,
                "role": MessageType.USER.value,
            }
        ],
        "persona_id": 0,
    }
    response = requests.post(
        f"{API_SERVER_URL}/chat/send-message-simple-with-history",
        json=payload,
        headers=admin_user.headers,
    )
    assert response.status_code == 200

    top_documents = response.json()["top_documents"]

    # The document whose content was used as the query must rank first.
    assert top_documents[0]["document_id"] == docs[0].id

    # Every seeded document must be returned, with metadata naming that document.
    for doc in docs:
        matches = [hit for hit in top_documents if hit["document_id"] == doc.id]
        found_doc = matches[0] if matches else None
        assert found_doc
        assert found_doc["metadata"]["document_id"] == doc.id
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="/chat/send-message-simple-with-history tests are enterprise only",
)
def test_using_reference_docs_with_simple_with_history_api_flow(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
    document_builder: DocumentBuilderType,
) -> None:
    """Reuse the top document of one request as ``search_doc_ids`` of the next."""
    # Seed three single-fact documents.
    docs = document_builder(
        [
            "Chris's favorite color is blue",
            "Hagen's favorite color is red",
            "Pablo's favorite color is green",
        ]
    )

    endpoint = f"{API_SERVER_URL}/chat/send-message-simple-with-history"
    question = {
        "message": "What is Pablo's favorite color?",
        "role": MessageType.USER.value,
    }

    # Message 1: plain question, used only to learn the top document's db id.
    first_response = requests.post(
        endpoint,
        json={"messages": [question], "persona_id": 0},
        headers=admin_user.headers,
    )
    assert first_response.status_code == 200
    first_db_doc_id = first_response.json()["top_documents"][0]["db_doc_id"]

    # Message 2: same question, pinned to the document found above.
    second_response = requests.post(
        endpoint,
        json={
            "messages": [question],
            "persona_id": 0,
            "search_doc_ids": [first_db_doc_id],
        },
        headers=admin_user.headers,
    )
    assert second_response.status_code == 200
    second_json = second_response.json()

    # There must be an answer at all.
    assert second_json["answer"]

    # The document referenced through search_doc_ids must be the one we expect
    # (the "Pablo" document).
    assert second_json["top_documents"][0]["document_id"] == docs[2].id
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="We don't support this anymore with the DR flow :(")
@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="/chat/send-message-simple-with-history tests are enterprise only",
)
def test_send_message_simple_with_history_strict_json(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
) -> None:
    """Request a structured (JSON schema) response and check that it parses.

    Only the structure of the answer is validated, never its content.
    """
    response = requests.post(
        f"{API_SERVER_URL}/chat/send-message-simple-with-history",
        json={
            # intentionally not relevant prompt to ensure that the
            # structured response format is actually used
            "messages": [
                {
                    "message": "What is green?",
                    "role": MessageType.USER.value,
                }
            ],
            "persona_id": 0,
            "structured_response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "presidents",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "presidents": {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "List of the first three US presidents",
                            }
                        },
                        "required": ["presidents"],
                        "additionalProperties": False,
                    },
                    "strict": True,
                },
            },
        },
        headers=admin_user.headers,
    )
    assert response.status_code == 200

    response_json = response.json()

    def clean_json_string(json_string: str) -> str:
        """Strip surrounding whitespace and an optional ```json ... ``` fence."""
        return json_string.strip().removeprefix("```json").removesuffix("```").strip()

    def parse_json_field(field_name: str) -> object:
        """Parse ``response_json[field_name]`` as JSON or fail with the raw text.

        An explicit ``raise AssertionError`` is used instead of ``assert False``
        so the failure survives ``python -O`` and keeps the decode error chained.
        """
        raw = response_json[field_name]
        try:
            return json.loads(clean_json_string(raw))
        except json.JSONDecodeError as err:
            raise AssertionError(
                f"The {field_name} is not a valid JSON object - '{raw}'"
            ) from err

    # Check that the answer is present
    assert "answer" in response_json
    assert response_json["answer"] is not None

    # NOTE: do not check content, just the structure
    parsed_answer = parse_json_field("answer")
    assert isinstance(parsed_answer, dict)
    assert "presidents" in parsed_answer
    assert isinstance(parsed_answer["presidents"], list)
    for president in parsed_answer["presidents"]:
        assert isinstance(president, str)

    # Check that the answer_citationless is also valid JSON
    assert "answer_citationless" in response_json
    assert response_json["answer_citationless"] is not None
    assert isinstance(parse_json_field("answer_citationless"), dict)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="/query/answer-with-citation tests are enterprise only",
)
def test_answer_with_citation_api(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
    llm_provider: DATestLLMProvider,  # noqa: ARG001
    document_builder: DocumentBuilderType,
) -> None:
    """Ask a question answerable from one seeded document and expect it to be cited."""
    # Seed the single document the answer should cite.
    docs = document_builder(["Chris' favorite color is green"])

    request_body = {
        "messages": [
            {
                "message": "What is Chris' favorite color? Make sure to cite the document.",
                "role": MessageType.USER.value,
            }
        ],
        "persona_id": 0,
    }
    response = requests.post(
        f"{API_SERVER_URL}/query/answer-with-citation",
        json=request_body,
        headers=admin_user.headers,
        cookies=admin_user.cookies,
    )
    assert response.status_code == 200

    response_json = response.json()
    assert response_json["answer"]

    # At least one citation must point at the seeded document.
    assert any(
        citation["document_id"] == docs[0].id
        for citation in response_json["citations"]
    )
|
||||
@@ -2,7 +2,6 @@ import os
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from unittest.mock import patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
@@ -12,6 +11,7 @@ from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.mock_connector.connector import EXTERNAL_USER_EMAILS
|
||||
from onyx.connectors.mock_connector.connector import EXTERNAL_USER_GROUP_IDS
|
||||
from onyx.connectors.mock_connector.connector import MockConnectorCheckpoint
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import InputType
|
||||
from onyx.db.document import get_documents_by_ids
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
@@ -25,128 +25,16 @@ from tests.integration.common_utils.managers.cc_pair import CCPairManager
|
||||
from tests.integration.common_utils.managers.document import DocumentManager
|
||||
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
|
||||
from tests.integration.common_utils.test_document_utils import create_test_document
|
||||
from tests.integration.common_utils.test_models import DATestCCPair
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
from tests.integration.common_utils.vespa import vespa_fixture
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
|
||||
reason="Permission sync is enterprise only",
|
||||
)
|
||||
def test_mock_connector_initial_permission_sync(
|
||||
def _setup_mock_connector(
|
||||
mock_server_client: httpx.Client,
|
||||
vespa_client: vespa_fixture,
|
||||
admin_user: DATestUser,
|
||||
) -> None:
|
||||
"""Test that the MockConnector fetches and sets permissions during initial indexing when AccessType.SYNC is used"""
|
||||
|
||||
# Set up mock server behavior
|
||||
doc_uuid = uuid.uuid4()
|
||||
test_doc = create_test_document(doc_id=f"test-doc-{doc_uuid}")
|
||||
|
||||
response = mock_server_client.post(
|
||||
"/set-behavior",
|
||||
json=[
|
||||
{
|
||||
"documents": [test_doc.model_dump(mode="json")],
|
||||
"checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(
|
||||
mode="json"
|
||||
),
|
||||
"failures": [],
|
||||
}
|
||||
],
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
# Create CC Pair with SYNC access type to enable permissions during indexing
|
||||
cc_pair = CCPairManager.create_from_scratch(
|
||||
name=f"mock-connector-permissions-{uuid.uuid4()}",
|
||||
source=DocumentSource.MOCK_CONNECTOR,
|
||||
input_type=InputType.POLL,
|
||||
connector_specific_config={
|
||||
"mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
|
||||
"mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
|
||||
},
|
||||
access_type=AccessType.SYNC, # This enables permissions during indexing
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Wait for index attempt to start
|
||||
index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
|
||||
cc_pair_id=cc_pair.id,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Wait for index attempt to finish
|
||||
IndexAttemptManager.wait_for_index_attempt_completion(
|
||||
index_attempt_id=index_attempt.id,
|
||||
cc_pair_id=cc_pair.id,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Validate status
|
||||
finished_index_attempt = IndexAttemptManager.get_index_attempt_by_id(
|
||||
index_attempt_id=index_attempt.id,
|
||||
cc_pair_id=cc_pair.id,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
assert finished_index_attempt.status == IndexingStatus.SUCCESS
|
||||
|
||||
# Verify document was indexed
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
documents = DocumentManager.fetch_documents_for_cc_pair(
|
||||
cc_pair_id=cc_pair.id,
|
||||
db_session=db_session,
|
||||
vespa_client=vespa_client,
|
||||
)
|
||||
assert len(documents) == 1
|
||||
assert documents[0].id == test_doc.id
|
||||
|
||||
# Verify no errors occurred
|
||||
errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(
|
||||
cc_pair_id=cc_pair.id,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
assert len(errors) == 0
|
||||
|
||||
# Verify permissions were set during indexing by checking the document in the database
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
db_docs = get_documents_by_ids(
|
||||
db_session=db_session,
|
||||
document_ids=[test_doc.id],
|
||||
)
|
||||
assert len(db_docs) == 1
|
||||
db_doc = db_docs[0]
|
||||
|
||||
assert db_doc.external_user_emails is not None
|
||||
assert db_doc.external_user_group_ids is not None
|
||||
|
||||
# Check the specific permissions that MockConnector sets
|
||||
assert set(db_doc.external_user_emails) == EXTERNAL_USER_EMAILS
|
||||
assert set(db_doc.external_user_group_ids) == EXTERNAL_USER_GROUP_IDS
|
||||
|
||||
# Verify the document is not public (as set by MockConnector)
|
||||
assert db_doc.is_public is False
|
||||
|
||||
# Verify that the cc_pair was marked as permissions synced
|
||||
updated_cc_pair_info = CCPairManager.get_single(
|
||||
cc_pair.id, user_performing_action=admin_user
|
||||
)
|
||||
assert updated_cc_pair_info is not None
|
||||
assert updated_cc_pair_info.last_full_permission_sync is not None
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
|
||||
reason="Permission sync attempt tracking is enterprise only",
|
||||
)
|
||||
def test_permission_sync_attempt_tracking_integration(
|
||||
mock_server_client: httpx.Client,
|
||||
vespa_client: vespa_fixture, # noqa: ARG001
|
||||
admin_user: DATestUser,
|
||||
) -> None:
|
||||
"""Test that permission sync attempts are properly tracked during real sync workflows."""
|
||||
|
||||
) -> tuple[DATestCCPair, Document]:
|
||||
"""Common setup: create a test doc, configure mock server, create cc_pair, wait for indexing."""
|
||||
doc_uuid = uuid.uuid4()
|
||||
test_doc = create_test_document(doc_id=f"test-doc-{doc_uuid}")
|
||||
|
||||
@@ -165,7 +53,7 @@ def test_permission_sync_attempt_tracking_integration(
|
||||
assert response.status_code == 200
|
||||
|
||||
cc_pair = CCPairManager.create_from_scratch(
|
||||
name=f"mock-connector-attempt-tracking-{uuid.uuid4()}",
|
||||
name=f"mock-connector-{uuid.uuid4()}",
|
||||
source=DocumentSource.MOCK_CONNECTOR,
|
||||
input_type=InputType.POLL,
|
||||
connector_specific_config={
|
||||
@@ -187,6 +75,95 @@ def test_permission_sync_attempt_tracking_integration(
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
finished = IndexAttemptManager.get_index_attempt_by_id(
|
||||
index_attempt_id=index_attempt.id,
|
||||
cc_pair_id=cc_pair.id,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
assert finished.status == IndexingStatus.SUCCESS
|
||||
return cc_pair, test_doc
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission sync is enterprise only",
)
def test_mock_connector_initial_permission_sync(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,
    admin_user: DATestUser,
) -> None:
    """Check that the MockConnector's permissions are stored during initial indexing
    and that an explicitly triggered permission sync is recorded on the cc_pair."""

    cc_pair, test_doc = _setup_mock_connector(mock_server_client, admin_user)

    # Exactly the seeded document should be indexed for this cc_pair.
    with get_session_with_current_tenant() as db_session:
        indexed_docs = DocumentManager.fetch_documents_for_cc_pair(
            cc_pair_id=cc_pair.id,
            db_session=db_session,
            vespa_client=vespa_client,
        )
        assert len(indexed_docs) == 1
        assert indexed_docs[0].id == test_doc.id

    # Indexing must not have recorded any errors.
    index_errors = IndexAttemptManager.get_index_attempt_errors_for_cc_pair(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    assert len(index_errors) == 0

    # Inspect the stored row while the session is still open.
    with get_session_with_current_tenant() as db_session:
        stored_docs = get_documents_by_ids(
            db_session=db_session,
            document_ids=[test_doc.id],
        )
        assert len(stored_docs) == 1
        stored_doc = stored_docs[0]

        assert stored_doc.external_user_emails is not None
        assert stored_doc.external_user_group_ids is not None
        assert set(stored_doc.external_user_emails) == EXTERNAL_USER_EMAILS
        assert set(stored_doc.external_user_group_ids) == EXTERNAL_USER_GROUP_IDS
        assert stored_doc.is_public is False

    # After initial indexing, the beat task detects last_time_perm_sync is None
    # and triggers a doc permission sync. Explicitly trigger it to avoid
    # waiting for the 30s beat interval.
    sync_requested_at = datetime.now(timezone.utc)
    CCPairManager.sync(
        cc_pair=cc_pair,
        user_performing_action=admin_user,
    )
    CCPairManager.wait_for_sync(
        cc_pair=cc_pair,
        after=sync_requested_at,
        number_of_updated_docs=1,
        user_performing_action=admin_user,
        should_wait_for_group_sync=False,
        should_wait_for_vespa_sync=False,
    )

    refreshed_cc_pair = CCPairManager.get_single(
        cc_pair.id, user_performing_action=admin_user
    )
    assert refreshed_cc_pair is not None
    assert refreshed_cc_pair.last_full_permission_sync is not None
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
|
||||
reason="Permission sync attempt tracking is enterprise only",
|
||||
)
|
||||
def test_permission_sync_attempt_tracking_integration(
|
||||
mock_server_client: httpx.Client,
|
||||
vespa_client: vespa_fixture, # noqa: ARG001
|
||||
admin_user: DATestUser,
|
||||
) -> None:
|
||||
"""Test that permission sync attempts are properly tracked during real sync workflows."""
|
||||
|
||||
cc_pair, _test_doc = _setup_mock_connector(mock_server_client, admin_user)
|
||||
|
||||
before = datetime.now(timezone.utc)
|
||||
CCPairManager.sync(
|
||||
cc_pair=cc_pair,
|
||||
@@ -198,6 +175,8 @@ def test_permission_sync_attempt_tracking_integration(
|
||||
after=before,
|
||||
number_of_updated_docs=1,
|
||||
user_performing_action=admin_user,
|
||||
should_wait_for_group_sync=False,
|
||||
should_wait_for_vespa_sync=False,
|
||||
)
|
||||
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
@@ -219,88 +198,6 @@ def test_permission_sync_attempt_tracking_integration(
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
    reason="Permission sync attempt tracking is enterprise only",
)
def test_permission_sync_attempt_tracking_with_mocked_failure(
    mock_server_client: httpx.Client,
    vespa_client: vespa_fixture,  # noqa: ARG001
    admin_user: DATestUser,
) -> None:
    """Check that a failing permission sync leaves a FAILED sync attempt behind."""

    test_doc = create_test_document(doc_id=f"test-doc-{uuid.uuid4()}")

    # Tell the mock server to serve exactly one document and then stop.
    behavior = {
        "documents": [test_doc.model_dump(mode="json")],
        "checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(mode="json"),
        "failures": [],
    }
    response = mock_server_client.post("/set-behavior", json=[behavior])
    assert response.status_code == 200

    cc_pair = CCPairManager.create_from_scratch(
        name=f"mock-connector-attempt-failure-{uuid.uuid4()}",
        source=DocumentSource.MOCK_CONNECTOR,
        input_type=InputType.POLL,
        connector_specific_config={
            "mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
            "mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
        },
        access_type=AccessType.SYNC,
        user_performing_action=admin_user,
    )

    # Let the initial indexing run to completion before syncing permissions.
    index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )
    IndexAttemptManager.wait_for_index_attempt_completion(
        index_attempt_id=index_attempt.id,
        cc_pair_id=cc_pair.id,
        user_performing_action=admin_user,
    )

    # Mock the permission sync to force a failure and verify attempt tracking
    with patch(
        "ee.onyx.background.celery.tasks.doc_permission_syncing.tasks.validate_ccpair_for_user"
    ) as mock_validate:
        mock_validate.side_effect = Exception("Validation failed for testing")

        # Best effort: the sync is expected to fail, so any error raised while
        # triggering or waiting for it is deliberately ignored.
        try:
            sync_requested_at = datetime.now(timezone.utc)
            CCPairManager.sync(
                cc_pair=cc_pair,
                user_performing_action=admin_user,
            )
            CCPairManager.wait_for_sync(
                cc_pair=cc_pair,
                after=sync_requested_at,
                number_of_updated_docs=0,
                user_performing_action=admin_user,
            )
        except Exception:
            pass

    with get_session_with_current_tenant() as db_session:
        attempt_query = select(DocPermissionSyncAttempt).where(
            DocPermissionSyncAttempt.connector_credential_pair_id == cc_pair.id
        )
        attempt = db_session.execute(attempt_query).scalar_one()

        assert attempt.status == PermissionSyncStatus.FAILED
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
|
||||
reason="Permission sync attempt tracking is enterprise only",
|
||||
@@ -311,45 +208,8 @@ def test_permission_sync_attempt_status_success(
|
||||
admin_user: DATestUser,
|
||||
) -> None:
|
||||
"""Test that permission sync attempts are marked as SUCCESS when sync completes without errors."""
|
||||
doc_uuid = uuid.uuid4()
|
||||
test_doc = create_test_document(doc_id=f"test-doc-{doc_uuid}")
|
||||
|
||||
response = mock_server_client.post(
|
||||
"/set-behavior",
|
||||
json=[
|
||||
{
|
||||
"documents": [test_doc.model_dump(mode="json")],
|
||||
"checkpoint": MockConnectorCheckpoint(has_more=False).model_dump(
|
||||
mode="json"
|
||||
),
|
||||
"failures": [],
|
||||
}
|
||||
],
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
cc_pair = CCPairManager.create_from_scratch(
|
||||
name=f"mock-connector-success-{uuid.uuid4()}",
|
||||
source=DocumentSource.MOCK_CONNECTOR,
|
||||
input_type=InputType.POLL,
|
||||
connector_specific_config={
|
||||
"mock_server_host": MOCK_CONNECTOR_SERVER_HOST,
|
||||
"mock_server_port": MOCK_CONNECTOR_SERVER_PORT,
|
||||
},
|
||||
access_type=AccessType.SYNC,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
index_attempt = IndexAttemptManager.wait_for_index_attempt_start(
|
||||
cc_pair_id=cc_pair.id,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
IndexAttemptManager.wait_for_index_attempt_completion(
|
||||
index_attempt_id=index_attempt.id,
|
||||
cc_pair_id=cc_pair.id,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
cc_pair, _test_doc = _setup_mock_connector(mock_server_client, admin_user)
|
||||
|
||||
before = datetime.now(timezone.utc)
|
||||
CCPairManager.sync(
|
||||
@@ -362,6 +222,8 @@ def test_permission_sync_attempt_status_success(
|
||||
after=before,
|
||||
number_of_updated_docs=1,
|
||||
user_performing_action=admin_user,
|
||||
should_wait_for_group_sync=False,
|
||||
should_wait_for_vespa_sync=False,
|
||||
)
|
||||
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
|
||||
@@ -6,11 +6,14 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.context.search.enums import RecencyBiasSetting
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import LLMModelFlowType
|
||||
from onyx.db.llm import can_user_access_llm_provider
|
||||
from onyx.db.llm import fetch_user_group_ids
|
||||
from onyx.db.models import LLMModelFlow
|
||||
from onyx.db.models import LLMProvider as LLMProviderModel
|
||||
from onyx.db.models import LLMProvider__Persona
|
||||
from onyx.db.models import LLMProvider__UserGroup
|
||||
from onyx.db.models import ModelConfiguration
|
||||
from onyx.db.models import Persona
|
||||
from onyx.db.models import User
|
||||
from onyx.db.models import User__UserGroup
|
||||
@@ -267,6 +270,24 @@ def test_get_llm_for_persona_falls_back_when_access_denied(
|
||||
provider_name=restricted_provider.name,
|
||||
)
|
||||
|
||||
# Set up ModelConfiguration + LLMModelFlow so get_default_llm() can
|
||||
# resolve the default provider when the fallback path is triggered.
|
||||
default_model_config = ModelConfiguration(
|
||||
llm_provider_id=default_provider.id,
|
||||
name=default_provider.default_model_name,
|
||||
is_visible=True,
|
||||
)
|
||||
db_session.add(default_model_config)
|
||||
db_session.flush()
|
||||
db_session.add(
|
||||
LLMModelFlow(
|
||||
model_configuration_id=default_model_config.id,
|
||||
llm_model_flow_type=LLMModelFlowType.CHAT,
|
||||
is_default=True,
|
||||
)
|
||||
)
|
||||
db_session.flush()
|
||||
|
||||
access_group = UserGroup(name="persona-group")
|
||||
db_session.add(access_group)
|
||||
db_session.flush()
|
||||
|
||||
@@ -0,0 +1,322 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from pydantic import BaseModel
|
||||
from pydantic import ConfigDict
|
||||
|
||||
from onyx.configs import app_configs
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.tools.constants import SEARCH_TOOL_ID
|
||||
from tests.integration.common_utils.constants import API_SERVER_URL
|
||||
from tests.integration.common_utils.managers.cc_pair import CCPairManager
|
||||
from tests.integration.common_utils.managers.chat import ChatSessionManager
|
||||
from tests.integration.common_utils.managers.tool import ToolManager
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
from tests.integration.common_utils.test_models import ToolName
|
||||
|
||||
|
||||
# Names of the environment variables that configure the nightly provider test.
_ENV_PROVIDER = "NIGHTLY_LLM_PROVIDER"  # provider identifier (lower-cased on load)
_ENV_MODELS = "NIGHTLY_LLM_MODELS"  # comma-separated model names
_ENV_API_KEY = "NIGHTLY_LLM_API_KEY"
_ENV_API_BASE = "NIGHTLY_LLM_API_BASE"
_ENV_CUSTOM_CONFIG_JSON = "NIGHTLY_LLM_CUSTOM_CONFIG_JSON"  # must hold a JSON object
_ENV_STRICT = "NIGHTLY_LLM_STRICT"  # truthy => misconfiguration fails instead of skips
|
||||
|
||||
|
||||
class NightlyProviderConfig(BaseModel):
    """Settings for one nightly provider run.

    Instances are frozen (see ``model_config``), so fields cannot be
    reassigned after construction.
    """

    model_config = ConfigDict(frozen=True)

    # Provider identifier, e.g. "openrouter" or "ollama_chat".
    provider: str
    # Every model name that should be exercised for this provider.
    model_names: list[str]
    api_key: str | None
    api_base: str | None
    # Extra provider-specific settings; keys and values are strings.
    custom_config: dict[str, str] | None
    # When True, a misconfiguration fails the test instead of skipping it.
    strict: bool
|
||||
|
||||
|
||||
def _env_true(env_var: str, default: bool = False) -> bool:
    """Interpret the environment variable ``env_var`` as a boolean flag.

    Returns ``default`` when the variable is unset. Otherwise returns True
    only for "1", "true", "yes" or "on", compared case-insensitively after
    stripping surrounding whitespace.
    """
    raw = os.environ.get(env_var)
    if raw is not None:
        return raw.strip().lower() in {"1", "true", "yes", "on"}
    return default
|
||||
|
||||
|
||||
def _split_csv_env(env_var: str) -> list[str]:
    """Split the comma-separated environment variable ``env_var`` into items.

    Each item is whitespace-stripped and empty items are dropped. An unset
    variable yields an empty list.
    """
    raw = os.environ.get(env_var, "")
    stripped = (piece.strip() for piece in raw.split(","))
    return [piece for piece in stripped if piece]
|
||||
|
||||
|
||||
def _load_provider_config() -> NightlyProviderConfig:
    """Build the nightly provider configuration from environment variables.

    Returns:
        A ``NightlyProviderConfig`` populated from the ``NIGHTLY_LLM_*``
        variables. Empty API key / API base values are normalised to None.

    Raises:
        ValueError: if the custom-config variable is set but is not valid
            JSON, or is valid JSON that is not an object.
    """
    provider = os.environ.get(_ENV_PROVIDER, "").strip().lower()
    model_names = _split_csv_env(_ENV_MODELS)
    api_key = os.environ.get(_ENV_API_KEY) or None
    api_base = os.environ.get(_ENV_API_BASE) or None
    strict = _env_true(_ENV_STRICT, default=False)

    custom_config: dict[str, str] | None = None
    custom_config_json = os.environ.get(_ENV_CUSTOM_CONFIG_JSON, "").strip()
    if custom_config_json:
        try:
            parsed = json.loads(custom_config_json)
        except json.JSONDecodeError as exc:
            # Name the offending variable; a bare decode error gives no hint
            # about which setting was malformed.
            raise ValueError(
                f"{_ENV_CUSTOM_CONFIG_JSON} must be valid JSON: {exc}"
            ) from exc
        if not isinstance(parsed, dict):
            raise ValueError(f"{_ENV_CUSTOM_CONFIG_JSON} must be a JSON object")
        # The config model only accepts string keys and values.
        custom_config = {str(key): str(value) for key, value in parsed.items()}

    # For ollama_chat, an API key with no explicit custom config is forwarded
    # through the custom config under OLLAMA_API_KEY.
    if provider == "ollama_chat" and api_key and not custom_config:
        custom_config = {"OLLAMA_API_KEY": api_key}

    return NightlyProviderConfig(
        provider=provider,
        model_names=model_names,
        api_key=api_key,
        api_base=api_base,
        custom_config=custom_config,
        strict=strict,
    )
|
||||
|
||||
|
||||
def _skip_or_fail(strict: bool, message: str) -> None:
    """Stop the current test with ``message``: fail if ``strict``, else skip."""
    if not strict:
        pytest.skip(message)
    pytest.fail(message)
|
||||
|
||||
|
||||
def _validate_provider_config(config: NightlyProviderConfig) -> None:
    """Skip or fail the test (per ``config.strict``) on an incomplete config.

    Checks, in order: a provider is set, at least one model is listed, an API
    key is present for every provider except ``ollama_chat``, and an API base
    (explicit or default) is available for ``ollama_chat``.
    """
    strict = config.strict
    provider = config.provider
    is_ollama = provider == "ollama_chat"

    if not provider:
        _skip_or_fail(strict=strict, message=f"{_ENV_PROVIDER} must be set")

    if not config.model_names:
        _skip_or_fail(
            strict=strict,
            message=f"{_ENV_MODELS} must include at least one model",
        )

    if not is_ollama and not config.api_key:
        _skip_or_fail(
            strict=strict,
            message=(f"{_ENV_API_KEY} is required for provider '{config.provider}'"),
        )

    if is_ollama:
        effective_api_base = config.api_base or _default_api_base_for_provider(
            provider
        )
        if not effective_api_base:
            _skip_or_fail(
                strict=strict,
                message=(
                    f"{_ENV_API_BASE} is required for provider '{config.provider}'"
                ),
            )
|
||||
|
||||
|
||||
def _assert_integration_mode_enabled() -> None:
    """Assert that ``app_configs.INTEGRATION_TESTS_MODE`` is exactly True."""
    integration_mode = app_configs.INTEGRATION_TESTS_MODE
    assert (
        integration_mode is True
    ), "Integration tests require INTEGRATION_TESTS_MODE=true."
|
||||
|
||||
|
||||
def _seed_connector_for_search_tool(admin_user: DATestUser) -> None:
    """Create an ingestion-API connector/credential pair as ``admin_user``.

    SearchTool is only exposed when at least one non-default connector
    exists, so the test seeds one before looking the tool up.
    """
    CCPairManager.create_from_scratch(
        user_performing_action=admin_user,
        source=DocumentSource.INGESTION_API,
    )
|
||||
|
||||
|
||||
def _get_internal_search_tool_id(admin_user: DATestUser) -> int:
    """Return the id of the first listed tool whose in-code id is SEARCH_TOOL_ID.

    Raises:
        AssertionError: if no listed tool matches.
    """
    available_tools = ToolManager.list_tools(user_performing_action=admin_user)
    search_tool = next(
        (
            candidate
            for candidate in available_tools
            if candidate.in_code_tool_id == SEARCH_TOOL_ID
        ),
        None,
    )
    if search_tool is None:
        raise AssertionError("SearchTool must exist for this test")
    return search_tool.id
|
||||
|
||||
|
||||
def _default_api_base_for_provider(provider: str) -> str | None:
|
||||
if provider == "openrouter":
|
||||
return "https://openrouter.ai/api/v1"
|
||||
if provider == "ollama_chat":
|
||||
# host.docker.internal works when tests are running inside the integration test container.
|
||||
return "http://host.docker.internal:11434"
|
||||
return None
|
||||
|
||||
|
||||
def _create_provider_payload(
|
||||
provider: str,
|
||||
provider_name: str,
|
||||
model_name: str,
|
||||
api_key: str | None,
|
||||
api_base: str | None,
|
||||
custom_config: dict[str, str] | None,
|
||||
) -> dict:
|
||||
return {
|
||||
"name": provider_name,
|
||||
"provider": provider,
|
||||
"api_key": api_key,
|
||||
"api_base": api_base,
|
||||
"custom_config": custom_config,
|
||||
"default_model_name": model_name,
|
||||
"is_public": True,
|
||||
"groups": [],
|
||||
"personas": [],
|
||||
"model_configurations": [{"name": model_name, "is_visible": True}],
|
||||
"api_key_changed": bool(api_key),
|
||||
"custom_config_changed": bool(custom_config),
|
||||
}
|
||||
|
||||
|
||||
def _ensure_provider_is_default(provider_id: int, admin_user: DATestUser) -> None:
    """Assert that the admin provider listing marks ``provider_id`` as default.

    Fetches ``/admin/llm/provider`` as ``admin_user``, takes the first entry
    flagged ``is_default_provider`` and compares its id with ``provider_id``.
    An HTTP error status raises via ``raise_for_status``.
    """
    response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    response.raise_for_status()

    current_default = None
    for entry in response.json():
        if entry.get("is_default_provider"):
            current_default = entry
            break

    assert (
        current_default is not None
    ), "Expected a default provider after setting provider as default"
    assert (
        current_default["id"] == provider_id
    ), f"Expected provider {provider_id} to be default, found {current_default['id']}"
|
||||
|
||||
|
||||
def _run_chat_assertions(
    admin_user: DATestUser,
    search_tool_id: int,
    provider: str,
    model_name: str,
) -> None:
    """Send a chat message that forces the search tool and check the outcome.

    An attempt passes when the response has no stream error, reports
    internal search among the used tools and in the tool-call debug data,
    and carries a non-empty answer. Up to two attempts are made; if both
    miss, the test fails via ``pytest.fail`` with the last attempt's details.

    Args:
        admin_user: user that owns the chat session and sends the message.
        search_tool_id: tool id passed as the single forced tool.
        provider: provider name, used only in failure messages.
        model_name: model name, used only in failure messages.
    """
    max_attempts = 2
    last_error: str | None = None
    # Retry once to reduce transient nightly flakes due to provider-side blips.
    for attempt in range(1, max_attempts + 1):
        chat_session = ChatSessionManager.create(user_performing_action=admin_user)

        response = ChatSessionManager.send_message(
            chat_session_id=chat_session.id,
            message=(
                "Use internal_search to search for 'nightly-provider-regression-sentinel', "
                "then summarize the result in one short sentence."
            ),
            user_performing_action=admin_user,
            forced_tool_ids=[search_tool_id],
        )

        if response.error is None:
            used_internal_search = any(
                used_tool.tool_name == ToolName.INTERNAL_SEARCH
                for used_tool in response.used_tools
            )
            debug_has_internal_search = any(
                debug_tool_call.tool_name == "internal_search"
                for debug_tool_call in response.tool_call_debug
            )
            has_answer = bool(response.full_message.strip())

            if used_internal_search and debug_has_internal_search and has_answer:
                return

            last_error = (
                f"attempt={attempt} provider={provider} model={model_name} "
                f"used_internal_search={used_internal_search} "
                f"debug_internal_search={debug_has_internal_search} "
                f"has_answer={has_answer} "
                f"tool_call_debug={response.tool_call_debug}"
            )
        else:
            last_error = (
                f"attempt={attempt} provider={provider} model={model_name} "
                f"stream_error={response.error.error}"
            )

        # Back off only when another attempt follows; sleeping after the
        # final attempt would just delay the failure report.
        if attempt < max_attempts:
            time.sleep(attempt)

    pytest.fail(f"Chat/tool-call assertions failed: {last_error}")
|
||||
|
||||
|
||||
def _create_and_test_provider_for_model(
    admin_user: DATestUser,
    config: NightlyProviderConfig,
    model_name: str,
    search_tool_id: int,
) -> None:
    """Run the full provider workflow for one model, then delete the provider.

    Steps: call the provider test endpoint, create the provider, make it the
    default, confirm the default, and run the chat/tool-call assertions. The
    created provider is deleted in a ``finally`` block whether or not the
    later steps pass.
    """
    api_base = config.api_base or _default_api_base_for_provider(config.provider)
    # Random suffix keeps provider names unique across models and runs.
    unique_name = f"nightly-{config.provider}-{uuid4().hex[:12]}"

    payload = _create_provider_payload(
        provider=config.provider,
        provider_name=unique_name,
        model_name=model_name,
        api_key=config.api_key,
        api_base=api_base,
        custom_config=config.custom_config,
    )
    headers = admin_user.headers

    test_response = requests.post(
        f"{API_SERVER_URL}/admin/llm/test",
        headers=headers,
        json=payload,
    )
    assert test_response.status_code == 200, (
        f"Provider test endpoint failed for provider={config.provider} "
        f"model={model_name}: {test_response.status_code} {test_response.text}"
    )

    create_response = requests.put(
        f"{API_SERVER_URL}/admin/llm/provider?is_creation=true",
        headers=headers,
        json=payload,
    )
    assert create_response.status_code == 200, (
        f"Provider creation failed for provider={config.provider} "
        f"model={model_name}: {create_response.status_code} {create_response.text}"
    )
    provider_id = create_response.json()["id"]

    try:
        set_default_response = requests.post(
            f"{API_SERVER_URL}/admin/llm/provider/{provider_id}/default",
            headers=headers,
        )
        assert set_default_response.status_code == 200, (
            f"Setting default provider failed for provider={config.provider} "
            f"model={model_name}: {set_default_response.status_code} "
            f"{set_default_response.text}"
        )

        _ensure_provider_is_default(provider_id=provider_id, admin_user=admin_user)
        _run_chat_assertions(
            admin_user=admin_user,
            search_tool_id=search_tool_id,
            provider=config.provider,
            model_name=model_name,
        )
    finally:
        # Best-effort cleanup: the delete response is intentionally not checked.
        requests.delete(
            f"{API_SERVER_URL}/admin/llm/provider/{provider_id}",
            headers=headers,
        )
|
||||
|
||||
|
||||
def test_nightly_provider_chat_workflow(admin_user: DATestUser) -> None:
    """Nightly regression test for provider setup + default selection + chat tool calls."""
    _assert_integration_mode_enabled()

    # Load the env-driven config; validation skips or fails the test when
    # required settings are missing.
    provider_config = _load_provider_config()
    _validate_provider_config(provider_config)

    # The search tool is looked up only after a connector has been seeded.
    _seed_connector_for_search_tool(admin_user)
    internal_search_id = _get_internal_search_tool_id(admin_user)

    for current_model in provider_config.model_names:
        _create_and_test_provider_for_model(
            admin_user=admin_user,
            config=provider_config,
            model_name=current_model,
            search_tool_id=internal_search_id,
        )
|
||||
@@ -6,7 +6,7 @@ the permissions of the curator manipulating connector-credential pairs.
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from requests.exceptions import HTTPError
|
||||
from onyx_openapi_client.exceptions import ApiException # type: ignore[import-untyped,unused-ignore,import-not-found]
|
||||
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.server.documents.models import DocumentSource
|
||||
@@ -93,20 +93,9 @@ def test_cc_pair_permissions(reset: None) -> None: # noqa: ARG001
|
||||
|
||||
"""Tests for things Curators should not be able to do"""
|
||||
|
||||
# Curators should not be able to create a public cc pair
|
||||
with pytest.raises(HTTPError):
|
||||
CCPairManager.create(
|
||||
connector_id=connector_1.id,
|
||||
credential_id=credential_1.id,
|
||||
name="invalid_cc_pair_1",
|
||||
access_type=AccessType.PUBLIC,
|
||||
groups=[user_group_1.id],
|
||||
user_performing_action=curator,
|
||||
)
|
||||
|
||||
# Curators should not be able to create a cc
|
||||
# pair for a user group they are not a curator of
|
||||
with pytest.raises(HTTPError):
|
||||
with pytest.raises(ApiException):
|
||||
CCPairManager.create(
|
||||
connector_id=connector_1.id,
|
||||
credential_id=credential_1.id,
|
||||
@@ -118,7 +107,7 @@ def test_cc_pair_permissions(reset: None) -> None: # noqa: ARG001
|
||||
|
||||
# Curators should not be able to create a cc
|
||||
# pair without an attached user group
|
||||
with pytest.raises(HTTPError):
|
||||
with pytest.raises(ApiException):
|
||||
CCPairManager.create(
|
||||
connector_id=connector_1.id,
|
||||
credential_id=credential_1.id,
|
||||
@@ -144,7 +133,7 @@ def test_cc_pair_permissions(reset: None) -> None: # noqa: ARG001
|
||||
|
||||
# Curators should not be able to create a cc
|
||||
# pair for a user group that the credential does not belong to
|
||||
with pytest.raises(HTTPError):
|
||||
with pytest.raises(ApiException):
|
||||
CCPairManager.create(
|
||||
connector_id=connector_1.id,
|
||||
credential_id=credential_2.id,
|
||||
@@ -156,6 +145,16 @@ def test_cc_pair_permissions(reset: None) -> None: # noqa: ARG001
|
||||
|
||||
"""Tests for things Curators should be able to do"""
|
||||
|
||||
# Re-create connector since the credential_2 validation error above
|
||||
# triggers connector deletion in the exception handler
|
||||
connector_1 = ConnectorManager.create(
|
||||
name="admin_owned_connector_2",
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
groups=[user_group_1.id],
|
||||
access_type=AccessType.PRIVATE,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
# Curators should be able to create a private
|
||||
# cc pair for a user group they are a curator of
|
||||
valid_cc_pair = CCPairManager.create(
|
||||
|
||||
@@ -59,17 +59,7 @@ def test_connector_permissions(reset: None) -> None: # noqa: ARG001
|
||||
|
||||
"""Tests for things Curators should not be able to do"""
|
||||
|
||||
# Curators should not be able to create a public connector
|
||||
with pytest.raises(HTTPError):
|
||||
ConnectorManager.create(
|
||||
name="invalid_connector_1",
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
groups=[user_group_1.id],
|
||||
access_type=AccessType.PUBLIC,
|
||||
user_performing_action=curator,
|
||||
)
|
||||
|
||||
# Curators should not be able to create a cc pair for a
|
||||
# Curators should not be able to create a connector for a
|
||||
# user group they are not a curator of
|
||||
with pytest.raises(HTTPError):
|
||||
ConnectorManager.create(
|
||||
@@ -133,12 +123,12 @@ def test_connector_permissions(reset: None) -> None: # noqa: ARG001
|
||||
user_performing_action=curator,
|
||||
)
|
||||
|
||||
# Test that curator cannot create a public connector
|
||||
with pytest.raises(HTTPError):
|
||||
ConnectorManager.create(
|
||||
name="invalid_connector_4",
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
groups=[user_group_1.id],
|
||||
access_type=AccessType.PUBLIC,
|
||||
user_performing_action=curator,
|
||||
)
|
||||
# Curators should be able to create a public connector
|
||||
public_connector = ConnectorManager.create(
|
||||
name="curator_public_connector",
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
groups=[user_group_1.id],
|
||||
access_type=AccessType.PUBLIC,
|
||||
user_performing_action=curator,
|
||||
)
|
||||
assert public_connector.id is not None
|
||||
|
||||
@@ -58,16 +58,6 @@ def test_credential_permissions(reset: None) -> None: # noqa: ARG001
|
||||
|
||||
"""Tests for things Curators should not be able to do"""
|
||||
|
||||
# Curators should not be able to create a public credential
|
||||
with pytest.raises(HTTPError):
|
||||
CredentialManager.create(
|
||||
name="invalid_credential_1",
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
groups=[user_group_1.id],
|
||||
curator_public=True,
|
||||
user_performing_action=curator,
|
||||
)
|
||||
|
||||
# Curators should not be able to create a credential for a user group they are not a curator of
|
||||
with pytest.raises(HTTPError):
|
||||
CredentialManager.create(
|
||||
@@ -113,3 +103,16 @@ def test_credential_permissions(reset: None) -> None: # noqa: ARG001
|
||||
verify_deleted=True,
|
||||
user_performing_action=curator,
|
||||
)
|
||||
|
||||
# Curators should be able to create a public credential
|
||||
public_credential = CredentialManager.create(
|
||||
name="curator_public_credential",
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
groups=[user_group_1.id],
|
||||
curator_public=True,
|
||||
user_performing_action=curator,
|
||||
)
|
||||
CredentialManager.verify(
|
||||
credential=public_credential,
|
||||
user_performing_action=curator,
|
||||
)
|
||||
|
||||
@@ -70,10 +70,11 @@ def test_doc_set_permissions_setup(reset: None) -> None: # noqa: ARG001
|
||||
|
||||
"""Tests for things Curators/Admins should not be able to do"""
|
||||
|
||||
# Test that curator cannot create a document set for the group they don't curate
|
||||
# Test that curator cannot create a non-public document set for the group they don't curate
|
||||
with pytest.raises(HTTPError):
|
||||
DocumentSetManager.create(
|
||||
name="Invalid Document Set 1",
|
||||
is_public=False,
|
||||
groups=[user_group_2.id],
|
||||
cc_pair_ids=[public_cc_pair.id],
|
||||
user_performing_action=curator,
|
||||
|
||||
@@ -6,12 +6,14 @@ from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from io import BytesIO
|
||||
from io import StringIO
|
||||
from uuid import UUID
|
||||
from zipfile import ZipFile
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from ee.onyx.db.usage_export import UsageReportMetadata
|
||||
from onyx.configs.constants import DEFAULT_PERSONA_ID
|
||||
from onyx.db.seeding.chat_history_seeding import seed_chat_history
|
||||
from tests.integration.common_utils.constants import API_SERVER_URL
|
||||
from tests.integration.common_utils.test_models import DATestUser
|
||||
@@ -26,7 +28,13 @@ class TestUsageExportAPI:
|
||||
self, reset: None, admin_user: DATestUser # noqa: ARG002
|
||||
) -> None:
|
||||
# Seed some chat history data for the report
|
||||
seed_chat_history(num_sessions=10, num_messages=4, days=30)
|
||||
seed_chat_history(
|
||||
num_sessions=10,
|
||||
num_messages=4,
|
||||
days=30,
|
||||
user_id=UUID(admin_user.id),
|
||||
persona_id=DEFAULT_PERSONA_ID,
|
||||
)
|
||||
|
||||
# Get initial list of reports
|
||||
initial_response = requests.get(
|
||||
@@ -76,7 +84,13 @@ class TestUsageExportAPI:
|
||||
self, reset: None, admin_user: DATestUser # noqa: ARG002
|
||||
) -> None:
|
||||
# Seed some chat history data
|
||||
seed_chat_history(num_sessions=20, num_messages=4, days=60)
|
||||
seed_chat_history(
|
||||
num_sessions=20,
|
||||
num_messages=4,
|
||||
days=60,
|
||||
user_id=UUID(admin_user.id),
|
||||
persona_id=DEFAULT_PERSONA_ID,
|
||||
)
|
||||
|
||||
# Get initial list of reports
|
||||
initial_response = requests.get(
|
||||
@@ -148,7 +162,13 @@ class TestUsageExportAPI:
|
||||
self, reset: None, admin_user: DATestUser # noqa: ARG002
|
||||
) -> None:
|
||||
# First generate a report to ensure we have at least one
|
||||
seed_chat_history(num_sessions=5, num_messages=4, days=30)
|
||||
seed_chat_history(
|
||||
num_sessions=5,
|
||||
num_messages=4,
|
||||
days=30,
|
||||
user_id=UUID(admin_user.id),
|
||||
persona_id=DEFAULT_PERSONA_ID,
|
||||
)
|
||||
|
||||
# Get initial count
|
||||
initial_response = requests.get(
|
||||
@@ -204,7 +224,13 @@ class TestUsageExportAPI:
|
||||
self, reset: None, admin_user: DATestUser # noqa: ARG002
|
||||
) -> None:
|
||||
# First generate a report
|
||||
seed_chat_history(num_sessions=5, num_messages=4, days=30)
|
||||
seed_chat_history(
|
||||
num_sessions=5,
|
||||
num_messages=4,
|
||||
days=30,
|
||||
user_id=UUID(admin_user.id),
|
||||
persona_id=DEFAULT_PERSONA_ID,
|
||||
)
|
||||
|
||||
# Get initial reports count
|
||||
initial_response = requests.get(
|
||||
@@ -352,7 +378,13 @@ class TestUsageExportAPI:
|
||||
self, reset: None, admin_user: DATestUser # noqa: ARG002
|
||||
) -> None:
|
||||
# Seed some data
|
||||
seed_chat_history(num_sessions=10, num_messages=4, days=30)
|
||||
seed_chat_history(
|
||||
num_sessions=10,
|
||||
num_messages=4,
|
||||
days=30,
|
||||
user_id=UUID(admin_user.id),
|
||||
persona_id=DEFAULT_PERSONA_ID,
|
||||
)
|
||||
|
||||
# Get initial count of reports
|
||||
initial_response = requests.get(
|
||||
|
||||
@@ -25,6 +25,11 @@ def test_add_users_to_group(reset: None) -> None: # noqa: ARG001
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
UserGroupManager.wait_for_sync(
|
||||
user_performing_action=admin_user,
|
||||
user_groups_to_check=[user_group],
|
||||
)
|
||||
|
||||
updated_user_group = UserGroupManager.add_users(
|
||||
user_group=user_group,
|
||||
user_ids=[user_to_add.id],
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
_user_file_project_sync_queued_key,
|
||||
)
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
check_for_user_file_project_sync,
|
||||
)
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
enqueue_user_file_project_sync_task,
|
||||
)
|
||||
from onyx.background.celery.tasks.user_file_processing.tasks import (
|
||||
process_single_user_file_project_sync,
|
||||
)
|
||||
from onyx.configs.constants import CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH
|
||||
|
||||
|
||||
def _build_redis_mock_with_lock() -> tuple[MagicMock, MagicMock]:
    """Return ``(redis_client, lock)`` mocks wired together.

    ``redis_client.lock(...)`` returns ``lock``; the lock reports a
    successful ``acquire()`` and ``owned()``.
    """
    lock_mock = MagicMock()
    lock_mock.acquire.return_value = True
    lock_mock.owned.return_value = True

    client_mock = MagicMock()
    client_mock.lock.return_value = lock_mock
    return client_mock, lock_mock
|
||||
|
||||
|
||||
@patch(
    "onyx.background.celery.tasks.user_file_processing.tasks."
    "get_user_file_project_sync_queue_depth"
)
@patch("onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client")
def test_check_for_user_file_project_sync_applies_queue_backpressure(
    mock_get_redis_client: MagicMock,
    mock_get_queue_depth: MagicMock,
) -> None:
    """With the queue one over the max depth, nothing is sent and the lock is released."""
    # Report a queue depth just above the configured maximum.
    mock_get_queue_depth.return_value = USER_FILE_PROJECT_SYNC_MAX_QUEUE_DEPTH + 1
    fake_redis, fake_lock = _build_redis_mock_with_lock()
    mock_get_redis_client.return_value = fake_redis

    fake_app = MagicMock()
    with patch.object(check_for_user_file_project_sync, "app", fake_app):
        check_for_user_file_project_sync.run(tenant_id="test-tenant")

    fake_app.send_task.assert_not_called()
    fake_lock.release.assert_called_once()
|
||||
|
||||
|
||||
@patch(
    "onyx.background.celery.tasks.user_file_processing.tasks."
    "enqueue_user_file_project_sync_task"
)
@patch(
    "onyx.background.celery.tasks.user_file_processing.tasks."
    "get_user_file_project_sync_queue_depth"
)
@patch(
    "onyx.background.celery.tasks.user_file_processing.tasks."
    "get_session_with_current_tenant"
)
@patch("onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client")
def test_check_for_user_file_project_sync_skips_duplicates(
    mock_get_redis_client: MagicMock,
    mock_get_session: MagicMock,
    mock_get_queue_depth: MagicMock,
    mock_enqueue: MagicMock,
) -> None:
    """Enqueue is attempted for both returned file ids even when one returns False."""
    fake_redis, fake_lock = _build_redis_mock_with_lock()
    mock_get_redis_client.return_value = fake_redis
    mock_get_queue_depth.return_value = 0

    first_file_id = uuid4()
    second_file_id = uuid4()

    # The session mock hands back two user-file ids from its query chain.
    db_session = MagicMock()
    db_session.execute.return_value.scalars.return_value.all.return_value = [
        first_file_id,
        second_file_id,
    ]
    mock_get_session.return_value.__enter__.return_value = db_session
    # First enqueue returns True, the second returns False
    # (presumably the already-queued/duplicate case).
    mock_enqueue.side_effect = [True, False]

    fake_app = MagicMock()
    with patch.object(check_for_user_file_project_sync, "app", fake_app):
        check_for_user_file_project_sync.run(tenant_id="test-tenant")

    assert mock_enqueue.call_count == 2
    fake_lock.release.assert_called_once()
|
||||
|
||||
|
||||
def test_enqueue_user_file_project_sync_task_sets_guard_and_expiry() -> None:
    """A successful enqueue sets the guard key with NX + expiry and sends one task."""
    file_id = str(uuid4())
    fake_app = MagicMock()
    fake_redis = MagicMock()
    # A truthy result from SET makes the helper proceed to publish.
    fake_redis.set.return_value = True

    was_enqueued = enqueue_user_file_project_sync_task(
        celery_app=fake_app,
        redis_client=fake_redis,
        user_file_id=file_id,
        tenant_id="test-tenant",
        priority=OnyxCeleryPriority.HIGHEST,
    )

    assert was_enqueued is True
    fake_redis.set.assert_called_once_with(
        _user_file_project_sync_queued_key(file_id),
        1,
        nx=True,
        ex=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,
    )
    fake_app.send_task.assert_called_once_with(
        OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
        kwargs={"user_file_id": file_id, "tenant_id": "test-tenant"},
        queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
        priority=OnyxCeleryPriority.HIGHEST,
        expires=CELERY_USER_FILE_PROJECT_SYNC_TASK_EXPIRES,
    )
|
||||
|
||||
|
||||
def test_enqueue_user_file_project_sync_task_rolls_back_guard_on_publish_failure() -> None:
    """If ``send_task`` raises, the error propagates and the guard key is deleted once."""
    file_id = str(uuid4())

    fake_redis = MagicMock()
    fake_redis.set.return_value = True
    fake_app = MagicMock()
    # Simulate a failure while publishing the task.
    fake_app.send_task.side_effect = RuntimeError("publish failed")

    with pytest.raises(RuntimeError):
        enqueue_user_file_project_sync_task(
            celery_app=fake_app,
            redis_client=fake_redis,
            user_file_id=file_id,
            tenant_id="test-tenant",
        )

    expected_key = _user_file_project_sync_queued_key(file_id)
    fake_redis.delete.assert_called_once_with(expected_key)
|
||||
|
||||
|
||||
@patch("onyx.background.celery.tasks.user_file_processing.tasks.get_redis_client")
def test_process_single_user_file_project_sync_clears_queued_guard_on_pickup(
    mock_get_redis_client: MagicMock,
) -> None:
    """The queued guard key is deleted once even when the lock cannot be acquired."""
    # Lock acquisition is refused for this run.
    busy_lock = MagicMock()
    busy_lock.acquire.return_value = False
    fake_redis = MagicMock()
    fake_redis.lock.return_value = busy_lock
    mock_get_redis_client.return_value = fake_redis

    file_id = str(uuid4())
    process_single_user_file_project_sync.run(
        user_file_id=file_id,
        tenant_id="test-tenant",
    )

    expected_key = _user_file_project_sync_queued_key(file_id)
    fake_redis.delete.assert_called_once_with(expected_key)
|
||||
@@ -0,0 +1,65 @@
|
||||
import json
|
||||
|
||||
import httplib2 # type: ignore[import-untyped]
|
||||
from googleapiclient.errors import HttpError # type: ignore[import-untyped]
|
||||
|
||||
from onyx.connectors.google_utils.google_utils import _is_rate_limit_error
|
||||
|
||||
|
||||
def _make_http_error(
    status: int,
    reason: str = "unknown",
    error_reason: str = "",
) -> HttpError:
    """Build an ``HttpError`` with the given status and a JSON error body.

    The body always carries ``error.message`` set to ``reason``. When
    ``error_reason`` is non-empty, an ``error.errors`` list with one
    ``{"reason", "message"}`` entry is added as well.
    """
    error_payload: dict = {"message": reason}
    if error_reason:
        error_payload["errors"] = [{"reason": error_reason, "message": reason}]

    encoded_body = json.dumps({"error": error_payload}).encode()
    return HttpError(httplib2.Response({"status": status}), encoded_body)
|
||||
|
||||
|
||||
def test_429_is_rate_limit() -> None:
    """HTTP 429 is classified as a rate-limit error."""
    too_many_requests = _make_http_error(429)
    assert _is_rate_limit_error(too_many_requests)
|
||||
|
||||
|
||||
def test_403_user_rate_limit_exceeded() -> None:
    """A 403 whose error reason is ``userRateLimitExceeded`` counts as a rate limit."""
    user_limit_error = _make_http_error(
        403,
        error_reason="userRateLimitExceeded",
        reason="User rate limit exceeded.",
    )
    assert _is_rate_limit_error(user_limit_error)
|
||||
|
||||
|
||||
def test_403_rate_limit_exceeded() -> None:
    """A 403 whose error reason is ``rateLimitExceeded`` counts as a rate limit."""
    limit_error = _make_http_error(
        403,
        error_reason="rateLimitExceeded",
        reason="Rate limit exceeded.",
    )
    assert _is_rate_limit_error(limit_error)
|
||||
|
||||
|
||||
def test_403_permission_denied_is_not_rate_limit() -> None:
    """A 403 whose error reason is ``forbidden`` is not treated as a rate limit."""
    forbidden_error = _make_http_error(
        403,
        error_reason="forbidden",
        reason="The caller does not have permission",
    )
    assert not _is_rate_limit_error(forbidden_error)
|
||||
|
||||
|
||||
def test_404_is_not_rate_limit() -> None:
    """HTTP 404 is not classified as a rate-limit error."""
    not_found = _make_http_error(404)
    assert not _is_rate_limit_error(not_found)
|
||||
|
||||
|
||||
def test_500_is_not_rate_limit() -> None:
    """HTTP 500 is not classified as a rate-limit error."""
    server_error = _make_http_error(500)
    assert not _is_rate_limit_error(server_error)
|
||||
@@ -0,0 +1,34 @@
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.connectors.exceptions import ConnectorValidationError
|
||||
from onyx.connectors.slab.connector import SlabConnector
|
||||
|
||||
|
||||
def _build_connector(base_url: str = "https://myteam.slab.com") -> SlabConnector:
    """Return a ``SlabConnector`` for ``base_url`` with a fake bot token loaded."""
    fake_credentials = {"slab_bot_token": "fake-token"}
    slab = SlabConnector(base_url=base_url)
    slab.load_credentials(fake_credentials)
    return slab
|
||||
|
||||
|
||||
def test_validate_rejects_missing_scheme() -> None:
    """A base URL without a scheme fails validation with a message matching ``https://``."""
    schemeless = _build_connector(base_url="myteam.slab.com")
    with pytest.raises(ConnectorValidationError, match="https://"):
        schemeless.validate_connector_settings()
|
||||
|
||||
|
||||
@patch("onyx.connectors.slab.connector.get_all_post_ids", return_value=["id1"])
def test_validate_success(mock_get_posts: object) -> None:  # noqa: ARG001
    """Validation raises nothing when fetching post ids returns a result."""
    # The default connector has a well-formed https base URL.
    valid_connector = _build_connector()
    valid_connector.validate_connector_settings()
|
||||
|
||||
|
||||
@patch(
    "onyx.connectors.slab.connector.get_all_post_ids",
    side_effect=Exception("401 Unauthorized"),
)
def test_validate_bad_token_raises(mock_get_posts: object) -> None:  # noqa: ARG001
    """An exception while fetching post ids surfaces as ``ConnectorValidationError``."""
    failing_connector = _build_connector()
    with pytest.raises(ConnectorValidationError, match="Failed to fetch posts"):
        failing_connector.validate_connector_settings()
|
||||
@@ -0,0 +1,95 @@
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import DocumentBase
|
||||
from onyx.connectors.models import TextSection
|
||||
|
||||
|
||||
def _minimal_doc_kwargs(metadata: dict) -> dict:
    """Return ``Document`` constructor kwargs with fixed fields and the given ``metadata``."""
    only_section = TextSection(text="hello", link="http://example.com")
    kwargs: dict = {
        "id": "test-doc",
        "sections": [only_section],
        "source": DocumentSource.NOT_APPLICABLE,
        "semantic_identifier": "Test Doc",
        "metadata": metadata,
    }
    return kwargs
|
||||
|
||||
|
||||
def test_int_values_coerced_to_str() -> None:
    """An int metadata value is stored as its string form."""
    raw_metadata = {"count": 42}
    document = Document(**_minimal_doc_kwargs(raw_metadata))
    assert document.metadata == {"count": "42"}
|
||||
|
||||
|
||||
def test_float_values_coerced_to_str() -> None:
    """A float metadata value is stored as its string form."""
    raw_metadata = {"score": 3.14}
    document = Document(**_minimal_doc_kwargs(raw_metadata))
    assert document.metadata == {"score": "3.14"}
|
||||
|
||||
|
||||
def test_bool_values_coerced_to_str() -> None:
    """A bool metadata value is stored as ``"True"`` / ``"False"`` text."""
    raw_metadata = {"active": True}
    document = Document(**_minimal_doc_kwargs(raw_metadata))
    assert document.metadata == {"active": "True"}
|
||||
|
||||
|
||||
def test_list_of_ints_coerced_to_list_of_str() -> None:
    """A list of integers in metadata becomes a list of strings."""
    raw_metadata = {"ids": [1, 2, 3]}
    built = Document(**_minimal_doc_kwargs(raw_metadata))
    expected = {"ids": ["1", "2", "3"]}
    assert built.metadata == expected
|
||||
|
||||
|
||||
def test_list_of_mixed_types_coerced_to_list_of_str() -> None:
    """A metadata list mixing str, int, bool and float becomes all strings."""
    raw_metadata = {"tags": ["a", 1, True, 2.5]}
    built = Document(**_minimal_doc_kwargs(raw_metadata))
    expected = {"tags": ["a", "1", "True", "2.5"]}
    assert built.metadata == expected
|
||||
|
||||
|
||||
def test_list_of_dicts_coerced_to_list_of_str() -> None:
    """Each dict inside a metadata list is replaced by a string of that dict."""
    doc_kwargs = _minimal_doc_kwargs({"nested": [{"key": "val"}, {"key2": "val2"}]})
    stringified_items = ["{'key': 'val'}", "{'key2': 'val2'}"]
    built = Document(**doc_kwargs)
    assert built.metadata == {"nested": stringified_items}
|
||||
|
||||
|
||||
def test_dict_value_coerced_to_str() -> None:
    """A dict used directly as a metadata value is replaced by a string of it."""
    doc_kwargs = _minimal_doc_kwargs({"info": {"inner_key": "inner_val"}})
    built = Document(**doc_kwargs)
    expected = {"info": "{'inner_key': 'inner_val'}"}
    assert built.metadata == expected
|
||||
|
||||
|
||||
def test_none_value_coerced_to_str() -> None:
    """A ``None`` metadata value is exposed on the Document as ``"None"``."""
    raw_metadata = {"empty": None}
    built = Document(**_minimal_doc_kwargs(raw_metadata))
    expected = {"empty": "None"}
    assert built.metadata == expected
|
||||
|
||||
|
||||
def test_already_valid_str_values_unchanged() -> None:
    """String metadata values come back exactly as they were supplied."""
    raw_metadata = {"key": "value"}
    built = Document(**_minimal_doc_kwargs(raw_metadata))
    expected = {"key": "value"}
    assert built.metadata == expected
|
||||
|
||||
|
||||
def test_already_valid_list_of_str_unchanged() -> None:
    """A metadata list that already holds only strings is left as supplied."""
    raw_metadata = {"tags": ["a", "b", "c"]}
    built = Document(**_minimal_doc_kwargs(raw_metadata))
    expected = {"tags": ["a", "b", "c"]}
    assert built.metadata == expected
|
||||
|
||||
|
||||
def test_empty_metadata_unchanged() -> None:
    """Empty metadata stays an empty dict on the constructed Document."""
    doc_kwargs = _minimal_doc_kwargs({})
    built = Document(**doc_kwargs)
    assert built.metadata == {}
|
||||
|
||||
|
||||
def test_mixed_metadata_values() -> None:
    """Metadata mixing str, int, list and dict values is normalised per value.

    Strings stay as they are, the int and the list items become strings, and
    the dict value becomes a single string.
    """
    expected = {
        "str_val": "hello",
        "int_val": "99",
        "list_val": ["1", "two", "3.0"],
        "dict_val": "{'nested': True}",
    }
    doc_kwargs = _minimal_doc_kwargs(
        {
            "str_val": "hello",
            "int_val": 99,
            "list_val": [1, "two", 3.0],
            "dict_val": {"nested": True},
        }
    )
    built = Document(**doc_kwargs)
    assert built.metadata == expected
|
||||
|
||||
|
||||
def test_coercion_works_on_base_class() -> None:
    """``DocumentBase`` also turns an integer metadata value into a string."""
    base_kwargs = _minimal_doc_kwargs({"count": 42})
    # This test builds DocumentBase without the "source" and "id" entries.
    for dropped_key in ("source", "id"):
        base_kwargs.pop(dropped_key)
    built = DocumentBase(**base_kwargs)
    expected = {"count": "42"}
    assert built.metadata == expected
|
||||
@@ -0,0 +1,52 @@
|
||||
import pytest
|
||||
from office365.graph_client import AzureEnvironment # type: ignore[import-untyped]
|
||||
|
||||
from onyx.connectors.exceptions import ConnectorValidationError
|
||||
from onyx.connectors.microsoft_graph_env import resolve_microsoft_environment
|
||||
|
||||
|
||||
def test_resolve_global_defaults() -> None:
    """The ``.com`` graph and login hosts resolve to ``AzureEnvironment.Global``."""
    graph_url = "https://graph.microsoft.com"
    authority_url = "https://login.microsoftonline.com"
    resolved = resolve_microsoft_environment(graph_url, authority_url)
    assert resolved.environment == AzureEnvironment.Global
    assert resolved.sharepoint_domain_suffix == "sharepoint.com"
|
||||
|
||||
|
||||
def test_resolve_gcc_high() -> None:
    """The ``.us`` graph and login hosts resolve to ``USGovernmentHigh``.

    The resolved hosts are echoed back and the SharePoint suffix is
    ``sharepoint.us``.
    """
    graph_url = "https://graph.microsoft.us"
    authority_url = "https://login.microsoftonline.us"
    resolved = resolve_microsoft_environment(graph_url, authority_url)
    assert resolved.environment == AzureEnvironment.USGovernmentHigh
    assert resolved.graph_host == "https://graph.microsoft.us"
    assert resolved.authority_host == "https://login.microsoftonline.us"
    assert resolved.sharepoint_domain_suffix == "sharepoint.us"
|
||||
|
||||
|
||||
def test_resolve_dod() -> None:
    """The ``dod-graph`` host with the ``.us`` login host resolves to ``USGovernmentDoD``."""
    graph_url = "https://dod-graph.microsoft.us"
    authority_url = "https://login.microsoftonline.us"
    resolved = resolve_microsoft_environment(graph_url, authority_url)
    assert resolved.environment == AzureEnvironment.USGovernmentDoD
    assert resolved.sharepoint_domain_suffix == "sharepoint.us"
|
||||
|
||||
|
||||
def test_trailing_slashes_are_stripped() -> None:
    """Hosts given with a trailing ``/`` still resolve to ``USGovernmentHigh``."""
    graph_url = "https://graph.microsoft.us/"
    authority_url = "https://login.microsoftonline.us/"
    resolved = resolve_microsoft_environment(graph_url, authority_url)
    assert resolved.environment == AzureEnvironment.USGovernmentHigh
|
||||
|
||||
|
||||
def test_mismatched_authority_raises() -> None:
    """A ``.us`` graph host paired with the ``.com`` login host is rejected.

    The ``ConnectorValidationError`` message must match ``inconsistent``.
    """
    graph_url = "https://graph.microsoft.us"
    authority_url = "https://login.microsoftonline.com"
    expected_failure = pytest.raises(ConnectorValidationError, match="inconsistent")
    with expected_failure:
        resolve_microsoft_environment(graph_url, authority_url)
|
||||
|
||||
|
||||
def test_unknown_graph_host_raises() -> None:
    """An ``example.com`` graph host is rejected.

    The ``ConnectorValidationError`` message must match ``Unsupported``.
    """
    graph_url = "https://graph.example.com"
    authority_url = "https://login.example.com"
    expected_failure = pytest.raises(ConnectorValidationError, match="Unsupported")
    with expected_failure:
        resolve_microsoft_environment(graph_url, authority_url)
|
||||
@@ -1,10 +1,12 @@
|
||||
import json
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.image_gen.exceptions import ImageProviderCredentialsError
|
||||
from onyx.image_gen.factory import get_image_generation_provider
|
||||
from onyx.image_gen.interfaces import ImageGenerationProviderCredentials
|
||||
from onyx.image_gen.interfaces import ReferenceImage
|
||||
from onyx.image_gen.providers.azure_img_gen import AzureImageGenerationProvider
|
||||
from onyx.image_gen.providers.openai_img_gen import OpenAIImageGenerationProvider
|
||||
from onyx.image_gen.providers.vertex_img_gen import VertexImageGenerationProvider
|
||||
@@ -45,6 +47,8 @@ def test_build_openai_provider_from_api_key_and_base() -> None:
|
||||
assert isinstance(image_gen_provider, OpenAIImageGenerationProvider)
|
||||
assert image_gen_provider._api_key == "test"
|
||||
assert image_gen_provider._api_base == "test"
|
||||
assert image_gen_provider.supports_reference_images is True
|
||||
assert image_gen_provider.max_reference_images == 16
|
||||
|
||||
|
||||
def test_build_openai_provider_fails_no_api_key() -> None:
|
||||
@@ -73,6 +77,8 @@ def test_build_azure_provider_from_api_key_and_base_and_version() -> None:
|
||||
assert image_gen_provider._api_key == "test"
|
||||
assert image_gen_provider._api_base == "test"
|
||||
assert image_gen_provider._api_version == "test"
|
||||
assert image_gen_provider.supports_reference_images is True
|
||||
assert image_gen_provider.max_reference_images == 16
|
||||
|
||||
|
||||
def test_build_azure_provider_fails_missing_credential() -> None:
|
||||
@@ -133,3 +139,195 @@ def test_build_vertex_provider_with_missing_project_id() -> None:
|
||||
|
||||
with pytest.raises(ImageProviderCredentialsError):
|
||||
get_image_generation_provider("vertex_ai", credentials)
|
||||
|
||||
|
||||
def test_openai_provider_uses_image_generation_without_reference_images() -> None:
    """With no reference images the OpenAI provider calls ``litellm.image_generation``.

    ``litellm.image_edit`` must stay untouched and the generation result is
    returned as-is.
    """
    openai_provider = OpenAIImageGenerationProvider(
        api_key="test-key",
        api_base="test-base",
    )
    sentinel = object()

    with patch("litellm.image_generation", return_value=sentinel) as generation_mock:
        with patch("litellm.image_edit") as edit_mock:
            result = openai_provider.generate_image(
                prompt="draw a mountain",
                model="gpt-image-1",
                size="1024x1024",
                n=1,
                quality="high",
            )

    assert result is sentinel
    generation_mock.assert_called_once()
    edit_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_openai_provider_uses_image_edit_with_reference_images() -> None:
    """With reference images the OpenAI provider calls ``litellm.image_edit``.

    ``litellm.image_generation`` must stay untouched, and the ``image`` keyword
    passed to the edit call carries the two raw byte payloads in order.
    """
    openai_provider = OpenAIImageGenerationProvider(
        api_key="test-key",
        api_base="test-base",
    )
    first_reference = ReferenceImage(data=b"image-1-bytes", mime_type="image/png")
    second_reference = ReferenceImage(data=b"image-2-bytes", mime_type="image/jpeg")
    sentinel = object()

    with patch("litellm.image_generation") as generation_mock:
        with patch("litellm.image_edit", return_value=sentinel) as edit_mock:
            result = openai_provider.generate_image(
                prompt="make this look watercolor",
                model="gpt-image-1",
                size="1024x1024",
                n=1,
                quality="high",
                reference_images=[first_reference, second_reference],
            )

    assert result is sentinel
    generation_mock.assert_not_called()
    edit_mock.assert_called_once()
    forwarded_images = edit_mock.call_args.kwargs["image"]
    assert forwarded_images == [
        b"image-1-bytes",
        b"image-2-bytes",
    ]
|
||||
|
||||
|
||||
def test_openai_provider_rejects_reference_images_for_unsupported_model() -> None:
    """Passing one reference image with model ``dall-e-3`` raises ``ValueError``."""
    openai_provider = OpenAIImageGenerationProvider(api_key="test-key")
    expected_failure = pytest.raises(ValueError)

    with expected_failure:
        # The reference image is built inside the block, as part of the call.
        single_reference = [ReferenceImage(data=b"image-1", mime_type="image/png")]
        openai_provider.generate_image(
            prompt="edit this image",
            model="dall-e-3",
            size="1024x1024",
            n=1,
            reference_images=single_reference,
        )
|
||||
|
||||
|
||||
def test_openai_provider_rejects_multiple_reference_images_for_dalle3() -> None:
    """Two reference images with model ``dall-e-3`` raise a ``ValueError``.

    The message must match ``does not support image edits with reference images``.
    """
    openai_provider = OpenAIImageGenerationProvider(api_key="test-key")
    expected_failure = pytest.raises(
        ValueError,
        match="does not support image edits with reference images",
    )

    with expected_failure:
        # The reference images are built inside the block, as part of the call.
        two_references = [
            ReferenceImage(data=b"image-1", mime_type="image/png"),
            ReferenceImage(data=b"image-2", mime_type="image/png"),
        ]
        openai_provider.generate_image(
            prompt="edit this image",
            model="dall-e-3",
            size="1024x1024",
            n=1,
            reference_images=two_references,
        )
|
||||
|
||||
|
||||
def test_azure_provider_uses_image_generation_without_reference_images() -> None:
    """With no reference images the Azure provider calls ``litellm.image_generation``.

    ``litellm.image_edit`` must stay untouched and the ``model`` keyword sent
    to the generation call is ``azure/img-deployment``.
    """
    azure_provider = AzureImageGenerationProvider(
        api_key="test-key",
        api_base="https://azure.example.com",
        api_version="2024-05-01-preview",
        deployment_name="img-deployment",
    )
    sentinel = object()

    with patch("litellm.image_generation", return_value=sentinel) as generation_mock:
        with patch("litellm.image_edit") as edit_mock:
            result = azure_provider.generate_image(
                prompt="draw a skyline",
                model="gpt-image-1",
                size="1024x1024",
                n=1,
                quality="high",
            )

    assert result is sentinel
    generation_mock.assert_called_once()
    edit_mock.assert_not_called()
    sent_model = generation_mock.call_args.kwargs["model"]
    assert sent_model == "azure/img-deployment"
|
||||
|
||||
|
||||
def test_azure_provider_uses_image_edit_with_reference_images() -> None:
    """With reference images the Azure provider calls ``litellm.image_edit``.

    ``litellm.image_generation`` must stay untouched; the edit call receives
    ``model="azure/img-deployment"`` and the two raw byte payloads in order.
    """
    azure_provider = AzureImageGenerationProvider(
        api_key="test-key",
        api_base="https://azure.example.com",
        api_version="2024-05-01-preview",
        deployment_name="img-deployment",
    )
    first_reference = ReferenceImage(data=b"image-1-bytes", mime_type="image/png")
    second_reference = ReferenceImage(data=b"image-2-bytes", mime_type="image/jpeg")
    sentinel = object()

    with patch("litellm.image_generation") as generation_mock:
        with patch("litellm.image_edit", return_value=sentinel) as edit_mock:
            result = azure_provider.generate_image(
                prompt="make this noir style",
                model="gpt-image-1",
                size="1024x1024",
                n=1,
                quality="high",
                reference_images=[first_reference, second_reference],
            )

    assert result is sentinel
    generation_mock.assert_not_called()
    edit_mock.assert_called_once()
    assert edit_mock.call_args.kwargs["model"] == "azure/img-deployment"
    forwarded_images = edit_mock.call_args.kwargs["image"]
    assert forwarded_images == [
        b"image-1-bytes",
        b"image-2-bytes",
    ]
|
||||
|
||||
|
||||
def test_azure_provider_rejects_reference_images_for_unsupported_model() -> None:
    """Passing one reference image with model ``dall-e-3`` raises ``ValueError``."""
    azure_provider = AzureImageGenerationProvider(
        api_key="test-key",
        api_base="https://azure.example.com",
        api_version="2024-05-01-preview",
    )
    expected_failure = pytest.raises(ValueError)

    with expected_failure:
        # The reference image is built inside the block, as part of the call.
        single_reference = [ReferenceImage(data=b"image-1", mime_type="image/png")]
        azure_provider.generate_image(
            prompt="edit this image",
            model="dall-e-3",
            size="1024x1024",
            n=1,
            reference_images=single_reference,
        )
|
||||
|
||||
|
||||
def test_azure_provider_rejects_multiple_reference_images_for_dalle3() -> None:
    """Two reference images with model ``dall-e-3`` raise a ``ValueError``.

    The message must match ``does not support image edits with reference images``.
    """
    azure_provider = AzureImageGenerationProvider(
        api_key="test-key",
        api_base="https://azure.example.com",
        api_version="2024-05-01-preview",
    )
    expected_failure = pytest.raises(
        ValueError,
        match="does not support image edits with reference images",
    )

    with expected_failure:
        # The reference images are built inside the block, as part of the call.
        two_references = [
            ReferenceImage(data=b"image-1", mime_type="image/png"),
            ReferenceImage(data=b"image-2", mime_type="image/png"),
        ]
        azure_provider.generate_image(
            prompt="edit this image",
            model="dall-e-3",
            size="1024x1024",
            n=1,
            reference_images=two_references,
        )
|
||||
|
||||
159
backend/tests/unit/onyx/indexing/test_postgres_sanitization.py
Normal file
159
backend/tests/unit/onyx/indexing/test_postgres_sanitization.py
Normal file
@@ -0,0 +1,159 @@
|
||||
from pytest import MonkeyPatch
|
||||
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.connectors.models import BasicExpertInfo
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import DocumentSource
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import IndexAttemptMetadata
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.db.enums import HierarchyNodeType
|
||||
from onyx.indexing import indexing_pipeline
|
||||
from onyx.indexing.postgres_sanitization import sanitize_document_for_postgres
|
||||
from onyx.indexing.postgres_sanitization import sanitize_hierarchy_node_for_postgres
|
||||
|
||||
|
||||
def test_sanitize_document_for_postgres_removes_nul_bytes() -> None:
    """``sanitize_document_for_postgres`` returns a NUL-free document.

    Every string field checked here (ids, title, sections, metadata keys and
    values, nested ``doc_metadata``, owners, external access) loses its NUL
    byte, while the input document keeps its original values.
    """
    dirty_section = TextSection(link="lin\x00k", text="te\x00xt")
    dirty_primary = BasicExpertInfo(display_name="Ali\x00ce", email="a\x00@x.com")
    dirty_secondary = BasicExpertInfo(first_name="Bo\x00b", last_name="Sm\x00ith")
    dirty_access = ExternalAccess(
        external_user_emails={"user\x00@example.com"},
        external_user_group_ids={"gro\x00up-1"},
        is_public=False,
    )
    document = Document(
        id="doc\x00-id",
        source=DocumentSource.FILE,
        semantic_identifier="sem\x00-id",
        title="ti\x00tle",
        parent_hierarchy_raw_node_id="parent\x00-id",
        sections=[dirty_section],
        metadata={"ke\x00y": "va\x00lue", "list\x00key": ["a\x00", "b"]},
        doc_metadata={
            "j\x00son": {
                "in\x00ner": "va\x00l",
                "arr": ["x\x00", {"dee\x00p": "y\x00"}],
            }
        },
        primary_owners=[dirty_primary],
        secondary_owners=[dirty_secondary],
        external_access=dirty_access,
    )

    cleaned = sanitize_document_for_postgres(document)

    # Top-level string fields.
    assert cleaned.id == "doc-id"
    assert cleaned.semantic_identifier == "sem-id"
    assert cleaned.title == "title"
    assert cleaned.parent_hierarchy_raw_node_id == "parent-id"

    # Section content.
    cleaned_section = cleaned.sections[0]
    assert cleaned_section.link == "link"
    assert cleaned_section.text == "text"

    # Flat and nested metadata, keys included.
    assert cleaned.metadata == {"key": "value", "listkey": ["a", "b"]}
    assert cleaned.doc_metadata == {
        "json": {"inner": "val", "arr": ["x", {"deep": "y"}]}
    }

    # Owners.
    assert cleaned.primary_owners is not None
    primary_owner = cleaned.primary_owners[0]
    assert primary_owner.display_name == "Alice"
    assert primary_owner.email == "a@x.com"
    assert cleaned.secondary_owners is not None
    secondary_owner = cleaned.secondary_owners[0]
    assert secondary_owner.first_name == "Bob"
    assert secondary_owner.last_name == "Smith"

    # External access sets.
    assert cleaned.external_access is not None
    cleaned_access = cleaned.external_access
    assert cleaned_access.external_user_emails == {"user@example.com"}
    assert cleaned_access.external_user_group_ids == {"group-1"}

    # Ensure original document is not mutated
    assert document.id == "doc\x00-id"
    assert document.metadata == {"ke\x00y": "va\x00lue", "list\x00key": ["a\x00", "b"]}
|
||||
|
||||
|
||||
def test_sanitize_hierarchy_node_for_postgres_removes_nul_bytes() -> None:
    """``sanitize_hierarchy_node_for_postgres`` returns a NUL-free node.

    Ids, display name, link and both external-access sets are checked.
    """
    dirty_access = ExternalAccess(
        external_user_emails={"a\x00@example.com"},
        external_user_group_ids={"g\x00-1"},
        is_public=True,
    )
    dirty_node = HierarchyNode(
        raw_node_id="raw\x00-id",
        raw_parent_id="paren\x00t-id",
        display_name="fol\x00der",
        link="https://exa\x00mple.com",
        node_type=HierarchyNodeType.FOLDER,
        external_access=dirty_access,
    )

    cleaned = sanitize_hierarchy_node_for_postgres(dirty_node)

    assert cleaned.raw_node_id == "raw-id"
    assert cleaned.raw_parent_id == "parent-id"
    assert cleaned.display_name == "folder"
    assert cleaned.link == "https://example.com"
    assert cleaned.external_access is not None
    cleaned_access = cleaned.external_access
    assert cleaned_access.external_user_emails == {"a@example.com"}
    assert cleaned_access.external_user_group_ids == {"g-1"}
|
||||
|
||||
|
||||
def test_index_doc_batch_prepare_sanitizes_before_db_ops(
    monkeypatch: MonkeyPatch,
) -> None:
    """``index_doc_batch_prepare`` hands NUL-free documents to its DB helpers.

    Four helpers on ``indexing_pipeline`` are replaced with stubs that record
    their inputs; the recorded ids and the returned context must be sanitized.
    """
    dirty_document = Document(
        id="doc\x00id",
        source=DocumentSource.FILE,
        semantic_identifier="sem\x00id",
        sections=[TextSection(text="content", link="li\x00nk")],
        metadata={"ke\x00y": "va\x00lue"},
    )

    # Filled in by the capturing stubs below.
    recorded: dict[str, object] = {}

    def _get_documents_by_ids(db_session: object, document_ids: list[str]) -> list:
        # Stub lookup that always reports no documents.
        del db_session, document_ids
        return []

    def _capture_upsert_documents_in_db(**kwargs: object) -> None:
        recorded["upsert_documents"] = kwargs["documents"]

    def _capture_doc_cc_pair(*args: object) -> None:
        # Records the fourth positional argument, which the assertions below
        # compare against the list of sanitized ids.
        recorded["cc_pair_doc_ids"] = args[3]

    def _noop_link_hierarchy_nodes_to_documents(
        db_session: object,
        document_ids: list[str],
        source: DocumentSource,
        commit: bool,
    ) -> int:
        del db_session, document_ids, source, commit
        return 0

    replacements = (
        ("get_documents_by_ids", _get_documents_by_ids),
        ("_upsert_documents_in_db", _capture_upsert_documents_in_db),
        ("upsert_document_by_connector_credential_pair", _capture_doc_cc_pair),
        ("link_hierarchy_nodes_to_documents", _noop_link_hierarchy_nodes_to_documents),
    )
    for attribute_name, stub in replacements:
        monkeypatch.setattr(indexing_pipeline, attribute_name, stub)

    context = indexing_pipeline.index_doc_batch_prepare(
        documents=[dirty_document],
        index_attempt_metadata=IndexAttemptMetadata(connector_id=1, credential_id=2),
        db_session=object(),  # type: ignore[arg-type]
        ignore_time_skip=True,
    )

    assert context is not None
    first_updatable = context.updatable_docs[0]
    assert first_updatable.id == "docid"
    assert first_updatable.semantic_identifier == "semid"
    assert first_updatable.metadata == {"key": "value"}
    assert recorded["cc_pair_doc_ids"] == ["docid"]

    upserted = recorded["upsert_documents"]
    assert isinstance(upserted, list)
    assert upserted[0].id == "docid"
|
||||
52
backend/tests/unit/onyx/onyxbot/test_slack_formatting.py
Normal file
52
backend/tests/unit/onyx/onyxbot/test_slack_formatting.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from onyx.onyxbot.slack.formatting import _normalize_citation_link_destinations
|
||||
from onyx.onyxbot.slack.formatting import format_slack_message
|
||||
from onyx.onyxbot.slack.utils import remove_slack_text_interactions
|
||||
from onyx.utils.text_processing import decode_escapes
|
||||
|
||||
|
||||
def test_normalize_citation_link_wraps_url_with_parentheses() -> None:
    """A citation URL containing parentheses gets wrapped in angle brackets."""
    original_text = (
        "See [[1]](https://example.com/Access%20ID%20Card(s)%20Guide.pdf) for details."
    )
    expected_text = (
        "See [[1]](<https://example.com/Access%20ID%20Card(s)%20Guide.pdf>) for details."
    )

    result = _normalize_citation_link_destinations(original_text)

    assert expected_text == result
|
||||
|
||||
|
||||
def test_normalize_citation_link_keeps_existing_angle_brackets() -> None:
    """A citation link already wrapped in angle brackets is returned unchanged."""
    already_wrapped = "[[1]](<https://example.com/Access%20ID%20Card(s)%20Guide.pdf>)"

    result = _normalize_citation_link_destinations(already_wrapped)

    assert already_wrapped == result
|
||||
|
||||
|
||||
def test_normalize_citation_link_handles_multiple_links() -> None:
    """Both citation links in one message get their URLs wrapped in angle brackets."""
    two_link_message = (
        "[[1]](https://example.com/(USA)%20Guide.pdf) "
        "[[2]](https://example.com/Plan(s)%20Overview.pdf)"
    )

    result = _normalize_citation_link_destinations(two_link_message)

    expected_fragments = (
        "[[1]](<https://example.com/(USA)%20Guide.pdf>)",
        "[[2]](<https://example.com/Plan(s)%20Overview.pdf>)",
    )
    for wrapped_link in expected_fragments:
        assert wrapped_link in result
|
||||
|
||||
|
||||
def test_format_slack_message_keeps_parenthesized_citation_links_intact() -> None:
    """A citation URL with parentheses survives Slack formatting as one link.

    After formatting, removing Slack text interactions and decoding escapes,
    the full URL must sit inside a single ``<url|[1]>`` link, with no part of
    the URL left after the closing ``>``.
    """
    source_text = (
        "Download [[1]](https://example.com/(USA)%20Access%20ID%20Card(s)%20Guide.pdf)"
    )

    slack_text = format_slack_message(source_text)
    without_interactions = remove_slack_text_interactions(slack_text)
    rendered = decode_escapes(without_interactions)

    whole_link = "<https://example.com/(USA)%20Access%20ID%20Card(s)%20Guide.pdf|[1]>"
    assert whole_link in rendered
    assert "|[1]>%20Access%20ID%20Card" not in rendered
|
||||
@@ -1,10 +1,12 @@
|
||||
"""Test bulk invite limit for free trial tenants."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
from onyx.server.manage.models import EmailInviteStatus
|
||||
from onyx.server.manage.users import bulk_invite_users
|
||||
|
||||
|
||||
@@ -33,6 +35,7 @@ def test_trial_tenant_cannot_exceed_invite_limit(*_mocks: None) -> None:
|
||||
@patch("onyx.server.manage.users.get_invited_users", return_value=[])
|
||||
@patch("onyx.server.manage.users.get_all_users", return_value=[])
|
||||
@patch("onyx.server.manage.users.write_invited_users", return_value=3)
|
||||
@patch("onyx.server.manage.users.enforce_seat_limit")
|
||||
@patch("onyx.server.manage.users.NUM_FREE_TRIAL_USER_INVITES", 5)
|
||||
@patch(
|
||||
"onyx.server.manage.users.fetch_ee_implementation_or_noop",
|
||||
@@ -44,4 +47,69 @@ def test_trial_tenant_can_invite_within_limit(*_mocks: None) -> None:
|
||||
|
||||
result = bulk_invite_users(emails=emails)
|
||||
|
||||
assert result == 3
|
||||
assert result.invited_count == 3
|
||||
assert result.email_invite_status == EmailInviteStatus.DISABLED
|
||||
|
||||
|
||||
# --- email_invite_status tests ---
|
||||
|
||||
_COMMON_PATCHES = [
|
||||
patch("onyx.server.manage.users.MULTI_TENANT", False),
|
||||
patch("onyx.server.manage.users.get_current_tenant_id", return_value="test_tenant"),
|
||||
patch("onyx.server.manage.users.get_invited_users", return_value=[]),
|
||||
patch("onyx.server.manage.users.get_all_users", return_value=[]),
|
||||
patch("onyx.server.manage.users.write_invited_users", return_value=1),
|
||||
patch("onyx.server.manage.users.enforce_seat_limit"),
|
||||
]
|
||||
|
||||
|
||||
def _with_common_patches(fn: object) -> object:
|
||||
for p in reversed(_COMMON_PATCHES):
|
||||
fn = p(fn) # type: ignore
|
||||
return fn
|
||||
|
||||
|
||||
@_with_common_patches
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", False)
def test_email_invite_status_disabled(*_mocks: None) -> None:
    """With ``ENABLE_EMAIL_INVITES`` off, the reported status is ``DISABLED``."""
    invitee_emails = ["user@example.com"]
    invite_result = bulk_invite_users(emails=invitee_emails)

    reported_status = invite_result.email_invite_status
    assert reported_status == EmailInviteStatus.DISABLED
|
||||
|
||||
|
||||
@_with_common_patches
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", True)
@patch("onyx.server.manage.users.EMAIL_CONFIGURED", False)
def test_email_invite_status_not_configured(*_mocks: None) -> None:
    """Invites enabled but ``EMAIL_CONFIGURED`` off reports ``NOT_CONFIGURED``."""
    invitee_emails = ["user@example.com"]
    invite_result = bulk_invite_users(emails=invitee_emails)

    reported_status = invite_result.email_invite_status
    assert reported_status == EmailInviteStatus.NOT_CONFIGURED
|
||||
|
||||
|
||||
@_with_common_patches
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", True)
@patch("onyx.server.manage.users.EMAIL_CONFIGURED", True)
@patch("onyx.server.manage.users.send_user_email_invite")
def test_email_invite_status_sent(mock_send: MagicMock, *_mocks: None) -> None:
    """Invites enabled and configured: the sender runs once and status is ``SENT``."""
    invitee_emails = ["user@example.com"]
    invite_result = bulk_invite_users(emails=invitee_emails)

    mock_send.assert_called_once()
    reported_status = invite_result.email_invite_status
    assert reported_status == EmailInviteStatus.SENT
|
||||
|
||||
|
||||
@_with_common_patches
@patch("onyx.server.manage.users.ENABLE_EMAIL_INVITES", True)
@patch("onyx.server.manage.users.EMAIL_CONFIGURED", True)
@patch(
    "onyx.server.manage.users.send_user_email_invite",
    side_effect=Exception("SMTP auth failed"),
)
def test_email_invite_status_send_failed(*_mocks: None) -> None:
    """A raising email sender reports ``SEND_FAILED`` while ``invited_count`` is still 1."""
    invitee_emails = ["user@example.com"]
    invite_result = bulk_invite_users(emails=invitee_emails)

    reported_status = invite_result.email_invite_status
    assert reported_status == EmailInviteStatus.SEND_FAILED
    assert invite_result.invited_count == 1
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
"""Tests for PythonTool availability based on server_enabled flag.
|
||||
|
||||
Verifies that PythonTool reports itself as unavailable when either:
|
||||
- CODE_INTERPRETER_BASE_URL is not set, or
|
||||
- CodeInterpreterServer.server_enabled is False in the database.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Unavailable when CODE_INTERPRETER_BASE_URL is not set
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@patch(
    "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
    None,
)
def test_python_tool_unavailable_without_base_url() -> None:
    """``PythonTool.is_available`` is ``False`` when the base URL is ``None``."""
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    session_stub = MagicMock(spec=Session)
    availability = PythonTool.is_available(session_stub)
    assert availability is False
|
||||
|
||||
|
||||
@patch(
    "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
    "",
)
def test_python_tool_unavailable_with_empty_base_url() -> None:
    """``PythonTool.is_available`` is ``False`` when the base URL is an empty string."""
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    session_stub = MagicMock(spec=Session)
    availability = PythonTool.is_available(session_stub)
    assert availability is False
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Unavailable when server_enabled is False
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@patch(
    "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
    "http://localhost:8000",
)
@patch(
    "onyx.tools.tool_implementations.python.python_tool.fetch_code_interpreter_server",
)
def test_python_tool_unavailable_when_server_disabled(
    mock_fetch: MagicMock,
) -> None:
    """With a base URL set but ``server_enabled`` False, the tool is unavailable."""
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    # The patched fetch hands back a server record whose flag is off.
    mock_fetch.return_value = MagicMock(server_enabled=False)

    session_stub = MagicMock(spec=Session)
    availability = PythonTool.is_available(session_stub)
    assert availability is False
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Available when both conditions are met
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@patch(
    "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
    "http://localhost:8000",
)
@patch(
    "onyx.tools.tool_implementations.python.python_tool.fetch_code_interpreter_server",
)
def test_python_tool_available_when_server_enabled(
    mock_fetch: MagicMock,
) -> None:
    """With a base URL set and ``server_enabled`` True, the tool is available."""
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

    # The patched fetch hands back a server record whose flag is on.
    mock_fetch.return_value = MagicMock(server_enabled=True)

    session_stub = MagicMock(spec=Session)
    availability = PythonTool.is_available(session_stub)
    assert availability is True
|
||||
@@ -0,0 +1,173 @@
|
||||
"""Unit tests for CodeInterpreterClient streaming-to-batch fallback.
|
||||
|
||||
When the streaming endpoint (/v1/execute/stream) returns 404 — e.g. because the
|
||||
code-interpreter service is an older version that doesn't support streaming — the
|
||||
client should transparently fall back to the batch endpoint (/v1/execute) and
|
||||
convert the batch response into the same stream-event interface.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import (
|
||||
CodeInterpreterClient,
|
||||
)
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import FileInput
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import (
|
||||
StreamOutputEvent,
|
||||
)
|
||||
from onyx.tools.tool_implementations.python.code_interpreter_client import (
|
||||
StreamResultEvent,
|
||||
)
|
||||
|
||||
|
||||
def _make_batch_response(
|
||||
stdout: str = "",
|
||||
stderr: str = "",
|
||||
exit_code: int = 0,
|
||||
timed_out: bool = False,
|
||||
duration_ms: int = 50,
|
||||
) -> MagicMock:
|
||||
"""Build a mock ``requests.Response`` for the batch /v1/execute endpoint."""
|
||||
resp = MagicMock()
|
||||
resp.status_code = 200
|
||||
resp.raise_for_status = MagicMock()
|
||||
resp.json.return_value = {
|
||||
"stdout": stdout,
|
||||
"stderr": stderr,
|
||||
"exit_code": exit_code,
|
||||
"timed_out": timed_out,
|
||||
"duration_ms": duration_ms,
|
||||
"files": [],
|
||||
}
|
||||
return resp
|
||||
|
||||
|
||||
def _make_404_response() -> MagicMock:
|
||||
"""Build a mock ``requests.Response`` that returns 404 (streaming not found)."""
|
||||
resp = MagicMock()
|
||||
resp.status_code = 404
|
||||
return resp
|
||||
|
||||
|
||||
def test_execute_streaming_fallback_to_batch_on_404() -> None:
    """A 404 from /v1/execute/stream makes the client retry against /v1/execute.

    The batch reply must then be yielded as a stdout event, a stderr event and
    a final result event.
    """
    client = CodeInterpreterClient(base_url="http://fake:9000")

    not_found_resp = _make_404_response()
    batch_resp = _make_batch_response(
        stdout="hello world\n",
        stderr="a warning\n",
    )

    urls_called: list[str] = []

    def mock_post(url: str, **_kwargs: object) -> MagicMock:
        urls_called.append(url)
        canned_replies = (
            ("/v1/execute/stream", not_found_resp),
            ("/v1/execute", batch_resp),
        )
        for suffix, reply in canned_replies:
            if url.endswith(suffix):
                return reply
        raise AssertionError(f"Unexpected URL: {url}")

    with patch.object(client.session, "post", side_effect=mock_post):
        events = list(client.execute_streaming(code="print('hello world')"))

    # Streaming endpoint was attempted first, then batch
    assert len(urls_called) == 2
    first_url, second_url = urls_called
    assert first_url.endswith("/v1/execute/stream")
    assert second_url.endswith("/v1/execute")

    # The 404 response must be closed before making the batch call
    not_found_resp.close.assert_called_once()

    # _batch_as_stream yields: stdout event, stderr event, result event
    assert len(events) == 3
    stdout_event, stderr_event, result_event = events

    assert isinstance(stdout_event, StreamOutputEvent)
    assert stdout_event.stream == "stdout"
    assert stdout_event.data == "hello world\n"

    assert isinstance(stderr_event, StreamOutputEvent)
    assert stderr_event.stream == "stderr"
    assert stderr_event.data == "a warning\n"

    assert isinstance(result_event, StreamResultEvent)
    assert result_event.exit_code == 0
    assert not result_event.timed_out
    assert result_event.duration_ms == 50
    assert result_event.files == []
|
||||
|
||||
|
||||
def test_execute_streaming_fallback_stdout_only() -> None:
    """When the batch fallback returns stdout but no stderr, exactly two
    events are produced: a stdout StreamOutputEvent and a StreamResultEvent."""
    interpreter = CodeInterpreterClient(base_url="http://fake:9000")

    not_found_resp = _make_404_response()
    batch_resp = _make_batch_response(stdout="result: 42\n")

    def fake_post(url: str, **_kwargs: object) -> MagicMock:
        # Streaming endpoint answers 404; the batch endpoint answers normally.
        if url.endswith("/v1/execute/stream"):
            return not_found_resp
        if url.endswith("/v1/execute"):
            return batch_resp
        raise AssertionError(f"Unexpected URL: {url}")

    with patch.object(interpreter.session, "post", side_effect=fake_post):
        events = list(interpreter.execute_streaming(code="print(42)"))

    # With no stderr there is no stderr event: stdout + result only.
    assert len(events) == 2
    stdout_event, result_event = events

    assert isinstance(stdout_event, StreamOutputEvent)
    assert stdout_event.stream == "stdout"
    assert stdout_event.data == "result: 42\n"

    assert isinstance(result_event, StreamResultEvent)
    assert result_event.exit_code == 0
|
||||
|
||||
|
||||
def test_execute_streaming_fallback_preserves_files_param() -> None:
    """The files argument must reach the batch endpoint during fallback, so
    staged files remain available to the executed code."""
    interpreter = CodeInterpreterClient(base_url="http://fake:9000")

    not_found_resp = _make_404_response()
    batch_resp = _make_batch_response(stdout="ok\n")

    files_input: list[FileInput] = [{"path": "data.csv", "file_id": "file-abc123"}]

    # JSON bodies of every POST, in call order.
    sent_payloads: list[dict] = []

    def fake_post(url: str, **kwargs: object) -> MagicMock:
        # Record the body before routing so both endpoints are captured.
        if "json" in kwargs:
            sent_payloads.append(kwargs["json"])  # type: ignore[arg-type]
        if url.endswith("/v1/execute/stream"):
            return not_found_resp
        if url.endswith("/v1/execute"):
            return batch_resp
        raise AssertionError(f"Unexpected URL: {url}")

    with patch.object(interpreter.session, "post", side_effect=fake_post):
        events = list(
            interpreter.execute_streaming(
                code="import pandas",
                files=files_input,
            )
        )

    # One payload per request (streaming attempt + batch fallback), and each
    # one must carry the same files and code.
    assert len(sent_payloads) == 2
    for body in sent_payloads:
        assert body["files"] == files_input
        assert body["code"] == "import pandas"

    # The fallback must still produce at least one result event.
    result_events = [evt for evt in events if isinstance(evt, StreamResultEvent)]
    assert result_events
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user