mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-04-18 07:56:48 +00:00
Compare commits
32 Commits
dane/infer
...
bo/pruning
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
19e4f9b377 | ||
|
|
146d8522df | ||
|
|
ac5bae3631 | ||
|
|
b5434b2391 | ||
|
|
28e13b503b | ||
|
|
99a90ec196 | ||
|
|
8ffd7fbb56 | ||
|
|
f9e88e3c72 | ||
|
|
97efdbbbc3 | ||
|
|
b91a3aed53 | ||
|
|
51480e1099 | ||
|
|
70efbef95e | ||
|
|
f3936e2669 | ||
|
|
c933c71b59 | ||
|
|
e0d9e109b5 | ||
|
|
66c361bd37 | ||
|
|
01cbea8c4b | ||
|
|
2dc2b0da84 | ||
|
|
4b58c9cda6 | ||
|
|
7eb945f060 | ||
|
|
e29f948f29 | ||
|
|
7a18b896aa | ||
|
|
53e00c7989 | ||
|
|
50df53727a | ||
|
|
e629574580 | ||
|
|
8d539cdf3f | ||
|
|
52524cbe57 | ||
|
|
c64def6a9e | ||
|
|
2628fe1b93 | ||
|
|
96bf344f9c | ||
|
|
b92d3a307d | ||
|
|
c55207eeba |
@@ -45,7 +45,7 @@ if [ "$ACTIVE_HOME" != "$MOUNT_HOME" ]; then
|
||||
[ -d "$MOUNT_HOME/$item" ] || continue
|
||||
if [ -e "$ACTIVE_HOME/$item" ] && [ ! -L "$ACTIVE_HOME/$item" ]; then
|
||||
echo "warning: replacing $ACTIVE_HOME/$item with symlink to $MOUNT_HOME/$item" >&2
|
||||
rm -rf "$ACTIVE_HOME/$item"
|
||||
rm -rf "${ACTIVE_HOME:?}/$item"
|
||||
fi
|
||||
ln -sfn "$MOUNT_HOME/$item" "$ACTIVE_HOME/$item"
|
||||
done
|
||||
|
||||
2
.github/workflows/pr-quality-checks.yml
vendored
2
.github/workflows/pr-quality-checks.yml
vendored
@@ -39,6 +39,8 @@ jobs:
|
||||
working-directory: ./web
|
||||
run: npm ci
|
||||
- uses: j178/prek-action@cbc2f23eb5539cf20d82d1aabd0d0ecbcc56f4e3
|
||||
env:
|
||||
SKIP: ty
|
||||
with:
|
||||
prek-version: '0.3.4'
|
||||
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}
|
||||
|
||||
@@ -68,6 +68,7 @@ repos:
|
||||
pass_filenames: true
|
||||
files: ^backend/(?!\.venv/|scripts/).*\.py$
|
||||
- id: uv-run
|
||||
alias: ty
|
||||
name: ty
|
||||
args: ["ty", "check"]
|
||||
pass_filenames: true
|
||||
@@ -85,6 +86,17 @@ repos:
|
||||
hooks:
|
||||
- id: actionlint
|
||||
|
||||
- repo: https://github.com/shellcheck-py/shellcheck-py
|
||||
rev: 745eface02aef23e168a8afb6b5737818efbea95 # frozen: v0.11.0.1
|
||||
hooks:
|
||||
- id: shellcheck
|
||||
exclude: >-
|
||||
(?x)^(
|
||||
backend/scripts/setup_craft_templates\.sh|
|
||||
deployment/docker_compose/init-letsencrypt\.sh|
|
||||
deployment/docker_compose/install\.sh
|
||||
)$
|
||||
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
|
||||
hooks:
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
"""Add file_id to documents
|
||||
|
||||
Revision ID: 91d150c361f6
|
||||
Revises: d129f37b3d87
|
||||
Create Date: 2026-04-16 15:43:30.314823
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "91d150c361f6"
|
||||
down_revision = "d129f37b3d87"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.add_column(
|
||||
"document",
|
||||
sa.Column("file_id", sa.String(), nullable=True),
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_column("document", "file_id")
|
||||
@@ -1,8 +1,10 @@
|
||||
import time
|
||||
from typing import cast
|
||||
|
||||
from celery import shared_task
|
||||
from celery import Task
|
||||
from celery.exceptions import SoftTimeLimitExceeded
|
||||
from redis.client import Redis
|
||||
from redis.lock import Lock as RedisLock
|
||||
|
||||
from ee.onyx.server.tenants.product_gating import get_gated_tenants
|
||||
@@ -16,9 +18,56 @@ from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.db.engine.tenant_utils import get_all_tenant_ids
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.redis.redis_pool import redis_lock_dump
|
||||
from onyx.redis.redis_tenant_work_gating import cleanup_expired
|
||||
from onyx.redis.redis_tenant_work_gating import get_active_tenants
|
||||
from onyx.redis.redis_tenant_work_gating import observe_active_set_size
|
||||
from onyx.redis.redis_tenant_work_gating import record_full_fanout_cycle
|
||||
from onyx.redis.redis_tenant_work_gating import record_gate_decision
|
||||
from onyx.server.runtime.onyx_runtime import OnyxRuntime
|
||||
from shared_configs.configs import IGNORED_SYNCING_TENANT_LIST
|
||||
|
||||
|
||||
_FULL_FANOUT_TIMESTAMP_KEY_PREFIX = "tenant_work_gating_last_full_fanout_ms"
|
||||
|
||||
|
||||
def _should_bypass_gate_for_full_fanout(
|
||||
redis_client: Redis, task_name: str, interval_seconds: int
|
||||
) -> bool:
|
||||
"""True if at least `interval_seconds` have elapsed since the last
|
||||
full-fanout bypass for this task. On True, updates the stored timestamp
|
||||
atomically-enough (it's a best-effort counter, not a lock)."""
|
||||
key = f"{_FULL_FANOUT_TIMESTAMP_KEY_PREFIX}:{task_name}"
|
||||
now_ms = int(time.time() * 1000)
|
||||
threshold_ms = now_ms - (interval_seconds * 1000)
|
||||
|
||||
try:
|
||||
raw = cast(bytes | None, redis_client.get(key))
|
||||
except Exception:
|
||||
task_logger.exception(f"full-fanout timestamp read failed: task={task_name}")
|
||||
# Fail open: treat as "interval elapsed" so we don't skip every
|
||||
# tenant during a Redis hiccup.
|
||||
return True
|
||||
|
||||
if raw is None:
|
||||
# First invocation — bypass so the set seeds cleanly.
|
||||
elapsed = True
|
||||
else:
|
||||
try:
|
||||
last_ms = int(raw.decode())
|
||||
elapsed = last_ms <= threshold_ms
|
||||
except ValueError:
|
||||
elapsed = True
|
||||
|
||||
if elapsed:
|
||||
try:
|
||||
redis_client.set(key, str(now_ms))
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
f"full-fanout timestamp write failed: task={task_name}"
|
||||
)
|
||||
return elapsed
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
||||
ignore_result=True,
|
||||
@@ -32,6 +81,7 @@ def cloud_beat_task_generator(
|
||||
priority: int = OnyxCeleryPriority.MEDIUM,
|
||||
expires: int = BEAT_EXPIRES_DEFAULT,
|
||||
skip_gated: bool = True,
|
||||
work_gated: bool = False,
|
||||
) -> bool | None:
|
||||
"""a lightweight task used to kick off individual beat tasks per tenant."""
|
||||
time_start = time.monotonic()
|
||||
@@ -51,8 +101,56 @@ def cloud_beat_task_generator(
|
||||
tenant_ids: list[str] = []
|
||||
num_processed_tenants = 0
|
||||
num_skipped_gated = 0
|
||||
num_would_skip_work_gate = 0
|
||||
num_skipped_work_gate = 0
|
||||
|
||||
# Tenant-work-gating read path. Resolve once per invocation.
|
||||
gate_enabled = False
|
||||
gate_enforce = False
|
||||
full_fanout_cycle = False
|
||||
active_tenants: set[str] | None = None
|
||||
|
||||
try:
|
||||
# Gating setup is inside the try block so any exception still
|
||||
# reaches the finally that releases the beat lock.
|
||||
if work_gated:
|
||||
try:
|
||||
gate_enabled = OnyxRuntime.get_tenant_work_gating_enabled()
|
||||
gate_enforce = OnyxRuntime.get_tenant_work_gating_enforce()
|
||||
except Exception:
|
||||
task_logger.exception("tenant work gating: runtime flag read failed")
|
||||
gate_enabled = False
|
||||
|
||||
if gate_enabled:
|
||||
redis_failed = False
|
||||
interval_s = (
|
||||
OnyxRuntime.get_tenant_work_gating_full_fanout_interval_seconds()
|
||||
)
|
||||
full_fanout_cycle = _should_bypass_gate_for_full_fanout(
|
||||
redis_client, task_name, interval_s
|
||||
)
|
||||
if full_fanout_cycle:
|
||||
record_full_fanout_cycle(task_name)
|
||||
try:
|
||||
ttl_s = OnyxRuntime.get_tenant_work_gating_ttl_seconds()
|
||||
cleanup_expired(ttl_s)
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
"tenant work gating: cleanup_expired failed"
|
||||
)
|
||||
else:
|
||||
ttl_s = OnyxRuntime.get_tenant_work_gating_ttl_seconds()
|
||||
active_tenants = get_active_tenants(ttl_s)
|
||||
if active_tenants is None:
|
||||
full_fanout_cycle = True
|
||||
record_full_fanout_cycle(task_name)
|
||||
redis_failed = True
|
||||
|
||||
# Only refresh the gauge when Redis is known-reachable —
|
||||
# skip the ZCARD if we just failed open due to a Redis error.
|
||||
if not redis_failed:
|
||||
observe_active_set_size()
|
||||
|
||||
tenant_ids = get_all_tenant_ids()
|
||||
|
||||
# Per-task control over whether gated tenants are included. Most periodic tasks
|
||||
@@ -76,6 +174,21 @@ def cloud_beat_task_generator(
|
||||
if IGNORED_SYNCING_TENANT_LIST and tenant_id in IGNORED_SYNCING_TENANT_LIST:
|
||||
continue
|
||||
|
||||
# Tenant work gate: if the feature is on, check membership. Skip
|
||||
# unmarked tenants when enforce=True AND we're not in a full-
|
||||
# fanout cycle. Always log/emit the shadow counter.
|
||||
if work_gated and gate_enabled and not full_fanout_cycle:
|
||||
would_skip = (
|
||||
active_tenants is not None and tenant_id not in active_tenants
|
||||
)
|
||||
if would_skip:
|
||||
num_would_skip_work_gate += 1
|
||||
if gate_enforce:
|
||||
num_skipped_work_gate += 1
|
||||
record_gate_decision(task_name, skipped=True)
|
||||
continue
|
||||
record_gate_decision(task_name, skipped=False)
|
||||
|
||||
self.app.send_task(
|
||||
task_name,
|
||||
kwargs=dict(
|
||||
@@ -109,6 +222,12 @@ def cloud_beat_task_generator(
|
||||
f"task={task_name} "
|
||||
f"num_processed_tenants={num_processed_tenants} "
|
||||
f"num_skipped_gated={num_skipped_gated} "
|
||||
f"num_would_skip_work_gate={num_would_skip_work_gate} "
|
||||
f"num_skipped_work_gate={num_skipped_work_gate} "
|
||||
f"full_fanout_cycle={full_fanout_cycle} "
|
||||
f"work_gated={work_gated} "
|
||||
f"gate_enabled={gate_enabled} "
|
||||
f"gate_enforce={gate_enforce} "
|
||||
f"num_tenants={len(tenant_ids)} "
|
||||
f"elapsed={time_elapsed:.2f}"
|
||||
)
|
||||
|
||||
@@ -30,6 +30,7 @@ from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFI
|
||||
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
|
||||
from onyx.configs.app_configs import DISABLE_VECTOR_DB
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.app_configs import ONYX_DISABLE_VESPA
|
||||
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
|
||||
@@ -531,23 +532,26 @@ def reset_tenant_id(
|
||||
CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
|
||||
|
||||
|
||||
def wait_for_vespa_or_shutdown(
|
||||
sender: Any, # noqa: ARG001
|
||||
**kwargs: Any, # noqa: ARG001
|
||||
) -> None: # noqa: ARG001
|
||||
"""Waits for Vespa to become ready subject to a timeout.
|
||||
Raises WorkerShutdown if the timeout is reached."""
|
||||
def wait_for_document_index_or_shutdown() -> None:
|
||||
"""
|
||||
Waits for all configured document indices to become ready subject to a
|
||||
timeout.
|
||||
|
||||
Raises WorkerShutdown if the timeout is reached.
|
||||
"""
|
||||
if DISABLE_VECTOR_DB:
|
||||
logger.info(
|
||||
"DISABLE_VECTOR_DB is set — skipping Vespa/OpenSearch readiness check."
|
||||
)
|
||||
return
|
||||
|
||||
if not wait_for_vespa_with_timeout():
|
||||
msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
|
||||
logger.error(msg)
|
||||
raise WorkerShutdown(msg)
|
||||
if not ONYX_DISABLE_VESPA:
|
||||
if not wait_for_vespa_with_timeout():
|
||||
msg = (
|
||||
"[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
|
||||
)
|
||||
logger.error(msg)
|
||||
raise WorkerShutdown(msg)
|
||||
|
||||
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
|
||||
if not wait_for_opensearch_with_timeout():
|
||||
|
||||
@@ -105,7 +105,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
app_base.wait_for_redis(sender, **kwargs)
|
||||
app_base.wait_for_db(sender, **kwargs)
|
||||
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
|
||||
app_base.wait_for_document_index_or_shutdown()
|
||||
|
||||
# Less startup checks in multi-tenant case
|
||||
if MULTI_TENANT:
|
||||
|
||||
@@ -111,7 +111,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
app_base.wait_for_redis(sender, **kwargs)
|
||||
app_base.wait_for_db(sender, **kwargs)
|
||||
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
|
||||
app_base.wait_for_document_index_or_shutdown()
|
||||
|
||||
# Less startup checks in multi-tenant case
|
||||
if MULTI_TENANT:
|
||||
|
||||
@@ -97,7 +97,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
app_base.wait_for_redis(sender, **kwargs)
|
||||
app_base.wait_for_db(sender, **kwargs)
|
||||
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
|
||||
app_base.wait_for_document_index_or_shutdown()
|
||||
|
||||
# Less startup checks in multi-tenant case
|
||||
if MULTI_TENANT:
|
||||
|
||||
@@ -118,7 +118,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
app_base.wait_for_redis(sender, **kwargs)
|
||||
app_base.wait_for_db(sender, **kwargs)
|
||||
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
|
||||
app_base.wait_for_document_index_or_shutdown()
|
||||
|
||||
# Less startup checks in multi-tenant case
|
||||
if MULTI_TENANT:
|
||||
|
||||
@@ -124,7 +124,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
app_base.wait_for_redis(sender, **kwargs)
|
||||
app_base.wait_for_db(sender, **kwargs)
|
||||
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
|
||||
app_base.wait_for_document_index_or_shutdown()
|
||||
|
||||
logger.info(f"Running as the primary celery worker: pid={os.getpid()}")
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
|
||||
app_base.wait_for_redis(sender, **kwargs)
|
||||
app_base.wait_for_db(sender, **kwargs)
|
||||
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
|
||||
app_base.wait_for_document_index_or_shutdown()
|
||||
|
||||
# Less startup checks in multi-tenant case
|
||||
if MULTI_TENANT:
|
||||
|
||||
@@ -10,6 +10,7 @@ from onyx.configs.app_configs import DISABLE_OPENSEARCH_MIGRATION_TASK
|
||||
from onyx.configs.app_configs import DISABLE_VECTOR_DB
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
|
||||
from onyx.configs.app_configs import ONYX_DISABLE_VESPA
|
||||
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
|
||||
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
@@ -67,6 +68,7 @@ beat_task_templates: list[dict] = [
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.MEDIUM,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
"work_gated": True,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -100,6 +102,7 @@ beat_task_templates: list[dict] = [
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
# Gated tenants may still have connectors awaiting deletion.
|
||||
"skip_gated": False,
|
||||
"work_gated": True,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -109,6 +112,7 @@ beat_task_templates: list[dict] = [
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.MEDIUM,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
"work_gated": True,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -118,6 +122,7 @@ beat_task_templates: list[dict] = [
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.MEDIUM,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
"work_gated": True,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -155,6 +160,7 @@ beat_task_templates: list[dict] = [
|
||||
"priority": OnyxCeleryPriority.LOW,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
"queue": OnyxCeleryQueues.SANDBOX,
|
||||
"work_gated": True,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -179,6 +185,7 @@ if ENTERPRISE_EDITION_ENABLED:
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.MEDIUM,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
"work_gated": True,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -188,6 +195,7 @@ if ENTERPRISE_EDITION_ENABLED:
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.MEDIUM,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
"work_gated": True,
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -227,7 +235,11 @@ if SCHEDULED_EVAL_DATASET_NAMES:
|
||||
)
|
||||
|
||||
# Add OpenSearch migration task if enabled.
|
||||
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX and not DISABLE_OPENSEARCH_MIGRATION_TASK:
|
||||
if (
|
||||
ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
and not DISABLE_OPENSEARCH_MIGRATION_TASK
|
||||
and not ONYX_DISABLE_VESPA
|
||||
):
|
||||
beat_task_templates.append(
|
||||
{
|
||||
"name": "migrate-chunks-from-vespa-to-opensearch",
|
||||
@@ -280,7 +292,7 @@ def make_cloud_generator_task(task: dict[str, Any]) -> dict[str, Any]:
|
||||
cloud_task["kwargs"] = {}
|
||||
cloud_task["kwargs"]["task_name"] = task["task"]
|
||||
|
||||
optional_fields = ["queue", "priority", "expires", "skip_gated"]
|
||||
optional_fields = ["queue", "priority", "expires", "skip_gated", "work_gated"]
|
||||
for field in optional_fields:
|
||||
if field in task["options"]:
|
||||
cloud_task["kwargs"][field] = task["options"][field]
|
||||
@@ -373,12 +385,14 @@ if not MULTI_TENANT:
|
||||
]
|
||||
)
|
||||
|
||||
# `skip_gated` is a cloud-only hint consumed by `cloud_beat_task_generator`. Strip
|
||||
# it before extending the self-hosted schedule so it doesn't leak into apply_async
|
||||
# as an unrecognised option on every fired task message.
|
||||
# `skip_gated` and `work_gated` are cloud-only hints consumed by
|
||||
# `cloud_beat_task_generator`. Strip them before extending the self-hosted
|
||||
# schedule so they don't leak into apply_async as unrecognised options on
|
||||
# every fired task message.
|
||||
for _template in beat_task_templates:
|
||||
_self_hosted_template = copy.deepcopy(_template)
|
||||
_self_hosted_template["options"].pop("skip_gated", None)
|
||||
_self_hosted_template["options"].pop("work_gated", None)
|
||||
tasks_to_schedule.append(_self_hosted_template)
|
||||
|
||||
|
||||
|
||||
@@ -58,8 +58,6 @@ from onyx.db.indexing_coordination import IndexingCoordination
|
||||
from onyx.db.models import IndexAttempt
|
||||
from onyx.file_store.document_batch_storage import DocumentBatchStorage
|
||||
from onyx.file_store.document_batch_storage import get_document_batch_storage
|
||||
from onyx.file_store.staging import build_raw_file_callback
|
||||
from onyx.file_store.staging import RawFileCallback
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
|
||||
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
|
||||
@@ -92,7 +90,6 @@ def _get_connector_runner(
|
||||
end_time: datetime,
|
||||
include_permissions: bool,
|
||||
leave_connector_active: bool = LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE,
|
||||
raw_file_callback: RawFileCallback | None = None,
|
||||
) -> ConnectorRunner:
|
||||
"""
|
||||
NOTE: `start_time` and `end_time` are only used for poll connectors
|
||||
@@ -111,7 +108,6 @@ def _get_connector_runner(
|
||||
input_type=task,
|
||||
connector_specific_config=attempt.connector_credential_pair.connector.connector_specific_config,
|
||||
credential=attempt.connector_credential_pair.credential,
|
||||
raw_file_callback=raw_file_callback,
|
||||
)
|
||||
|
||||
# validate the connector settings
|
||||
@@ -279,12 +275,6 @@ def run_docfetching_entrypoint(
|
||||
f"credentials='{credential_id}'"
|
||||
)
|
||||
|
||||
raw_file_callback = build_raw_file_callback(
|
||||
index_attempt_id=index_attempt_id,
|
||||
cc_pair_id=connector_credential_pair_id,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
|
||||
connector_document_extraction(
|
||||
app,
|
||||
index_attempt_id,
|
||||
@@ -292,7 +282,6 @@ def run_docfetching_entrypoint(
|
||||
attempt.search_settings_id,
|
||||
tenant_id,
|
||||
callback,
|
||||
raw_file_callback=raw_file_callback,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
@@ -312,7 +301,6 @@ def connector_document_extraction(
|
||||
search_settings_id: int,
|
||||
tenant_id: str,
|
||||
callback: IndexingHeartbeatInterface | None = None,
|
||||
raw_file_callback: RawFileCallback | None = None,
|
||||
) -> None:
|
||||
"""Extract documents from connector and queue them for indexing pipeline processing.
|
||||
|
||||
@@ -463,7 +451,6 @@ def connector_document_extraction(
|
||||
start_time=window_start,
|
||||
end_time=window_end,
|
||||
include_permissions=should_fetch_permissions_during_indexing,
|
||||
raw_file_callback=raw_file_callback,
|
||||
)
|
||||
|
||||
# don't use a checkpoint if we're explicitly indexing from
|
||||
|
||||
@@ -282,6 +282,7 @@ OPENSEARCH_ADMIN_USERNAME = os.environ.get("OPENSEARCH_ADMIN_USERNAME", "admin")
|
||||
OPENSEARCH_ADMIN_PASSWORD = os.environ.get(
|
||||
"OPENSEARCH_ADMIN_PASSWORD", "StrongPassword123!"
|
||||
)
|
||||
OPENSEARCH_USE_SSL = os.environ.get("OPENSEARCH_USE_SSL", "true").lower() == "true"
|
||||
USING_AWS_MANAGED_OPENSEARCH = (
|
||||
os.environ.get("USING_AWS_MANAGED_OPENSEARCH", "").lower() == "true"
|
||||
)
|
||||
@@ -327,6 +328,7 @@ ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX = (
|
||||
DISABLE_OPENSEARCH_MIGRATION_TASK = (
|
||||
os.environ.get("DISABLE_OPENSEARCH_MIGRATION_TASK", "").lower() == "true"
|
||||
)
|
||||
ONYX_DISABLE_VESPA = os.environ.get("ONYX_DISABLE_VESPA", "").lower() == "true"
|
||||
# Whether we should check for and create an index if necessary every time we
|
||||
# instantiate an OpenSearchDocumentIndex on multitenant cloud. Defaults to True.
|
||||
VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT = (
|
||||
@@ -843,6 +845,29 @@ MAX_FILE_SIZE_BYTES = int(
|
||||
os.environ.get("MAX_FILE_SIZE_BYTES") or 2 * 1024 * 1024 * 1024
|
||||
) # 2GB in bytes
|
||||
|
||||
# Maximum embedded images allowed in a single file. PDFs (and other formats)
|
||||
# with thousands of embedded images can OOM the user-file-processing worker
|
||||
# because every image is decoded with PIL and then sent to the vision LLM.
|
||||
# Enforced both at upload time (rejects the file) and during extraction
|
||||
# (defense-in-depth: caps the number of images materialized).
|
||||
#
|
||||
# Clamped to >= 0; a negative env value would turn upload validation into
|
||||
# always-fail and extraction into always-stop, which is never desired. 0
|
||||
# disables image extraction entirely, which is a valid (if aggressive) setting.
|
||||
MAX_EMBEDDED_IMAGES_PER_FILE = max(
|
||||
0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_FILE") or 500)
|
||||
)
|
||||
|
||||
# Maximum embedded images allowed across all files in a single upload batch.
|
||||
# Protects against the scenario where a user uploads many files that each
|
||||
# fall under MAX_EMBEDDED_IMAGES_PER_FILE but aggregate to enough work
|
||||
# (serial-ish celery fan-out plus per-image vision-LLM calls) to OOM the
|
||||
# worker under concurrency or run up surprise latency/cost. Also clamped
|
||||
# to >= 0.
|
||||
MAX_EMBEDDED_IMAGES_PER_UPLOAD = max(
|
||||
0, int(os.environ.get("MAX_EMBEDDED_IMAGES_PER_UPLOAD") or 1000)
|
||||
)
|
||||
|
||||
# Use document summary for contextual rag
|
||||
USE_DOCUMENT_SUMMARY = os.environ.get("USE_DOCUMENT_SUMMARY", "true").lower() == "true"
|
||||
# Use chunk summary for contextual rag
|
||||
|
||||
@@ -372,7 +372,6 @@ class FileOrigin(str, Enum):
|
||||
CONNECTOR_METADATA = "connector_metadata"
|
||||
GENERATED_REPORT = "generated_report"
|
||||
INDEXING_CHECKPOINT = "indexing_checkpoint"
|
||||
INDEXING_STAGING = "indexing_staging"
|
||||
PLAINTEXT_CACHE = "plaintext_cache"
|
||||
OTHER = "other"
|
||||
QUERY_HISTORY_CSV = "query_history_csv"
|
||||
|
||||
@@ -3,6 +3,7 @@ from collections.abc import Callable
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import Any
|
||||
from typing import TypeVar
|
||||
from urllib.parse import urljoin
|
||||
@@ -10,7 +11,6 @@ from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from dateutil.parser import parse
|
||||
from dateutil.parser import ParserError
|
||||
|
||||
from onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
@@ -56,18 +56,16 @@ def time_str_to_utc(datetime_str: str) -> datetime:
|
||||
if fixed not in candidates:
|
||||
candidates.append(fixed)
|
||||
|
||||
last_exception: Exception | None = None
|
||||
for candidate in candidates:
|
||||
try:
|
||||
dt = parse(candidate)
|
||||
return datetime_to_utc(dt)
|
||||
except (ValueError, ParserError) as exc:
|
||||
last_exception = exc
|
||||
# dateutil is the primary; the stdlib RFC 2822 parser is a fallback for
|
||||
# inputs dateutil rejects (e.g. headers concatenated without a CRLF —
|
||||
# TZ may be dropped, datetime_to_utc then assumes UTC).
|
||||
for parser in (parse, parsedate_to_datetime):
|
||||
for candidate in candidates:
|
||||
try:
|
||||
return datetime_to_utc(parser(candidate))
|
||||
except (TypeError, ValueError, OverflowError):
|
||||
continue
|
||||
|
||||
if last_exception is not None:
|
||||
raise last_exception
|
||||
|
||||
# Fallback in case parsing failed without raising (should not happen)
|
||||
raise ValueError(f"Unable to parse datetime string: {datetime_str}")
|
||||
|
||||
|
||||
|
||||
@@ -22,7 +22,6 @@ from onyx.db.credentials import backend_update_credential_json
|
||||
from onyx.db.credentials import fetch_credential_by_id
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.db.models import Credential
|
||||
from onyx.file_store.staging import RawFileCallback
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
|
||||
@@ -108,7 +107,6 @@ def instantiate_connector(
|
||||
input_type: InputType,
|
||||
connector_specific_config: dict[str, Any],
|
||||
credential: Credential,
|
||||
raw_file_callback: RawFileCallback | None = None,
|
||||
) -> BaseConnector:
|
||||
connector_class = identify_connector_class(source, input_type)
|
||||
|
||||
@@ -132,9 +130,6 @@ def instantiate_connector(
|
||||
|
||||
connector.set_allow_images(get_image_extraction_and_analysis_enabled())
|
||||
|
||||
if raw_file_callback is not None:
|
||||
connector.set_raw_file_callback(raw_file_callback)
|
||||
|
||||
return connector
|
||||
|
||||
|
||||
|
||||
@@ -253,7 +253,17 @@ def thread_to_document(
|
||||
|
||||
updated_at_datetime = None
|
||||
if updated_at:
|
||||
updated_at_datetime = time_str_to_utc(updated_at)
|
||||
try:
|
||||
updated_at_datetime = time_str_to_utc(updated_at)
|
||||
except (ValueError, OverflowError) as e:
|
||||
# Old mailboxes contain RFC-violating Date headers. Drop the
|
||||
# timestamp instead of aborting the indexing run.
|
||||
logger.warning(
|
||||
"Skipping unparseable Gmail Date header on thread %s: %r (%s)",
|
||||
full_thread.get("id"),
|
||||
updated_at,
|
||||
e,
|
||||
)
|
||||
|
||||
id = full_thread.get("id")
|
||||
if not id:
|
||||
|
||||
@@ -502,6 +502,9 @@ class GoogleDriveConnector(
|
||||
files: list[RetrievedDriveFile],
|
||||
seen_hierarchy_node_raw_ids: ThreadSafeSet[str],
|
||||
fully_walked_hierarchy_node_raw_ids: ThreadSafeSet[str],
|
||||
failed_folder_ids_by_email: (
|
||||
ThreadSafeDict[str, ThreadSafeSet[str]] | None
|
||||
) = None,
|
||||
permission_sync_context: PermissionSyncContext | None = None,
|
||||
add_prefix: bool = False,
|
||||
) -> list[HierarchyNode]:
|
||||
@@ -525,6 +528,9 @@ class GoogleDriveConnector(
|
||||
seen_hierarchy_node_raw_ids: Set of already-yielded node IDs (modified in place)
|
||||
fully_walked_hierarchy_node_raw_ids: Set of node IDs where the walk to root
|
||||
succeeded (modified in place)
|
||||
failed_folder_ids_by_email: Map of email → folder IDs where that email
|
||||
previously confirmed no accessible parent. Skips the API call if the same
|
||||
(folder, email) is encountered again (modified in place).
|
||||
permission_sync_context: If provided, permissions will be fetched for hierarchy nodes.
|
||||
Contains google_domain and primary_admin_email needed for permission syncing.
|
||||
add_prefix: When True, prefix group IDs with source type (for indexing path).
|
||||
@@ -569,7 +575,7 @@ class GoogleDriveConnector(
|
||||
|
||||
# Fetch folder metadata
|
||||
folder = self._get_folder_metadata(
|
||||
current_id, file.user_email, field_type
|
||||
current_id, file.user_email, field_type, failed_folder_ids_by_email
|
||||
)
|
||||
if not folder:
|
||||
# Can't access this folder - stop climbing
|
||||
@@ -653,7 +659,13 @@ class GoogleDriveConnector(
|
||||
return new_nodes
|
||||
|
||||
def _get_folder_metadata(
|
||||
self, folder_id: str, retriever_email: str, field_type: DriveFileFieldType
|
||||
self,
|
||||
folder_id: str,
|
||||
retriever_email: str,
|
||||
field_type: DriveFileFieldType,
|
||||
failed_folder_ids_by_email: (
|
||||
ThreadSafeDict[str, ThreadSafeSet[str]] | None
|
||||
) = None,
|
||||
) -> GoogleDriveFileType | None:
|
||||
"""
|
||||
Fetch metadata for a folder by ID.
|
||||
@@ -667,6 +679,17 @@ class GoogleDriveConnector(
|
||||
|
||||
# Use a set to deduplicate if retriever_email == primary_admin_email
|
||||
for email in {retriever_email, self.primary_admin_email}:
|
||||
failed_ids = (
|
||||
failed_folder_ids_by_email.get(email)
|
||||
if failed_folder_ids_by_email
|
||||
else None
|
||||
)
|
||||
if failed_ids and folder_id in failed_ids:
|
||||
logger.debug(
|
||||
f"Skipping folder {folder_id} using {email} (previously confirmed no parents)"
|
||||
)
|
||||
continue
|
||||
|
||||
service = get_drive_service(self.creds, email)
|
||||
folder = get_folder_metadata(service, folder_id, field_type)
|
||||
|
||||
@@ -682,6 +705,10 @@ class GoogleDriveConnector(
|
||||
|
||||
# Folder has no parents - could be a root OR user lacks access to parent
|
||||
# Keep this as a fallback but try admin to see if they can see parents
|
||||
if failed_folder_ids_by_email is not None:
|
||||
failed_folder_ids_by_email.setdefault(email, ThreadSafeSet()).add(
|
||||
folder_id
|
||||
)
|
||||
if best_folder is None:
|
||||
best_folder = folder
|
||||
logger.debug(
|
||||
@@ -1090,6 +1117,13 @@ class GoogleDriveConnector(
|
||||
]
|
||||
yield from parallel_yield(user_retrieval_gens, max_workers=MAX_DRIVE_WORKERS)
|
||||
|
||||
# Free per-user cache entries now that this batch is done.
|
||||
# Skip the admin email — it is shared across all user batches and must
|
||||
# persist for the duration of the run.
|
||||
for email in non_completed_org_emails:
|
||||
if email != self.primary_admin_email:
|
||||
checkpoint.failed_folder_ids_by_email.pop(email, None)
|
||||
|
||||
# if there are more emails to process, don't mark as complete
|
||||
if not email_batch_takes_us_to_completion:
|
||||
return
|
||||
@@ -1546,6 +1580,7 @@ class GoogleDriveConnector(
|
||||
files=files_batch,
|
||||
seen_hierarchy_node_raw_ids=checkpoint.seen_hierarchy_node_raw_ids,
|
||||
fully_walked_hierarchy_node_raw_ids=checkpoint.fully_walked_hierarchy_node_raw_ids,
|
||||
failed_folder_ids_by_email=checkpoint.failed_folder_ids_by_email,
|
||||
permission_sync_context=permission_sync_context,
|
||||
add_prefix=True,
|
||||
)
|
||||
@@ -1782,6 +1817,7 @@ class GoogleDriveConnector(
|
||||
files=files_batch,
|
||||
seen_hierarchy_node_raw_ids=checkpoint.seen_hierarchy_node_raw_ids,
|
||||
fully_walked_hierarchy_node_raw_ids=checkpoint.fully_walked_hierarchy_node_raw_ids,
|
||||
failed_folder_ids_by_email=checkpoint.failed_folder_ids_by_email,
|
||||
permission_sync_context=permission_sync_context,
|
||||
)
|
||||
|
||||
|
||||
@@ -379,10 +379,20 @@ def _download_and_extract_sections_basic(
|
||||
mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
or is_tabular_file(file_name)
|
||||
):
|
||||
# Google Drive doesn't enforce file extensions, so the filename may not
|
||||
# end in .xlsx even when the mime type says it's one. Synthesize the
|
||||
# extension so tabular_file_to_sections dispatches correctly.
|
||||
tabular_file_name = file_name
|
||||
if (
|
||||
mime_type
|
||||
== "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
and not is_tabular_file(file_name)
|
||||
):
|
||||
tabular_file_name = f"{file_name}.xlsx"
|
||||
return list(
|
||||
tabular_file_to_sections(
|
||||
io.BytesIO(response_call()),
|
||||
file_name=file_name,
|
||||
file_name=tabular_file_name,
|
||||
link=link,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -167,6 +167,13 @@ class GoogleDriveCheckpoint(ConnectorCheckpoint):
|
||||
default_factory=ThreadSafeSet
|
||||
)
|
||||
|
||||
# Maps email → set of IDs of folders where that email confirmed no accessible parent.
|
||||
# Avoids redundant API calls when the same (folder, email) pair is
|
||||
# encountered again within the same retrieval run.
|
||||
failed_folder_ids_by_email: ThreadSafeDict[str, ThreadSafeSet[str]] = Field(
|
||||
default_factory=ThreadSafeDict
|
||||
)
|
||||
|
||||
@field_serializer("completion_map")
|
||||
def serialize_completion_map(
|
||||
self, completion_map: ThreadSafeDict[str, StageCompletion], _info: Any
|
||||
@@ -211,3 +218,25 @@ class GoogleDriveCheckpoint(ConnectorCheckpoint):
|
||||
if isinstance(v, list):
|
||||
return ThreadSafeSet(set(v)) # ty: ignore[invalid-return-type]
|
||||
return ThreadSafeSet()
|
||||
|
||||
@field_serializer("failed_folder_ids_by_email")
|
||||
def serialize_failed_folder_ids_by_email(
|
||||
self,
|
||||
failed_folder_ids_by_email: ThreadSafeDict[str, ThreadSafeSet[str]],
|
||||
_info: Any,
|
||||
) -> dict[str, set[str]]:
|
||||
return {
|
||||
k: inner.copy() for k, inner in failed_folder_ids_by_email.copy().items()
|
||||
}
|
||||
|
||||
@field_validator("failed_folder_ids_by_email", mode="before")
|
||||
def validate_failed_folder_ids_by_email(
|
||||
cls, v: Any
|
||||
) -> ThreadSafeDict[str, ThreadSafeSet[str]]:
|
||||
if isinstance(v, ThreadSafeDict):
|
||||
return v
|
||||
if isinstance(v, dict):
|
||||
return ThreadSafeDict(
|
||||
{k: ThreadSafeSet(set(vals)) for k, vals in v.items()}
|
||||
)
|
||||
return ThreadSafeDict()
|
||||
|
||||
@@ -15,7 +15,6 @@ from onyx.connectors.models import ConnectorFailure
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.file_store.staging import RawFileCallback
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
|
||||
|
||||
@@ -43,9 +42,6 @@ class NormalizationResult(BaseModel):
|
||||
class BaseConnector(abc.ABC, Generic[CT]):
|
||||
REDIS_KEY_PREFIX = "da_connector_data:"
|
||||
|
||||
# Optional raw-file persistence hook to save original file
|
||||
raw_file_callback: RawFileCallback | None = None
|
||||
|
||||
@abc.abstractmethod
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
raise NotImplementedError
|
||||
@@ -92,15 +88,6 @@ class BaseConnector(abc.ABC, Generic[CT]):
|
||||
"""Implement if the underlying connector wants to skip/allow image downloading
|
||||
based on the application level image analysis setting."""
|
||||
|
||||
def set_raw_file_callback(self, callback: RawFileCallback) -> None:
|
||||
"""Inject the per-attempt raw-file persistence callback.
|
||||
|
||||
Wired up by the docfetching entrypoint via `instantiate_connector`.
|
||||
Connectors that don't care about persisting raw bytes can ignore this
|
||||
— `raw_file_callback` simply stays `None`.
|
||||
"""
|
||||
self.raw_file_callback = callback
|
||||
|
||||
@classmethod
|
||||
def normalize_url(cls, url: str) -> "NormalizationResult": # noqa: ARG003
|
||||
"""Normalize a URL to match the canonical Document.id format used during ingestion.
|
||||
|
||||
@@ -62,17 +62,19 @@ def best_effort_get_field_from_issue(jira_issue: Issue, field: str) -> Any:
|
||||
def extract_text_from_adf(adf: dict | None) -> str:
|
||||
"""Extracts plain text from Atlassian Document Format:
|
||||
https://developer.atlassian.com/cloud/jira/platform/apis/document/structure/
|
||||
|
||||
WARNING: This function is incomplete and will e.g. skip lists!
|
||||
"""
|
||||
# TODO: complete this function
|
||||
texts = []
|
||||
if adf is not None and "content" in adf:
|
||||
for block in adf["content"]:
|
||||
if "content" in block:
|
||||
for item in block["content"]:
|
||||
if item["type"] == "text":
|
||||
texts.append(item["text"])
|
||||
texts: list[str] = []
|
||||
|
||||
def _extract(node: dict) -> None:
|
||||
if node.get("type") == "text":
|
||||
text = node.get("text", "")
|
||||
if text:
|
||||
texts.append(text)
|
||||
for child in node.get("content", []):
|
||||
_extract(child)
|
||||
|
||||
if adf is not None:
|
||||
_extract(adf)
|
||||
return " ".join(texts)
|
||||
|
||||
|
||||
|
||||
@@ -231,8 +231,6 @@ class DocumentBase(BaseModel):
|
||||
# Set during docfetching after hierarchy nodes are cached
|
||||
parent_hierarchy_node_id: int | None = None
|
||||
|
||||
file_id: str | None = None
|
||||
|
||||
def get_title_for_document_index(
|
||||
self,
|
||||
) -> str | None:
|
||||
@@ -372,7 +370,6 @@ class Document(DocumentBase):
|
||||
secondary_owners=base.secondary_owners,
|
||||
title=base.title,
|
||||
from_ingestion_api=base.from_ingestion_api,
|
||||
file_id=base.file_id,
|
||||
)
|
||||
|
||||
def __sizeof__(self) -> int:
|
||||
|
||||
@@ -1958,8 +1958,7 @@ class SharepointConnector(
|
||||
self._graph_client = GraphClient(
|
||||
_acquire_token_for_graph, environment=self._azure_environment
|
||||
)
|
||||
if auth_method == SharepointAuthMethod.CERTIFICATE.value:
|
||||
self.sp_tenant_domain = self._resolve_tenant_domain()
|
||||
self.sp_tenant_domain = self._resolve_tenant_domain()
|
||||
return None
|
||||
|
||||
def _get_drive_names_for_site(self, site_url: str) -> list[str]:
|
||||
|
||||
@@ -19,6 +19,7 @@ from playwright.sync_api import Playwright
|
||||
from playwright.sync_api import sync_playwright
|
||||
from playwright.sync_api import TimeoutError
|
||||
from requests_oauthlib import OAuth2Session
|
||||
from typing_extensions import override
|
||||
from urllib3.exceptions import MaxRetryError
|
||||
|
||||
from onyx.configs.app_configs import INDEX_BATCH_SIZE
|
||||
@@ -32,11 +33,16 @@ from onyx.connectors.exceptions import CredentialExpiredError
|
||||
from onyx.connectors.exceptions import InsufficientPermissionsError
|
||||
from onyx.connectors.exceptions import UnexpectedValidationError
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.file_processing.html_utils import web_html_cleanup
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.sitemap import list_pages_for_site
|
||||
from onyx.utils.web_content import extract_pdf_text
|
||||
@@ -55,8 +61,6 @@ class ScrapeSessionContext:
|
||||
self.visited_links: set[str] = set()
|
||||
self.content_hashes: set[int] = set()
|
||||
|
||||
self.doc_batch: list[Document | HierarchyNode] = []
|
||||
|
||||
self.at_least_one_doc: bool = False
|
||||
self.last_error: str | None = None
|
||||
self.needs_retry: bool = False
|
||||
@@ -438,7 +442,7 @@ def _handle_cookies(context: BrowserContext, url: str) -> None:
|
||||
)
|
||||
|
||||
|
||||
class WebConnector(LoadConnector):
|
||||
class WebConnector(LoadConnector, SlimConnector):
|
||||
MAX_RETRIES = 3
|
||||
|
||||
def __init__(
|
||||
@@ -493,8 +497,14 @@ class WebConnector(LoadConnector):
|
||||
index: int,
|
||||
initial_url: str,
|
||||
session_ctx: ScrapeSessionContext,
|
||||
slim: bool = False,
|
||||
) -> ScrapeResult:
|
||||
"""Returns a ScrapeResult object with a doc and retry flag."""
|
||||
"""Returns a ScrapeResult object with a doc and retry flag.
|
||||
|
||||
When slim=True, skips scroll, PDF content download, and content extraction.
|
||||
The bot-detection render wait (5s) fires on CF/403 responses regardless of slim.
|
||||
networkidle is always awaited so JS-rendered links are discovered correctly.
|
||||
"""
|
||||
|
||||
if session_ctx.playwright is None:
|
||||
raise RuntimeError("scrape_context.playwright is None")
|
||||
@@ -515,7 +525,16 @@ class WebConnector(LoadConnector):
|
||||
is_pdf = is_pdf_resource(initial_url, content_type)
|
||||
|
||||
if is_pdf:
|
||||
# PDF files are not checked for links
|
||||
if slim:
|
||||
result.doc = Document(
|
||||
id=initial_url,
|
||||
sections=[],
|
||||
source=DocumentSource.WEB,
|
||||
semantic_identifier=initial_url,
|
||||
metadata={},
|
||||
)
|
||||
return result
|
||||
|
||||
response = requests.get(initial_url, headers=DEFAULT_HEADERS)
|
||||
page_text, metadata = extract_pdf_text(response.content)
|
||||
last_modified = response.headers.get("Last-Modified")
|
||||
@@ -546,14 +565,20 @@ class WebConnector(LoadConnector):
|
||||
timeout=30000, # 30 seconds
|
||||
wait_until="commit", # Wait for navigation to commit
|
||||
)
|
||||
# Give the page a moment to start rendering after navigation commits.
|
||||
# Allows CloudFlare and other bot-detection challenges to complete.
|
||||
page.wait_for_timeout(PAGE_RENDER_TIMEOUT_MS)
|
||||
|
||||
# Wait for network activity to settle so SPAs that fetch content
|
||||
# asynchronously after the initial JS bundle have time to render.
|
||||
# Bot-detection JS challenges (CloudFlare, Imperva, etc.) need a moment
|
||||
# to start network activity after commit before networkidle is meaningful.
|
||||
# We detect this via the cf-ray header (CloudFlare) or a 403 response,
|
||||
# which is the common entry point for JS-challenge-based bot detection.
|
||||
is_bot_challenge = page_response is not None and (
|
||||
page_response.header_value("cf-ray") is not None
|
||||
or page_response.status == 403
|
||||
)
|
||||
if is_bot_challenge:
|
||||
page.wait_for_timeout(PAGE_RENDER_TIMEOUT_MS)
|
||||
|
||||
# Wait for network activity to settle (handles SPAs, CF challenges, etc.)
|
||||
try:
|
||||
# A bit of extra time to account for long-polling, websockets, etc.
|
||||
page.wait_for_load_state("networkidle", timeout=PAGE_RENDER_TIMEOUT_MS)
|
||||
except TimeoutError:
|
||||
pass
|
||||
@@ -576,7 +601,7 @@ class WebConnector(LoadConnector):
|
||||
session_ctx.visited_links.add(initial_url)
|
||||
|
||||
# If we got here, the request was successful
|
||||
if self.scroll_before_scraping:
|
||||
if not slim and self.scroll_before_scraping:
|
||||
scroll_attempts = 0
|
||||
previous_height = page.evaluate("document.body.scrollHeight")
|
||||
while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:
|
||||
@@ -615,6 +640,16 @@ class WebConnector(LoadConnector):
|
||||
result.retry = True
|
||||
return result
|
||||
|
||||
if slim:
|
||||
result.doc = Document(
|
||||
id=initial_url,
|
||||
sections=[],
|
||||
source=DocumentSource.WEB,
|
||||
semantic_identifier=initial_url,
|
||||
metadata={},
|
||||
)
|
||||
return result
|
||||
|
||||
# after this point, we don't need the caller to retry
|
||||
parsed_html = web_html_cleanup(soup, self.mintlify_cleanup)
|
||||
|
||||
@@ -666,9 +701,13 @@ class WebConnector(LoadConnector):
|
||||
|
||||
return result
|
||||
|
||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||
"""Traverses through all pages found on the website
|
||||
and converts them into documents"""
|
||||
def load_from_state(self, slim: bool = False) -> GenerateDocumentsOutput:
|
||||
"""Traverses through all pages found on the website and converts them into
|
||||
documents.
|
||||
|
||||
When slim=True, yields SlimDocument objects (URL id only, no content).
|
||||
Playwright is used in all modes — slim skips content extraction only.
|
||||
"""
|
||||
|
||||
if not self.to_visit_list:
|
||||
raise ValueError("No URLs to visit")
|
||||
@@ -679,6 +718,8 @@ class WebConnector(LoadConnector):
|
||||
session_ctx = ScrapeSessionContext(base_url, self.to_visit_list)
|
||||
session_ctx.initialize()
|
||||
|
||||
batch: list[Document | SlimDocument | HierarchyNode] = []
|
||||
|
||||
while session_ctx.to_visit:
|
||||
initial_url = session_ctx.to_visit.pop()
|
||||
if initial_url in session_ctx.visited_links:
|
||||
@@ -693,7 +734,9 @@ class WebConnector(LoadConnector):
|
||||
continue
|
||||
|
||||
index = len(session_ctx.visited_links)
|
||||
logger.info(f"{index}: Visiting {initial_url}")
|
||||
logger.info(
|
||||
f"{index}: {'Slim-visiting' if slim else 'Visiting'} {initial_url}"
|
||||
)
|
||||
|
||||
# Add retry mechanism with exponential backoff
|
||||
retry_count = 0
|
||||
@@ -708,12 +751,14 @@ class WebConnector(LoadConnector):
|
||||
time.sleep(delay)
|
||||
|
||||
try:
|
||||
result = self._do_scrape(index, initial_url, session_ctx)
|
||||
result = self._do_scrape(index, initial_url, session_ctx, slim=slim)
|
||||
if result.retry:
|
||||
continue
|
||||
|
||||
if result.doc:
|
||||
session_ctx.doc_batch.append(result.doc)
|
||||
batch.append(
|
||||
SlimDocument(id=result.doc.id) if slim else result.doc
|
||||
)
|
||||
except Exception as e:
|
||||
session_ctx.last_error = f"Failed to fetch '{initial_url}': {e}"
|
||||
logger.exception(session_ctx.last_error)
|
||||
@@ -724,16 +769,16 @@ class WebConnector(LoadConnector):
|
||||
|
||||
break # success / don't retry
|
||||
|
||||
if len(session_ctx.doc_batch) >= self.batch_size:
|
||||
if len(batch) >= self.batch_size:
|
||||
session_ctx.initialize()
|
||||
session_ctx.at_least_one_doc = True
|
||||
yield session_ctx.doc_batch
|
||||
session_ctx.doc_batch = []
|
||||
yield batch # ty: ignore[invalid-yield]
|
||||
batch = []
|
||||
|
||||
if session_ctx.doc_batch:
|
||||
if batch:
|
||||
session_ctx.stop()
|
||||
session_ctx.at_least_one_doc = True
|
||||
yield session_ctx.doc_batch
|
||||
yield batch # ty: ignore[invalid-yield]
|
||||
|
||||
if not session_ctx.at_least_one_doc:
|
||||
if session_ctx.last_error:
|
||||
@@ -742,6 +787,22 @@ class WebConnector(LoadConnector):
|
||||
|
||||
session_ctx.stop()
|
||||
|
||||
@override
|
||||
def retrieve_all_slim_docs(
|
||||
self,
|
||||
start: SecondsSinceUnixEpoch | None = None,
|
||||
end: SecondsSinceUnixEpoch | None = None,
|
||||
callback: IndexingHeartbeatInterface | None = None,
|
||||
) -> GenerateSlimDocumentOutput:
|
||||
"""Yields SlimDocuments for all pages reachable from the configured URLs.
|
||||
|
||||
Uses the same Playwright crawl as full indexing but skips content extraction,
|
||||
scroll, and PDF downloads. The 5s render wait fires only on bot-detection
|
||||
responses (CloudFlare cf-ray header or HTTP 403).
|
||||
The start/end parameters are ignored — WEB connector has no incremental path.
|
||||
"""
|
||||
yield from self.load_from_state(slim=True) # ty: ignore[invalid-yield]
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
# Make sure we have at least one valid URL to check
|
||||
if not self.to_visit_list:
|
||||
|
||||
@@ -81,9 +81,7 @@ class ZulipConnector(LoadConnector, PollConnector):
|
||||
# zuliprc file. This reverts them back to newlines.
|
||||
contents_spaces_to_newlines = contents.replace(" ", "\n")
|
||||
# create a temporary zuliprc file
|
||||
tempdir = tempfile.tempdir
|
||||
if tempdir is None:
|
||||
raise Exception("Could not determine tempfile directory")
|
||||
tempdir = tempfile.gettempdir()
|
||||
config_file = os.path.join(tempdir, f"zuliprc-{self.realm_name}")
|
||||
with open(config_file, "w") as f:
|
||||
f.write(contents_spaces_to_newlines)
|
||||
|
||||
@@ -244,13 +244,21 @@ def fetch_latest_index_attempts_by_status(
|
||||
return query.all()
|
||||
|
||||
|
||||
_INTERNAL_ONLY_SOURCES = {
|
||||
# Used by the ingestion API, not a user-created connector.
|
||||
DocumentSource.INGESTION_API,
|
||||
# Backs the user library / build feature, not a connector users filter by.
|
||||
DocumentSource.CRAFT_FILE,
|
||||
}
|
||||
|
||||
|
||||
def fetch_unique_document_sources(db_session: Session) -> list[DocumentSource]:
|
||||
distinct_sources = db_session.query(Connector.source).distinct().all()
|
||||
|
||||
sources = [
|
||||
source[0]
|
||||
for source in distinct_sources
|
||||
if source[0] != DocumentSource.INGESTION_API
|
||||
if source[0] not in _INTERNAL_ONLY_SOURCES
|
||||
]
|
||||
|
||||
return sources
|
||||
|
||||
@@ -696,7 +696,6 @@ def upsert_documents(
|
||||
else {}
|
||||
),
|
||||
doc_metadata=doc.doc_metadata,
|
||||
file_id=doc.file_id,
|
||||
)
|
||||
)
|
||||
for doc in seen_documents.values()
|
||||
@@ -713,7 +712,6 @@ def upsert_documents(
|
||||
"secondary_owners": insert_stmt.excluded.secondary_owners,
|
||||
"doc_metadata": insert_stmt.excluded.doc_metadata,
|
||||
"parent_hierarchy_node_id": insert_stmt.excluded.parent_hierarchy_node_id,
|
||||
"file_id": insert_stmt.excluded.file_id,
|
||||
}
|
||||
if includes_permissions:
|
||||
# Use COALESCE to preserve existing permissions when new values are NULL.
|
||||
|
||||
@@ -62,21 +62,6 @@ def delete_filerecord_by_file_id(
|
||||
db_session.query(FileRecord).filter_by(file_id=file_id).delete()
|
||||
|
||||
|
||||
def update_filerecord_origin(
|
||||
file_id: str,
|
||||
from_origin: FileOrigin,
|
||||
to_origin: FileOrigin,
|
||||
db_session: Session,
|
||||
) -> None:
|
||||
"""Change a file_record's `file_origin`, filtered on the current origin
|
||||
so the update is idempotent. Caller owns the commit.
|
||||
"""
|
||||
db_session.query(FileRecord).filter(
|
||||
FileRecord.file_id == file_id,
|
||||
FileRecord.file_origin == from_origin,
|
||||
).update({FileRecord.file_origin: to_origin})
|
||||
|
||||
|
||||
def upsert_filerecord(
|
||||
file_id: str,
|
||||
display_name: str,
|
||||
|
||||
@@ -952,7 +952,6 @@ class Document(Base):
|
||||
semantic_id: Mapped[str] = mapped_column(NullFilteredString)
|
||||
# First Section's link
|
||||
link: Mapped[str | None] = mapped_column(NullFilteredString, nullable=True)
|
||||
file_id: Mapped[str | None] = mapped_column(String, nullable=True)
|
||||
|
||||
# The updated time is also used as a measure of the last successful state of the doc
|
||||
# pulled from the source (to help skip reindexing already updated docs in case of
|
||||
|
||||
@@ -20,6 +20,7 @@ from onyx.background.celery.tasks.opensearch_migration.constants import (
|
||||
TOTAL_ALLOWABLE_DOC_MIGRATION_ATTEMPTS_BEFORE_PERMANENT_FAILURE,
|
||||
)
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
|
||||
from onyx.configs.app_configs import ONYX_DISABLE_VESPA
|
||||
from onyx.db.enums import OpenSearchDocumentMigrationStatus
|
||||
from onyx.db.models import Document
|
||||
from onyx.db.models import OpenSearchDocumentMigrationRecord
|
||||
@@ -412,7 +413,11 @@ def get_opensearch_retrieval_state(
|
||||
|
||||
If the tenant migration record is not found, defaults to
|
||||
ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX.
|
||||
|
||||
If ONYX_DISABLE_VESPA is True, always returns True.
|
||||
"""
|
||||
if ONYX_DISABLE_VESPA:
|
||||
return True
|
||||
record = db_session.query(OpenSearchTenantMigrationRecord).first()
|
||||
if record is None:
|
||||
return ENABLE_OPENSEARCH_RETRIEVAL_FOR_ONYX
|
||||
|
||||
@@ -3,6 +3,7 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import DISABLE_VECTOR_DB
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.app_configs import ONYX_DISABLE_VESPA
|
||||
from onyx.db.models import SearchSettings
|
||||
from onyx.db.opensearch_migration import get_opensearch_retrieval_state
|
||||
from onyx.document_index.disabled import DisabledDocumentIndex
|
||||
@@ -48,6 +49,11 @@ def get_default_document_index(
|
||||
secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled
|
||||
|
||||
opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
|
||||
if ONYX_DISABLE_VESPA:
|
||||
if not opensearch_retrieval_enabled:
|
||||
raise ValueError(
|
||||
"BUG: ONYX_DISABLE_VESPA is set but opensearch_retrieval_enabled is not set."
|
||||
)
|
||||
if opensearch_retrieval_enabled:
|
||||
indexing_setting = IndexingSetting.from_db_model(search_settings)
|
||||
secondary_indexing_setting = (
|
||||
@@ -119,21 +125,32 @@ def get_all_document_indices(
|
||||
)
|
||||
]
|
||||
|
||||
vespa_document_index = VespaIndex(
|
||||
index_name=search_settings.index_name,
|
||||
secondary_index_name=(
|
||||
secondary_search_settings.index_name if secondary_search_settings else None
|
||||
),
|
||||
large_chunks_enabled=search_settings.large_chunks_enabled,
|
||||
secondary_large_chunks_enabled=(
|
||||
secondary_search_settings.large_chunks_enabled
|
||||
if secondary_search_settings
|
||||
else None
|
||||
),
|
||||
multitenant=MULTI_TENANT,
|
||||
httpx_client=httpx_client,
|
||||
)
|
||||
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
|
||||
result: list[DocumentIndex] = []
|
||||
|
||||
if ONYX_DISABLE_VESPA:
|
||||
if not ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
|
||||
raise ValueError(
|
||||
"ONYX_DISABLE_VESPA is set but ENABLE_OPENSEARCH_INDEXING_FOR_ONYX is not set."
|
||||
)
|
||||
else:
|
||||
vespa_document_index = VespaIndex(
|
||||
index_name=search_settings.index_name,
|
||||
secondary_index_name=(
|
||||
secondary_search_settings.index_name
|
||||
if secondary_search_settings
|
||||
else None
|
||||
),
|
||||
large_chunks_enabled=search_settings.large_chunks_enabled,
|
||||
secondary_large_chunks_enabled=(
|
||||
secondary_search_settings.large_chunks_enabled
|
||||
if secondary_search_settings
|
||||
else None
|
||||
),
|
||||
multitenant=MULTI_TENANT,
|
||||
httpx_client=httpx_client,
|
||||
)
|
||||
result.append(vespa_document_index)
|
||||
|
||||
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
|
||||
indexing_setting = IndexingSetting.from_db_model(search_settings)
|
||||
secondary_indexing_setting = (
|
||||
@@ -169,7 +186,6 @@ def get_all_document_indices(
|
||||
multitenant=MULTI_TENANT,
|
||||
httpx_client=httpx_client,
|
||||
)
|
||||
result: list[DocumentIndex] = [vespa_document_index]
|
||||
if opensearch_document_index:
|
||||
result.append(opensearch_document_index)
|
||||
|
||||
return result
|
||||
|
||||
@@ -98,9 +98,6 @@ class DocumentMetadata:
|
||||
# The resolved database ID of the parent hierarchy node (folder/container)
|
||||
parent_hierarchy_node_id: int | None = None
|
||||
|
||||
# Opt-in pointer to the persisted raw file for this document (file_store id).
|
||||
file_id: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class VespaDocumentFields:
|
||||
|
||||
@@ -17,6 +17,7 @@ from onyx.configs.app_configs import OPENSEARCH_ADMIN_PASSWORD
|
||||
from onyx.configs.app_configs import OPENSEARCH_ADMIN_USERNAME
|
||||
from onyx.configs.app_configs import OPENSEARCH_HOST
|
||||
from onyx.configs.app_configs import OPENSEARCH_REST_API_PORT
|
||||
from onyx.configs.app_configs import OPENSEARCH_USE_SSL
|
||||
from onyx.document_index.interfaces_new import TenantState
|
||||
from onyx.document_index.opensearch.constants import OpenSearchSearchType
|
||||
from onyx.document_index.opensearch.schema import DocumentChunk
|
||||
@@ -132,7 +133,7 @@ class OpenSearchClient(AbstractContextManager):
|
||||
host: str = OPENSEARCH_HOST,
|
||||
port: int = OPENSEARCH_REST_API_PORT,
|
||||
auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),
|
||||
use_ssl: bool = True,
|
||||
use_ssl: bool = OPENSEARCH_USE_SSL,
|
||||
verify_certs: bool = False,
|
||||
ssl_show_warn: bool = False,
|
||||
timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
|
||||
@@ -302,7 +303,7 @@ class OpenSearchIndexClient(OpenSearchClient):
|
||||
host: str = OPENSEARCH_HOST,
|
||||
port: int = OPENSEARCH_REST_API_PORT,
|
||||
auth: tuple[str, str] = (OPENSEARCH_ADMIN_USERNAME, OPENSEARCH_ADMIN_PASSWORD),
|
||||
use_ssl: bool = True,
|
||||
use_ssl: bool = OPENSEARCH_USE_SSL,
|
||||
verify_certs: bool = False,
|
||||
ssl_show_warn: bool = False,
|
||||
timeout: int = DEFAULT_OPENSEARCH_CLIENT_TIMEOUT_S,
|
||||
|
||||
@@ -23,6 +23,7 @@ import openpyxl
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
from PIL import Image
|
||||
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
from onyx.configs.constants import ONYX_METADATA_FILENAME
|
||||
from onyx.configs.llm_configs import get_image_extraction_and_analysis_enabled
|
||||
from onyx.file_processing.file_types import OnyxFileExtensions
|
||||
@@ -47,6 +48,7 @@ KNOWN_OPENPYXL_BUGS = [
|
||||
"File contains no valid workbook part",
|
||||
"Unable to read workbook: could not read stylesheet from None",
|
||||
"Colors must be aRGB hex values",
|
||||
"Max value is",
|
||||
]
|
||||
|
||||
|
||||
@@ -191,6 +193,56 @@ def read_text_file(
|
||||
return file_content_raw, metadata
|
||||
|
||||
|
||||
def count_pdf_embedded_images(file: IO[Any], cap: int) -> int:
|
||||
"""Return the number of embedded images in a PDF, short-circuiting at cap+1.
|
||||
|
||||
Used to reject PDFs whose image count would OOM the user-file-processing
|
||||
worker during indexing. Returns a value > cap as a sentinel once the count
|
||||
exceeds the cap, so callers do not iterate thousands of image objects just
|
||||
to report a number. Returns 0 if the PDF cannot be parsed.
|
||||
|
||||
Owner-password-only PDFs (permission restrictions but no open password) are
|
||||
counted normally — they decrypt with an empty string. Truly password-locked
|
||||
PDFs are skipped (return 0) since we can't inspect them; the caller should
|
||||
ensure the password-protected check runs first.
|
||||
|
||||
Always restores the file pointer to its original position before returning.
|
||||
"""
|
||||
from pypdf import PdfReader
|
||||
|
||||
try:
|
||||
start_pos = file.tell()
|
||||
except Exception:
|
||||
start_pos = None
|
||||
try:
|
||||
if start_pos is not None:
|
||||
file.seek(0)
|
||||
reader = PdfReader(file)
|
||||
if reader.is_encrypted:
|
||||
# Try empty password first (owner-password-only PDFs); give up if that fails.
|
||||
try:
|
||||
if reader.decrypt("") == 0:
|
||||
return 0
|
||||
except Exception:
|
||||
return 0
|
||||
count = 0
|
||||
for page in reader.pages:
|
||||
for _ in page.images:
|
||||
count += 1
|
||||
if count > cap:
|
||||
return count
|
||||
return count
|
||||
except Exception:
|
||||
logger.warning("Failed to count embedded images in PDF", exc_info=True)
|
||||
return 0
|
||||
finally:
|
||||
if start_pos is not None:
|
||||
try:
|
||||
file.seek(start_pos)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str:
|
||||
"""
|
||||
Extract text from a PDF. For embedded images, a more complex approach is needed.
|
||||
@@ -254,8 +306,27 @@ def read_pdf_file(
|
||||
)
|
||||
|
||||
if extract_images:
|
||||
image_cap = MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
images_processed = 0
|
||||
cap_reached = False
|
||||
for page_num, page in enumerate(pdf_reader.pages):
|
||||
if cap_reached:
|
||||
break
|
||||
for image_file_object in page.images:
|
||||
if images_processed >= image_cap:
|
||||
# Defense-in-depth backstop. Upload-time validation
|
||||
# should have rejected files exceeding the cap, but
|
||||
# we also break here so a single oversized file can
|
||||
# never pin a worker.
|
||||
logger.warning(
|
||||
"PDF embedded image cap reached (%d). "
|
||||
"Skipping remaining images on page %d and beyond.",
|
||||
image_cap,
|
||||
page_num + 1,
|
||||
)
|
||||
cap_reached = True
|
||||
break
|
||||
|
||||
image = Image.open(io.BytesIO(image_file_object.data))
|
||||
img_byte_arr = io.BytesIO()
|
||||
image.save(img_byte_arr, format=image.format)
|
||||
@@ -268,6 +339,7 @@ def read_pdf_file(
|
||||
image_callback(img_bytes, image_name)
|
||||
else:
|
||||
extracted_images.append((img_bytes, image_name))
|
||||
images_processed += 1
|
||||
|
||||
return text, metadata, extracted_images
|
||||
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
from typing import IO
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.constants import FileOrigin
|
||||
from onyx.db.file_record import update_filerecord_origin
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
# (content, content_type) -> file_id
|
||||
RawFileCallback = Callable[[IO[bytes], str], str]
|
||||
|
||||
|
||||
def stage_raw_file(
|
||||
content: IO,
|
||||
content_type: str,
|
||||
*,
|
||||
metadata: dict[str, Any],
|
||||
) -> str:
|
||||
"""Persist raw bytes to the file store with FileOrigin.INDEXING_STAGING.
|
||||
|
||||
`metadata` is attached to the file_record so that downstream promotion
|
||||
(in docprocessing) and orphan reaping (TTL janitor) can locate the file
|
||||
by its originating context.
|
||||
"""
|
||||
file_store = get_default_file_store()
|
||||
file_id = file_store.save_file(
|
||||
content=content,
|
||||
display_name=None,
|
||||
file_origin=FileOrigin.INDEXING_STAGING,
|
||||
file_type=content_type,
|
||||
file_metadata=metadata,
|
||||
)
|
||||
return file_id
|
||||
|
||||
|
||||
def build_raw_file_callback(
|
||||
*,
|
||||
index_attempt_id: int,
|
||||
cc_pair_id: int,
|
||||
tenant_id: str,
|
||||
) -> RawFileCallback:
|
||||
"""Build a per-attempt callback that connectors can invoke to opt in to
|
||||
raw-file persistence. The closure binds the attempt-level context as the
|
||||
staging metadata so the connector only needs to pass per-call info
|
||||
(bytes, content_type) and gets back a file_id to attach to its Document.
|
||||
"""
|
||||
metadata: dict[str, Any] = {
|
||||
"index_attempt_id": index_attempt_id,
|
||||
"cc_pair_id": cc_pair_id,
|
||||
"tenant_id": tenant_id,
|
||||
}
|
||||
|
||||
def _callback(content: IO[bytes], content_type: str) -> str:
|
||||
return stage_raw_file(
|
||||
content=content,
|
||||
content_type=content_type,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
return _callback
|
||||
|
||||
|
||||
def promote_staged_file(db_session: Session, file_id: str) -> None:
|
||||
"""Mark a previously-staged file as `FileOrigin.CONNECTOR`.
|
||||
|
||||
Idempotent — the underlying update filters on the STAGING origin so
|
||||
repeated calls no-op once the file has already been promoted or removed.
|
||||
Caller owns the commit so promotion stays transactional with whatever
|
||||
document-level bookkeeping the caller is doing.
|
||||
"""
|
||||
update_filerecord_origin(
|
||||
file_id=file_id,
|
||||
from_origin=FileOrigin.INDEXING_STAGING,
|
||||
to_origin=FileOrigin.CONNECTOR,
|
||||
db_session=db_session,
|
||||
)
|
||||
@@ -49,7 +49,6 @@ from onyx.document_index.interfaces import DocumentMetadata
|
||||
from onyx.document_index.interfaces import IndexBatchParams
|
||||
from onyx.file_processing.image_summarization import summarize_image_with_error_handling
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.file_store.staging import promote_staged_file
|
||||
from onyx.hooks.executor import execute_hook
|
||||
from onyx.hooks.executor import HookSkipped
|
||||
from onyx.hooks.executor import HookSoftFailed
|
||||
@@ -155,7 +154,6 @@ def _upsert_documents_in_db(
|
||||
doc_metadata=doc.doc_metadata,
|
||||
# parent_hierarchy_node_id is resolved in docfetching using Redis cache
|
||||
parent_hierarchy_node_id=doc.parent_hierarchy_node_id,
|
||||
file_id=doc.file_id,
|
||||
)
|
||||
document_metadata_list.append(db_doc_metadata)
|
||||
|
||||
@@ -366,39 +364,6 @@ def index_doc_batch_with_handler(
|
||||
return index_pipeline_result
|
||||
|
||||
|
||||
def _apply_file_id_transitions(
|
||||
documents: list[Document],
|
||||
previous_file_ids: dict[str, str],
|
||||
db_session: Session,
|
||||
) -> None:
|
||||
"""Finalize file_id lifecycle for the batch.
|
||||
|
||||
`document.file_id` is already written by `upsert_documents`. For each doc
|
||||
whose file_id changed, promote the new staged file to `CONNECTOR` (so the
|
||||
TTL janitor leaves it alone) and delete the replaced one. The delete is
|
||||
best-effort; if it fails the janitor will reap the orphan.
|
||||
"""
|
||||
file_store = get_default_file_store()
|
||||
for doc in documents:
|
||||
new_file_id = doc.file_id
|
||||
old_file_id = previous_file_ids.get(doc.id)
|
||||
|
||||
if new_file_id == old_file_id:
|
||||
continue
|
||||
|
||||
if new_file_id is not None:
|
||||
promote_staged_file(db_session=db_session, file_id=new_file_id)
|
||||
|
||||
if old_file_id is not None:
|
||||
try:
|
||||
file_store.delete_file(old_file_id, error_on_missing=False)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"Failed to delete replaced file_id={old_file_id}; "
|
||||
"will be reaped by janitor."
|
||||
)
|
||||
|
||||
|
||||
def index_doc_batch_prepare(
|
||||
documents: list[Document],
|
||||
index_attempt_metadata: IndexAttemptMetadata,
|
||||
@@ -417,11 +382,6 @@ def index_doc_batch_prepare(
|
||||
document_ids=document_ids,
|
||||
)
|
||||
|
||||
# Capture previous file_ids BEFORE any writes so we know what to reap.
|
||||
previous_file_ids: dict[str, str] = {
|
||||
db_doc.id: db_doc.file_id for db_doc in db_docs if db_doc.file_id is not None
|
||||
}
|
||||
|
||||
updatable_docs = (
|
||||
get_doc_ids_to_update(documents=documents, db_docs=db_docs)
|
||||
if not ignore_time_skip
|
||||
@@ -444,11 +404,6 @@ def index_doc_batch_prepare(
|
||||
index_attempt_metadata=index_attempt_metadata,
|
||||
db_session=db_session,
|
||||
)
|
||||
_apply_file_id_transitions(
|
||||
documents=updatable_docs,
|
||||
previous_file_ids=previous_file_ids,
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Upserted {len(updatable_docs)} changed docs out of {len(documents)} total docs into the DB"
|
||||
|
||||
@@ -19,9 +19,14 @@ from onyx.configs.app_configs import MCP_SERVER_CORS_ORIGINS
|
||||
from onyx.mcp_server.auth import OnyxTokenVerifier
|
||||
from onyx.mcp_server.utils import shutdown_http_client
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Initialize EE flag at module import so it's set regardless of the entry point
|
||||
# (python -m onyx.mcp_server_main, uvicorn onyx.mcp_server.api:mcp_app, etc.).
|
||||
set_is_ee_based_on_env_variable()
|
||||
|
||||
logger.info("Creating Onyx MCP Server...")
|
||||
|
||||
mcp_server = FastMCP(
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""Resource registrations for the Onyx MCP server."""
|
||||
|
||||
# Import resource modules so decorators execute when the package loads.
|
||||
from onyx.mcp_server.resources import document_sets # noqa: F401
|
||||
from onyx.mcp_server.resources import indexed_sources # noqa: F401
|
||||
|
||||
41
backend/onyx/mcp_server/resources/document_sets.py
Normal file
41
backend/onyx/mcp_server/resources/document_sets.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""Resource exposing document sets available to the current user."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from onyx.mcp_server.api import mcp_server
|
||||
from onyx.mcp_server.utils import get_accessible_document_sets
|
||||
from onyx.mcp_server.utils import require_access_token
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
@mcp_server.resource(
|
||||
"resource://document_sets",
|
||||
name="document_sets",
|
||||
description=(
|
||||
"Enumerate the Document Sets accessible to the current user. Use the "
|
||||
"returned `name` values with the `document_set_names` filter of the "
|
||||
"`search_indexed_documents` tool to scope searches to a specific set."
|
||||
),
|
||||
mime_type="application/json",
|
||||
)
|
||||
async def document_sets_resource() -> str:
|
||||
"""Return the list of document sets the user can filter searches by."""
|
||||
|
||||
access_token = require_access_token()
|
||||
|
||||
document_sets = sorted(
|
||||
await get_accessible_document_sets(access_token), key=lambda entry: entry.name
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Onyx MCP Server: document_sets resource returning %s entries",
|
||||
len(document_sets),
|
||||
)
|
||||
|
||||
# FastMCP 3.2+ requires str/bytes/list[ResourceContent] — it no longer
|
||||
# auto-serializes; serialize to JSON ourselves.
|
||||
return json.dumps([entry.model_dump(mode="json") for entry in document_sets])
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
import json
|
||||
|
||||
from onyx.mcp_server.api import mcp_server
|
||||
from onyx.mcp_server.utils import get_indexed_sources
|
||||
@@ -21,7 +21,7 @@ logger = setup_logger()
|
||||
),
|
||||
mime_type="application/json",
|
||||
)
|
||||
async def indexed_sources_resource() -> dict[str, Any]:
|
||||
async def indexed_sources_resource() -> str:
|
||||
"""Return the list of indexed source types for search filtering."""
|
||||
|
||||
access_token = require_access_token()
|
||||
@@ -33,6 +33,6 @@ async def indexed_sources_resource() -> dict[str, Any]:
|
||||
len(sources),
|
||||
)
|
||||
|
||||
return {
|
||||
"indexed_sources": sorted(sources),
|
||||
}
|
||||
# FastMCP 3.2+ requires str/bytes/list[ResourceContent] — it no longer
|
||||
# auto-serializes; serialize to JSON ourselves.
|
||||
return json.dumps(sorted(sources))
|
||||
|
||||
@@ -4,12 +4,23 @@ from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from fastmcp.server.auth.auth import AccessToken
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.chat.models import ChatFullResponse
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.context.search.models import BaseFilters
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.mcp_server.api import mcp_server
|
||||
from onyx.mcp_server.utils import get_http_client
|
||||
from onyx.mcp_server.utils import get_indexed_sources
|
||||
from onyx.mcp_server.utils import require_access_token
|
||||
from onyx.server.features.web_search.models import OpenUrlsToolRequest
|
||||
from onyx.server.features.web_search.models import OpenUrlsToolResponse
|
||||
from onyx.server.features.web_search.models import WebSearchToolRequest
|
||||
from onyx.server.features.web_search.models import WebSearchToolResponse
|
||||
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
|
||||
from onyx.server.query_and_chat.models import SendMessageRequest
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import build_api_server_url_for_http_requests
|
||||
from onyx.utils.variable_functionality import global_version
|
||||
@@ -17,6 +28,43 @@ from onyx.utils.variable_functionality import global_version
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
# CE search falls through to the chat endpoint, which invokes an LLM — the
|
||||
# default 60s client timeout is not enough for a real RAG-backed response.
|
||||
_CE_SEARCH_TIMEOUT_SECONDS = 300.0
|
||||
|
||||
|
||||
async def _post_model(
|
||||
url: str,
|
||||
body: BaseModel,
|
||||
access_token: AccessToken,
|
||||
timeout: float | None = None,
|
||||
) -> httpx.Response:
|
||||
"""POST a Pydantic model as JSON to the Onyx backend."""
|
||||
return await get_http_client().post(
|
||||
url,
|
||||
content=body.model_dump_json(exclude_unset=True),
|
||||
headers={
|
||||
"Authorization": f"Bearer {access_token.token}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
timeout=timeout if timeout is not None else httpx.USE_CLIENT_DEFAULT,
|
||||
)
|
||||
|
||||
|
||||
def _project_doc(doc: SearchDoc, content: str | None) -> dict[str, Any]:
|
||||
"""Project a backend search doc into the MCP wire shape.
|
||||
|
||||
Accepts SearchDocWithContent (EE) too since it extends SearchDoc.
|
||||
"""
|
||||
return {
|
||||
"semantic_identifier": doc.semantic_identifier,
|
||||
"content": content,
|
||||
"source_type": doc.source_type.value,
|
||||
"link": doc.link,
|
||||
"score": doc.score,
|
||||
}
|
||||
|
||||
|
||||
def _extract_error_detail(response: httpx.Response) -> str:
|
||||
"""Extract a human-readable error message from a failed backend response.
|
||||
|
||||
@@ -36,6 +84,7 @@ def _extract_error_detail(response: httpx.Response) -> str:
|
||||
async def search_indexed_documents(
|
||||
query: str,
|
||||
source_types: list[str] | None = None,
|
||||
document_set_names: list[str] | None = None,
|
||||
time_cutoff: str | None = None,
|
||||
limit: int = 10,
|
||||
) -> dict[str, Any]:
|
||||
@@ -53,6 +102,10 @@ async def search_indexed_documents(
|
||||
In EE mode, the dedicated search endpoint is used instead.
|
||||
|
||||
To find a list of available sources, use the `indexed_sources` resource.
|
||||
`document_set_names` restricts results to documents belonging to the named
|
||||
Document Sets — useful for scoping queries to a curated subset of the
|
||||
knowledge base (e.g. to isolate knowledge between agents). Use the
|
||||
`document_sets` resource to discover accessible set names.
|
||||
Returns chunks of text as search results with snippets, scores, and metadata.
|
||||
|
||||
Example usage:
|
||||
@@ -60,15 +113,23 @@ async def search_indexed_documents(
|
||||
{
|
||||
"query": "What is the latest status of PROJ-1234 and what is the next development item?",
|
||||
"source_types": ["jira", "google_drive", "github"],
|
||||
"document_set_names": ["Engineering Wiki"],
|
||||
"time_cutoff": "2025-11-24T00:00:00Z",
|
||||
"limit": 10,
|
||||
}
|
||||
```
|
||||
"""
|
||||
logger.info(
|
||||
f"Onyx MCP Server: document search: query='{query}', sources={source_types}, limit={limit}"
|
||||
f"Onyx MCP Server: document search: query='{query}', sources={source_types}, "
|
||||
f"document_sets={document_set_names}, limit={limit}"
|
||||
)
|
||||
|
||||
# Normalize empty list inputs to None so downstream filter construction is
|
||||
# consistent — BaseFilters treats [] as "match zero" which differs from
|
||||
# "no filter" (None).
|
||||
source_types = source_types or None
|
||||
document_set_names = document_set_names or None
|
||||
|
||||
# Parse time_cutoff string to datetime if provided
|
||||
time_cutoff_dt: datetime | None = None
|
||||
if time_cutoff:
|
||||
@@ -81,9 +142,6 @@ async def search_indexed_documents(
|
||||
# Continue with no time_cutoff instead of returning an error
|
||||
time_cutoff_dt = None
|
||||
|
||||
# Initialize source_type_enums early to avoid UnboundLocalError
|
||||
source_type_enums: list[DocumentSource] | None = None
|
||||
|
||||
# Get authenticated user from FastMCP's access token
|
||||
access_token = require_access_token()
|
||||
|
||||
@@ -117,6 +175,7 @@ async def search_indexed_documents(
|
||||
|
||||
# Convert source_types strings to DocumentSource enums if provided
|
||||
# Invalid values will be handled by the API server
|
||||
source_type_enums: list[DocumentSource] | None = None
|
||||
if source_types is not None:
|
||||
source_type_enums = []
|
||||
for src in source_types:
|
||||
@@ -127,83 +186,83 @@ async def search_indexed_documents(
|
||||
f"Onyx MCP Server: Invalid source type '{src}' - will be ignored by server"
|
||||
)
|
||||
|
||||
# Build filters dict only with non-None values
|
||||
filters: dict[str, Any] | None = None
|
||||
if source_type_enums or time_cutoff_dt:
|
||||
filters = {}
|
||||
if source_type_enums:
|
||||
filters["source_type"] = [src.value for src in source_type_enums]
|
||||
if time_cutoff_dt:
|
||||
filters["time_cutoff"] = time_cutoff_dt.isoformat()
|
||||
filters: BaseFilters | None = None
|
||||
if source_type_enums or document_set_names or time_cutoff_dt:
|
||||
filters = BaseFilters(
|
||||
source_type=source_type_enums,
|
||||
document_set=document_set_names,
|
||||
time_cutoff=time_cutoff_dt,
|
||||
)
|
||||
|
||||
is_ee = global_version.is_ee_version()
|
||||
base_url = build_api_server_url_for_http_requests(respect_env_override_if_set=True)
|
||||
auth_headers = {"Authorization": f"Bearer {access_token.token}"}
|
||||
is_ee = global_version.is_ee_version()
|
||||
|
||||
search_request: dict[str, Any]
|
||||
request: BaseModel
|
||||
if is_ee:
|
||||
# EE: use the dedicated search endpoint (no LLM invocation)
|
||||
search_request = {
|
||||
"search_query": query,
|
||||
"filters": filters,
|
||||
"num_docs_fed_to_llm_selection": limit,
|
||||
"run_query_expansion": False,
|
||||
"include_content": True,
|
||||
"stream": False,
|
||||
}
|
||||
# EE: use the dedicated search endpoint (no LLM invocation).
|
||||
# Lazy import so CE deployments that strip ee/ never load this module.
|
||||
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
|
||||
|
||||
request = SendSearchQueryRequest(
|
||||
search_query=query,
|
||||
filters=filters,
|
||||
num_docs_fed_to_llm_selection=limit,
|
||||
run_query_expansion=False,
|
||||
include_content=True,
|
||||
stream=False,
|
||||
)
|
||||
endpoint = f"{base_url}/search/send-search-message"
|
||||
error_key = "error"
|
||||
docs_key = "search_docs"
|
||||
content_field = "content"
|
||||
else:
|
||||
# CE: fall back to the chat endpoint (invokes LLM, consumes tokens)
|
||||
search_request = {
|
||||
"message": query,
|
||||
"stream": False,
|
||||
"chat_session_info": {},
|
||||
}
|
||||
if filters:
|
||||
search_request["internal_search_filters"] = filters
|
||||
request = SendMessageRequest(
|
||||
message=query,
|
||||
stream=False,
|
||||
chat_session_info=ChatSessionCreationRequest(),
|
||||
internal_search_filters=filters,
|
||||
)
|
||||
endpoint = f"{base_url}/chat/send-chat-message"
|
||||
error_key = "error_msg"
|
||||
docs_key = "top_documents"
|
||||
content_field = "blurb"
|
||||
|
||||
try:
|
||||
response = await get_http_client().post(
|
||||
response = await _post_model(
|
||||
endpoint,
|
||||
json=search_request,
|
||||
headers=auth_headers,
|
||||
request,
|
||||
access_token,
|
||||
timeout=None if is_ee else _CE_SEARCH_TIMEOUT_SECONDS,
|
||||
)
|
||||
if not response.is_success:
|
||||
error_detail = _extract_error_detail(response)
|
||||
return {
|
||||
"documents": [],
|
||||
"total_results": 0,
|
||||
"query": query,
|
||||
"error": error_detail,
|
||||
}
|
||||
result = response.json()
|
||||
|
||||
# Check for error in response
|
||||
if result.get(error_key):
|
||||
return {
|
||||
"documents": [],
|
||||
"total_results": 0,
|
||||
"query": query,
|
||||
"error": result.get(error_key),
|
||||
"error": _extract_error_detail(response),
|
||||
}
|
||||
|
||||
documents = [
|
||||
{
|
||||
"semantic_identifier": doc.get("semantic_identifier"),
|
||||
"content": doc.get(content_field),
|
||||
"source_type": doc.get("source_type"),
|
||||
"link": doc.get("link"),
|
||||
"score": doc.get("score"),
|
||||
}
|
||||
for doc in result.get(docs_key, [])
|
||||
]
|
||||
if is_ee:
|
||||
from ee.onyx.server.query_and_chat.models import SearchFullResponse
|
||||
|
||||
ee_payload = SearchFullResponse.model_validate_json(response.content)
|
||||
if ee_payload.error:
|
||||
return {
|
||||
"documents": [],
|
||||
"total_results": 0,
|
||||
"query": query,
|
||||
"error": ee_payload.error,
|
||||
}
|
||||
documents = [
|
||||
_project_doc(doc, doc.content) for doc in ee_payload.search_docs
|
||||
]
|
||||
else:
|
||||
ce_payload = ChatFullResponse.model_validate_json(response.content)
|
||||
if ce_payload.error_msg:
|
||||
return {
|
||||
"documents": [],
|
||||
"total_results": 0,
|
||||
"query": query,
|
||||
"error": ce_payload.error_msg,
|
||||
}
|
||||
documents = [
|
||||
_project_doc(doc, doc.blurb) for doc in ce_payload.top_documents
|
||||
]
|
||||
|
||||
# NOTE: search depth is controlled by the backend persona defaults, not `limit`.
|
||||
# `limit` only caps the returned list; fewer results may be returned if the
|
||||
@@ -252,23 +311,20 @@ async def search_web(
|
||||
access_token = require_access_token()
|
||||
|
||||
try:
|
||||
request_payload = {"queries": [query], "max_results": limit}
|
||||
response = await get_http_client().post(
|
||||
response = await _post_model(
|
||||
f"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/web-search/search-lite",
|
||||
json=request_payload,
|
||||
headers={"Authorization": f"Bearer {access_token.token}"},
|
||||
WebSearchToolRequest(queries=[query], max_results=limit),
|
||||
access_token,
|
||||
)
|
||||
if not response.is_success:
|
||||
error_detail = _extract_error_detail(response)
|
||||
return {
|
||||
"error": error_detail,
|
||||
"error": _extract_error_detail(response),
|
||||
"results": [],
|
||||
"query": query,
|
||||
}
|
||||
response_payload = response.json()
|
||||
results = response_payload.get("results", [])
|
||||
payload = WebSearchToolResponse.model_validate_json(response.content)
|
||||
return {
|
||||
"results": results,
|
||||
"results": [result.model_dump(mode="json") for result in payload.results],
|
||||
"query": query,
|
||||
}
|
||||
except Exception as e:
|
||||
@@ -305,21 +361,19 @@ async def open_urls(
|
||||
access_token = require_access_token()
|
||||
|
||||
try:
|
||||
response = await get_http_client().post(
|
||||
response = await _post_model(
|
||||
f"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/web-search/open-urls",
|
||||
json={"urls": urls},
|
||||
headers={"Authorization": f"Bearer {access_token.token}"},
|
||||
OpenUrlsToolRequest(urls=urls),
|
||||
access_token,
|
||||
)
|
||||
if not response.is_success:
|
||||
error_detail = _extract_error_detail(response)
|
||||
return {
|
||||
"error": error_detail,
|
||||
"error": _extract_error_detail(response),
|
||||
"results": [],
|
||||
}
|
||||
response_payload = response.json()
|
||||
results = response_payload.get("results", [])
|
||||
payload = OpenUrlsToolResponse.model_validate_json(response.content)
|
||||
return {
|
||||
"results": results,
|
||||
"results": [result.model_dump(mode="json") for result in payload.results],
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Onyx MCP Server: URL fetch error: {e}", exc_info=True)
|
||||
|
||||
@@ -5,10 +5,24 @@ from __future__ import annotations
|
||||
import httpx
|
||||
from fastmcp.server.auth.auth import AccessToken
|
||||
from fastmcp.server.dependencies import get_access_token
|
||||
from pydantic import BaseModel
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import build_api_server_url_for_http_requests
|
||||
|
||||
|
||||
class DocumentSetEntry(BaseModel):
|
||||
"""Minimal document-set shape surfaced to MCP clients.
|
||||
|
||||
Projected from the backend's DocumentSetSummary to avoid coupling MCP to
|
||||
admin-only fields (cc-pair summaries, federated connectors, etc.).
|
||||
"""
|
||||
|
||||
name: str
|
||||
description: str | None = None
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Shared HTTP client reused across requests
|
||||
@@ -84,3 +98,32 @@ async def get_indexed_sources(
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f"Failed to fetch indexed sources: {exc}") from exc
|
||||
|
||||
|
||||
_DOCUMENT_SET_ENTRIES_ADAPTER = TypeAdapter(list[DocumentSetEntry])
|
||||
|
||||
|
||||
async def get_accessible_document_sets(
|
||||
access_token: AccessToken,
|
||||
) -> list[DocumentSetEntry]:
|
||||
"""Fetch document sets accessible to the current user."""
|
||||
headers = {"Authorization": f"Bearer {access_token.token}"}
|
||||
try:
|
||||
response = await get_http_client().get(
|
||||
f"{build_api_server_url_for_http_requests(respect_env_override_if_set=True)}/manage/document-set",
|
||||
headers=headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return _DOCUMENT_SET_ENTRIES_ADAPTER.validate_json(response.content)
|
||||
except (httpx.HTTPStatusError, httpx.RequestError, ValueError):
|
||||
logger.error(
|
||||
"Onyx MCP Server: Failed to fetch document sets",
|
||||
exc_info=True,
|
||||
)
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"Onyx MCP Server: Unexpected error fetching document sets",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f"Failed to fetch document sets: {exc}") from exc
|
||||
|
||||
@@ -5,6 +5,7 @@ import uvicorn
|
||||
from onyx.configs.app_configs import MCP_SERVER_ENABLED
|
||||
from onyx.configs.app_configs import MCP_SERVER_HOST
|
||||
from onyx.configs.app_configs import MCP_SERVER_PORT
|
||||
from onyx.tracing.setup import setup_tracing
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
|
||||
|
||||
@@ -18,6 +19,7 @@ def main() -> None:
|
||||
return
|
||||
|
||||
set_is_ee_based_on_env_variable()
|
||||
setup_tracing()
|
||||
logger.info(f"Starting MCP server on {MCP_SERVER_HOST}:{MCP_SERVER_PORT}")
|
||||
|
||||
from onyx.mcp_server.api import mcp_app
|
||||
|
||||
@@ -11,6 +11,8 @@ All public functions no-op in single-tenant mode (`MULTI_TENANT=False`).
|
||||
import time
|
||||
from typing import cast
|
||||
|
||||
from prometheus_client import Counter
|
||||
from prometheus_client import Gauge
|
||||
from redis.client import Redis
|
||||
|
||||
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
|
||||
@@ -26,6 +28,40 @@ logger = setup_logger()
|
||||
_SET_KEY = "active_tenants"
|
||||
|
||||
|
||||
# --- Prometheus metrics ---
|
||||
|
||||
_active_set_size = Gauge(
|
||||
"onyx_tenant_work_gating_active_set_size",
|
||||
"Current cardinality of the active_tenants sorted set (updated once per "
|
||||
"generator invocation when the gate reads it).",
|
||||
)
|
||||
|
||||
_marked_total = Counter(
|
||||
"onyx_tenant_work_gating_marked_total",
|
||||
"Writes into active_tenants, labelled by caller.",
|
||||
["caller"],
|
||||
)
|
||||
|
||||
_skipped_total = Counter(
|
||||
"onyx_tenant_work_gating_skipped_total",
|
||||
"Per-tenant fanouts skipped by the gate (enforce mode only), by task.",
|
||||
["task"],
|
||||
)
|
||||
|
||||
_would_skip_total = Counter(
|
||||
"onyx_tenant_work_gating_would_skip_total",
|
||||
"Per-tenant fanouts that would have been skipped if enforce were on "
|
||||
"(shadow counter), by task.",
|
||||
["task"],
|
||||
)
|
||||
|
||||
_full_fanout_total = Counter(
|
||||
"onyx_tenant_work_gating_full_fanout_total",
|
||||
"Generator invocations that bypassed the gate for a full fanout cycle, by task.",
|
||||
["task"],
|
||||
)
|
||||
|
||||
|
||||
def _now_ms() -> int:
|
||||
return int(time.time() * 1000)
|
||||
|
||||
@@ -54,10 +90,14 @@ def mark_tenant_active(tenant_id: str) -> None:
|
||||
logger.exception(f"mark_tenant_active failed: tenant_id={tenant_id}")
|
||||
|
||||
|
||||
def maybe_mark_tenant_active(tenant_id: str) -> None:
|
||||
def maybe_mark_tenant_active(tenant_id: str, caller: str = "unknown") -> None:
|
||||
"""Convenience wrapper for writer call sites: records the tenant only
|
||||
when the feature flag is on. Fully defensive — never raises, so a Redis
|
||||
outage or flag-read failure can't abort the calling task."""
|
||||
outage or flag-read failure can't abort the calling task.
|
||||
|
||||
`caller` labels the Prometheus counter so a dashboard can show which
|
||||
consumer is firing the hook most.
|
||||
"""
|
||||
try:
|
||||
# Local import to avoid a module-load cycle: OnyxRuntime imports
|
||||
# onyx.redis.redis_pool, so a top-level import here would wedge on
|
||||
@@ -67,10 +107,44 @@ def maybe_mark_tenant_active(tenant_id: str) -> None:
|
||||
if not OnyxRuntime.get_tenant_work_gating_enabled():
|
||||
return
|
||||
mark_tenant_active(tenant_id)
|
||||
_marked_total.labels(caller=caller).inc()
|
||||
except Exception:
|
||||
logger.exception(f"maybe_mark_tenant_active failed: tenant_id={tenant_id}")
|
||||
|
||||
|
||||
def observe_active_set_size() -> int | None:
|
||||
"""Return `ZCARD active_tenants` and update the Prometheus gauge. Call
|
||||
from the gate generator once per invocation so the dashboard has a
|
||||
live reading.
|
||||
|
||||
Returns `None` on Redis error or in single-tenant mode; callers can
|
||||
tolerate that (gauge simply doesn't update)."""
|
||||
if not MULTI_TENANT:
|
||||
return None
|
||||
try:
|
||||
size = cast(int, _client().zcard(_SET_KEY))
|
||||
_active_set_size.set(size)
|
||||
return size
|
||||
except Exception:
|
||||
logger.exception("observe_active_set_size failed")
|
||||
return None
|
||||
|
||||
|
||||
def record_gate_decision(task_name: str, skipped: bool) -> None:
|
||||
"""Increment skip counters from the gate generator. Called once per
|
||||
tenant that the gate would skip. Always increments the shadow counter;
|
||||
increments the enforced counter only when `skipped=True`."""
|
||||
_would_skip_total.labels(task=task_name).inc()
|
||||
if skipped:
|
||||
_skipped_total.labels(task=task_name).inc()
|
||||
|
||||
|
||||
def record_full_fanout_cycle(task_name: str) -> None:
|
||||
"""Increment the full-fanout counter. Called once per generator
|
||||
invocation where the gate is bypassed (interval elapsed OR fail-open)."""
|
||||
_full_fanout_total.labels(task=task_name).inc()
|
||||
|
||||
|
||||
def get_active_tenants(ttl_seconds: int) -> set[str] | None:
|
||||
"""Return tenants whose last-seen timestamp is within `ttl_seconds` of
|
||||
now.
|
||||
|
||||
@@ -40,6 +40,8 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.permissions import require_permission
|
||||
from onyx.background.celery.versioned_apps.client import app as celery_app
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
@@ -51,6 +53,9 @@ from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.enums import Permission
|
||||
from onyx.db.models import User
|
||||
from onyx.document_index.interfaces import DocumentMetadata
|
||||
from onyx.error_handling.error_codes import OnyxErrorCode
|
||||
from onyx.error_handling.exceptions import OnyxError
|
||||
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILE_SIZE_BYTES
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_FILES_PER_UPLOAD
|
||||
from onyx.server.features.build.configs import USER_LIBRARY_MAX_TOTAL_SIZE_BYTES
|
||||
@@ -128,6 +133,49 @@ class DeleteFileResponse(BaseModel):
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _looks_like_pdf(filename: str, content_type: str | None) -> bool:
|
||||
"""True if either the filename or the content-type indicates a PDF.
|
||||
|
||||
Client-supplied ``content_type`` can be spoofed (e.g. a PDF uploaded with
|
||||
``Content-Type: application/octet-stream``), so we also fall back to
|
||||
extension-based detection via ``mimetypes.guess_type`` on the filename.
|
||||
"""
|
||||
if content_type == "application/pdf":
|
||||
return True
|
||||
guessed, _ = mimetypes.guess_type(filename)
|
||||
return guessed == "application/pdf"
|
||||
|
||||
|
||||
def _check_pdf_image_caps(
|
||||
filename: str, content: bytes, content_type: str | None, batch_total: int
|
||||
) -> int:
|
||||
"""Enforce per-file and per-batch embedded-image caps for PDFs.
|
||||
|
||||
Returns the number of embedded images in this file (0 for non-PDFs) so
|
||||
callers can update their running batch total. Raises OnyxError(INVALID_INPUT)
|
||||
if either cap is exceeded.
|
||||
"""
|
||||
if not _looks_like_pdf(filename, content_type):
|
||||
return 0
|
||||
file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
|
||||
# Short-circuit at the larger cap so we get a useful count for both checks.
|
||||
count = count_pdf_embedded_images(BytesIO(content), max(file_cap, batch_cap))
|
||||
if count > file_cap:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INVALID_INPUT,
|
||||
f"PDF '{filename}' contains too many embedded images "
|
||||
f"(more than {file_cap}). Try splitting the document into smaller files.",
|
||||
)
|
||||
if batch_total + count > batch_cap:
|
||||
raise OnyxError(
|
||||
OnyxErrorCode.INVALID_INPUT,
|
||||
f"Upload would exceed the {batch_cap}-image limit across all "
|
||||
f"files in this batch. Try uploading fewer image-heavy files at once.",
|
||||
)
|
||||
return count
|
||||
|
||||
|
||||
def _sanitize_path(path: str) -> str:
|
||||
"""Sanitize a file path, removing traversal attempts and normalizing.
|
||||
|
||||
@@ -356,6 +404,7 @@ async def upload_files(
|
||||
|
||||
uploaded_entries: list[LibraryEntryResponse] = []
|
||||
total_size = 0
|
||||
batch_image_total = 0
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Sanitize the base path
|
||||
@@ -375,6 +424,14 @@ async def upload_files(
|
||||
detail=f"File '{file.filename}' exceeds maximum size of {USER_LIBRARY_MAX_FILE_SIZE_BYTES // (1024 * 1024)}MB",
|
||||
)
|
||||
|
||||
# Reject PDFs with an unreasonable per-file or per-batch image count
|
||||
batch_image_total += _check_pdf_image_caps(
|
||||
filename=file.filename or "unnamed",
|
||||
content=content,
|
||||
content_type=file.content_type,
|
||||
batch_total=batch_image_total,
|
||||
)
|
||||
|
||||
# Validate cumulative storage (existing + this upload batch)
|
||||
total_size += file_size
|
||||
if existing_usage + total_size > USER_LIBRARY_MAX_TOTAL_SIZE_BYTES:
|
||||
@@ -473,6 +530,7 @@ async def upload_zip(
|
||||
|
||||
uploaded_entries: list[LibraryEntryResponse] = []
|
||||
total_size = 0
|
||||
batch_image_total = 0
|
||||
|
||||
# Extract zip contents into a subfolder named after the zip file
|
||||
zip_name = api_sanitize_filename(file.filename or "upload")
|
||||
@@ -511,6 +569,36 @@ async def upload_zip(
|
||||
logger.warning(f"Skipping '{zip_info.filename}' - exceeds max size")
|
||||
continue
|
||||
|
||||
# Skip PDFs that would trip the per-file or per-batch image
|
||||
# cap (would OOM the user-file-processing worker). Matches
|
||||
# /upload behavior but uses skip-and-warn to stay consistent
|
||||
# with the zip path's handling of oversized files.
|
||||
zip_file_name = zip_info.filename.split("/")[-1]
|
||||
zip_content_type, _ = mimetypes.guess_type(zip_file_name)
|
||||
if zip_content_type == "application/pdf":
|
||||
image_count = count_pdf_embedded_images(
|
||||
BytesIO(file_content),
|
||||
max(
|
||||
MAX_EMBEDDED_IMAGES_PER_FILE,
|
||||
MAX_EMBEDDED_IMAGES_PER_UPLOAD,
|
||||
),
|
||||
)
|
||||
if image_count > MAX_EMBEDDED_IMAGES_PER_FILE:
|
||||
logger.warning(
|
||||
"Skipping '%s' - exceeds %d per-file embedded-image cap",
|
||||
zip_info.filename,
|
||||
MAX_EMBEDDED_IMAGES_PER_FILE,
|
||||
)
|
||||
continue
|
||||
if batch_image_total + image_count > MAX_EMBEDDED_IMAGES_PER_UPLOAD:
|
||||
logger.warning(
|
||||
"Skipping '%s' - would exceed %d per-batch embedded-image cap",
|
||||
zip_info.filename,
|
||||
MAX_EMBEDDED_IMAGES_PER_UPLOAD,
|
||||
)
|
||||
continue
|
||||
batch_image_total += image_count
|
||||
|
||||
total_size += file_size
|
||||
|
||||
# Validate cumulative storage
|
||||
|
||||
@@ -9,7 +9,10 @@ from pydantic import ConfigDict
|
||||
from pydantic import Field
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
from onyx.configs.app_configs import MAX_EMBEDDED_IMAGES_PER_UPLOAD
|
||||
from onyx.db.llm import fetch_default_llm_model
|
||||
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
|
||||
from onyx.file_processing.extract_file_text import extract_file_text
|
||||
from onyx.file_processing.extract_file_text import get_file_ext
|
||||
from onyx.file_processing.file_types import OnyxFileExtensions
|
||||
@@ -190,6 +193,11 @@ def categorize_uploaded_files(
|
||||
token_threshold_k * 1000 if token_threshold_k else None
|
||||
) # 0 → None = no limit
|
||||
|
||||
# Running total of embedded images across PDFs in this batch. Once the
|
||||
# aggregate cap is reached, subsequent PDFs in the same upload are
|
||||
# rejected even if they'd individually fit under MAX_EMBEDDED_IMAGES_PER_FILE.
|
||||
batch_image_total = 0
|
||||
|
||||
for upload in files:
|
||||
try:
|
||||
filename = get_safe_filename(upload)
|
||||
@@ -252,6 +260,47 @@ def categorize_uploaded_files(
|
||||
)
|
||||
continue
|
||||
|
||||
# Reject PDFs with an unreasonable number of embedded images
|
||||
# (either per-file or accumulated across this upload batch).
|
||||
# A PDF with thousands of embedded images can OOM the
|
||||
# user-file-processing celery worker because every image is
|
||||
# decoded with PIL and then sent to the vision LLM.
|
||||
if extension == ".pdf":
|
||||
file_cap = MAX_EMBEDDED_IMAGES_PER_FILE
|
||||
batch_cap = MAX_EMBEDDED_IMAGES_PER_UPLOAD
|
||||
# Use the larger of the two caps as the short-circuit
|
||||
# threshold so we get a useful count for both checks.
|
||||
# count_pdf_embedded_images restores the stream position.
|
||||
count = count_pdf_embedded_images(
|
||||
upload.file, max(file_cap, batch_cap)
|
||||
)
|
||||
if count > file_cap:
|
||||
results.rejected.append(
|
||||
RejectedFile(
|
||||
filename=filename,
|
||||
reason=(
|
||||
f"PDF contains too many embedded images "
|
||||
f"(more than {file_cap}). Try splitting "
|
||||
f"the document into smaller files."
|
||||
),
|
||||
)
|
||||
)
|
||||
continue
|
||||
if batch_image_total + count > batch_cap:
|
||||
results.rejected.append(
|
||||
RejectedFile(
|
||||
filename=filename,
|
||||
reason=(
|
||||
f"Upload would exceed the "
|
||||
f"{batch_cap}-image limit across all "
|
||||
f"files in this batch. Try uploading "
|
||||
f"fewer image-heavy files at once."
|
||||
),
|
||||
)
|
||||
)
|
||||
continue
|
||||
batch_image_total += count
|
||||
|
||||
text_content = extract_file_text(
|
||||
file=upload.file,
|
||||
file_name=filename,
|
||||
|
||||
@@ -3,6 +3,7 @@ from fastapi import Depends
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.auth.permissions import require_permission
|
||||
from onyx.configs.app_configs import ONYX_DISABLE_VESPA
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.enums import Permission
|
||||
from onyx.db.models import User
|
||||
@@ -49,6 +50,7 @@ def get_opensearch_retrieval_status(
|
||||
enable_opensearch_retrieval = get_opensearch_retrieval_state(db_session)
|
||||
return OpenSearchRetrievalStatusResponse(
|
||||
enable_opensearch_retrieval=enable_opensearch_retrieval,
|
||||
toggling_retrieval_is_disabled=ONYX_DISABLE_VESPA,
|
||||
)
|
||||
|
||||
|
||||
@@ -63,4 +65,5 @@ def set_opensearch_retrieval_status(
|
||||
)
|
||||
return OpenSearchRetrievalStatusResponse(
|
||||
enable_opensearch_retrieval=request.enable_opensearch_retrieval,
|
||||
toggling_retrieval_is_disabled=ONYX_DISABLE_VESPA,
|
||||
)
|
||||
|
||||
@@ -19,3 +19,4 @@ class OpenSearchRetrievalStatusRequest(BaseModel):
|
||||
class OpenSearchRetrievalStatusResponse(BaseModel):
|
||||
model_config = {"frozen": True}
|
||||
enable_opensearch_retrieval: bool
|
||||
toggling_retrieval_is_disabled: bool = False
|
||||
|
||||
@@ -395,6 +395,15 @@ class WorkerHealthCollector(_CachedCollector):
|
||||
|
||||
Reads worker status from ``WorkerHeartbeatMonitor`` which listens
|
||||
to the Celery event stream via a single persistent connection.
|
||||
|
||||
TODO: every monitoring pod subscribes to the cluster-wide Celery event
|
||||
stream, so each replica reports health for *all* workers in the cluster,
|
||||
not just itself. Prometheus distinguishes the replicas via the ``instance``
|
||||
label, so this doesn't break scraping, but it means N monitoring replicas
|
||||
do N× the work and may emit slightly inconsistent snapshots of the same
|
||||
cluster. The proper fix is to have each worker expose its own health (or
|
||||
to elect a single monitoring replica as the reporter) rather than
|
||||
broadcasting the full cluster view from every monitoring pod.
|
||||
"""
|
||||
|
||||
def __init__(self, cache_ttl: float = 30.0) -> None:
|
||||
@@ -413,10 +422,16 @@ class WorkerHealthCollector(_CachedCollector):
|
||||
"onyx_celery_active_worker_count",
|
||||
"Number of active Celery workers with recent heartbeats",
|
||||
)
|
||||
# Celery hostnames are ``{worker_type}@{nodename}`` (see supervisord.conf).
|
||||
# Emitting only the worker_type as a label causes N replicas of the same
|
||||
# type to collapse into identical timeseries within a single scrape,
|
||||
# which Prometheus rejects as "duplicate sample for timestamp". Split
|
||||
# the pieces into separate labels so each replica is distinct; callers
|
||||
# can still ``sum by (worker_type)`` to recover the old aggregated view.
|
||||
worker_up = GaugeMetricFamily(
|
||||
"onyx_celery_worker_up",
|
||||
"Whether a specific Celery worker is alive (1=up, 0=down)",
|
||||
labels=["worker"],
|
||||
labels=["worker_type", "hostname"],
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -424,11 +439,15 @@ class WorkerHealthCollector(_CachedCollector):
|
||||
alive_count = sum(1 for alive in status.values() if alive)
|
||||
active_workers.add_metric([], alive_count)
|
||||
|
||||
for hostname in sorted(status):
|
||||
# Use short name (before @) for single-host deployments,
|
||||
# full hostname when multiple hosts share a worker type.
|
||||
label = hostname.split("@")[0]
|
||||
worker_up.add_metric([label], 1 if status[hostname] else 0)
|
||||
for full_hostname in sorted(status):
|
||||
worker_type, sep, host = full_hostname.partition("@")
|
||||
if not sep:
|
||||
# Hostname didn't contain "@" — fall back to using the
|
||||
# whole string as the hostname with an empty type.
|
||||
worker_type, host = "", full_hostname
|
||||
worker_up.add_metric(
|
||||
[worker_type, host], 1 if status[full_hostname] else 0
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to collect worker health metrics", exc_info=True)
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from onyx.configs.app_configs import DISABLE_VECTOR_DB
|
||||
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
|
||||
from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
|
||||
from onyx.configs.app_configs import MANAGED_VESPA
|
||||
from onyx.configs.app_configs import ONYX_DISABLE_VESPA
|
||||
from onyx.configs.app_configs import VESPA_NUM_ATTEMPTS_ON_STARTUP
|
||||
from onyx.configs.constants import KV_REINDEX_KEY
|
||||
from onyx.configs.embedding_configs import SUPPORTED_EMBEDDING_MODELS
|
||||
@@ -126,10 +127,11 @@ def setup_onyx(
|
||||
"DISABLE_VECTOR_DB is set — skipping document index setup and embedding model warm-up."
|
||||
)
|
||||
else:
|
||||
# Ensure Vespa is setup correctly, this step is relatively near the end
|
||||
# because Vespa takes a bit of time to start up
|
||||
# Ensure the document indices are setup correctly. This step is
|
||||
# relatively near the end because Vespa takes a bit of time to start up.
|
||||
logger.notice("Verifying Document Index(s) is/are available.")
|
||||
# This flow is for setting up the document index so we get all indices here.
|
||||
# This flow is for setting up the document index so we get all indices
|
||||
# here.
|
||||
document_indices = get_all_document_indices(
|
||||
search_settings,
|
||||
secondary_search_settings,
|
||||
@@ -335,7 +337,7 @@ def setup_multitenant_onyx() -> None:
|
||||
|
||||
# For Managed Vespa, the schema is sent over via the Vespa Console manually.
|
||||
# NOTE: Pretty sure this code is never hit in any production environment.
|
||||
if not MANAGED_VESPA:
|
||||
if not MANAGED_VESPA and not ONYX_DISABLE_VESPA:
|
||||
setup_vespa_multitenant(SUPPORTED_EMBEDDING_MODELS)
|
||||
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ R = TypeVar("R")
|
||||
KT = TypeVar("KT") # Key type
|
||||
VT = TypeVar("VT") # Value type
|
||||
_T = TypeVar("_T") # Default type
|
||||
_MISSING: object = object()
|
||||
|
||||
|
||||
class ThreadSafeDict(MutableMapping[KT, VT]):
|
||||
@@ -117,10 +118,10 @@ class ThreadSafeDict(MutableMapping[KT, VT]):
|
||||
with self.lock:
|
||||
return self._dict.get(key, default)
|
||||
|
||||
def pop(self, key: KT, default: Any = None) -> Any:
|
||||
def pop(self, key: KT, default: Any = _MISSING) -> Any:
|
||||
"""Remove and return a value with optional default, atomically."""
|
||||
with self.lock:
|
||||
if default is None:
|
||||
if default is _MISSING:
|
||||
return self._dict.pop(key)
|
||||
return self._dict.pop(key, default)
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ attrs==25.4.0
|
||||
# jsonschema
|
||||
# referencing
|
||||
# zeep
|
||||
authlib==1.6.9
|
||||
authlib==1.6.11
|
||||
# via fastmcp
|
||||
azure-cognitiveservices-speech==1.38.0
|
||||
babel==2.17.0
|
||||
@@ -214,7 +214,9 @@ distro==1.9.0
|
||||
dnspython==2.8.0
|
||||
# via email-validator
|
||||
docstring-parser==0.17.0
|
||||
# via cyclopts
|
||||
# via
|
||||
# cyclopts
|
||||
# google-cloud-aiplatform
|
||||
docutils==0.22.3
|
||||
# via rich-rst
|
||||
dropbox==12.0.2
|
||||
@@ -270,7 +272,13 @@ gitdb==4.0.12
|
||||
gitpython==3.1.45
|
||||
# via braintrust
|
||||
google-api-core==2.28.1
|
||||
# via google-api-python-client
|
||||
# via
|
||||
# google-api-python-client
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
google-api-python-client==2.86.0
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
@@ -278,21 +286,61 @@ google-auth==2.48.0
|
||||
# google-api-python-client
|
||||
# google-auth-httplib2
|
||||
# google-auth-oauthlib
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-auth-httplib2==0.1.0
|
||||
# via google-api-python-client
|
||||
google-auth-oauthlib==1.0.0
|
||||
google-cloud-aiplatform==1.133.0
|
||||
# via litellm
|
||||
google-cloud-bigquery==3.41.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.5.1
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
google-cloud-resource-manager==1.17.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==3.10.1
|
||||
# via google-cloud-aiplatform
|
||||
google-crc32c==1.8.0
|
||||
# via
|
||||
# google-cloud-storage
|
||||
# google-resumable-media
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# onyx
|
||||
google-resumable-media==2.8.2
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
googleapis-common-protos==1.72.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# opentelemetry-exporter-otlp-proto-http
|
||||
greenlet==3.2.4
|
||||
# via
|
||||
# playwright
|
||||
# sqlalchemy
|
||||
grpc-google-iam-v1==0.14.4
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.80.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpcio-status==1.80.0
|
||||
# via google-api-core
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
@@ -443,7 +491,7 @@ magika==0.6.3
|
||||
# via markitdown
|
||||
makefun==1.16.0
|
||||
# via fastapi-users
|
||||
mako==1.2.4
|
||||
mako==1.3.11
|
||||
# via alembic
|
||||
mammoth==1.11.0
|
||||
# via markitdown
|
||||
@@ -559,6 +607,8 @@ packaging==24.2
|
||||
# dask
|
||||
# distributed
|
||||
# fastmcp
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# huggingface-hub
|
||||
# jira
|
||||
# kombu
|
||||
@@ -605,12 +655,19 @@ propcache==0.4.1
|
||||
# aiohttp
|
||||
# yarl
|
||||
proto-plus==1.26.1
|
||||
# via google-api-core
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
protobuf==6.33.5
|
||||
# via
|
||||
# ddtrace
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# onnxruntime
|
||||
# opentelemetry-proto
|
||||
# proto-plus
|
||||
@@ -643,6 +700,7 @@ pydantic==2.11.7
|
||||
# exa-py
|
||||
# fastapi
|
||||
# fastmcp
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# langchain-core
|
||||
# langfuse
|
||||
@@ -679,7 +737,7 @@ pynacl==1.6.2
|
||||
pypandoc-binary==1.16.2
|
||||
pyparsing==3.2.5
|
||||
# via httplib2
|
||||
pypdf==6.10.0
|
||||
pypdf==6.10.2
|
||||
# via unstructured-client
|
||||
pyperclip==1.11.0
|
||||
# via fastmcp
|
||||
@@ -701,6 +759,7 @@ python-dateutil==2.8.2
|
||||
# botocore
|
||||
# celery
|
||||
# dateparser
|
||||
# google-cloud-bigquery
|
||||
# htmldate
|
||||
# hubspot-api-client
|
||||
# kubernetes
|
||||
@@ -779,6 +838,8 @@ requests==2.33.0
|
||||
# dropbox
|
||||
# exa-py
|
||||
# google-api-core
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# hubspot-api-client
|
||||
# jira
|
||||
@@ -951,7 +1012,9 @@ typing-extensions==4.15.0
|
||||
# exa-py
|
||||
# exceptiongroup
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# grpcio
|
||||
# huggingface-hub
|
||||
# jira
|
||||
# langchain-core
|
||||
|
||||
@@ -114,6 +114,8 @@ distlib==0.4.0
|
||||
# via virtualenv
|
||||
distro==1.9.0
|
||||
# via openai
|
||||
docstring-parser==0.17.0
|
||||
# via google-cloud-aiplatform
|
||||
durationpy==0.10
|
||||
# via kubernetes
|
||||
execnet==2.1.2
|
||||
@@ -141,14 +143,65 @@ frozenlist==1.8.0
|
||||
# aiosignal
|
||||
fsspec==2025.10.0
|
||||
# via huggingface-hub
|
||||
google-api-core==2.28.1
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-cloud-aiplatform==1.133.0
|
||||
# via litellm
|
||||
google-cloud-bigquery==3.41.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.5.1
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
google-cloud-resource-manager==1.17.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==3.10.1
|
||||
# via google-cloud-aiplatform
|
||||
google-crc32c==1.8.0
|
||||
# via
|
||||
# google-cloud-storage
|
||||
# google-resumable-media
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# onyx
|
||||
google-resumable-media==2.8.2
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
googleapis-common-protos==1.72.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
greenlet==3.2.4 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'
|
||||
# via sqlalchemy
|
||||
grpc-google-iam-v1==0.14.4
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.80.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpcio-status==1.80.0
|
||||
# via google-api-core
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
@@ -218,7 +271,7 @@ kubernetes==31.0.0
|
||||
# via onyx
|
||||
litellm==1.81.6
|
||||
# via onyx
|
||||
mako==1.2.4
|
||||
mako==1.3.11
|
||||
# via alembic
|
||||
manygo==0.2.0
|
||||
markdown-it-py==4.0.0
|
||||
@@ -267,6 +320,8 @@ openapi-generator-cli==7.17.0
|
||||
packaging==24.2
|
||||
# via
|
||||
# black
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# hatchling
|
||||
# huggingface-hub
|
||||
# ipykernel
|
||||
@@ -307,6 +362,20 @@ propcache==0.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
proto-plus==1.26.1
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
protobuf==6.33.5
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# proto-plus
|
||||
psutil==7.1.3
|
||||
# via ipykernel
|
||||
ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
|
||||
@@ -328,6 +397,7 @@ pydantic==2.11.7
|
||||
# agent-client-protocol
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# litellm
|
||||
# mcp
|
||||
@@ -364,6 +434,7 @@ python-dateutil==2.8.2
|
||||
# via
|
||||
# aiobotocore
|
||||
# botocore
|
||||
# google-cloud-bigquery
|
||||
# jupyter-client
|
||||
# kubernetes
|
||||
# matplotlib
|
||||
@@ -398,6 +469,9 @@ reorder-python-imports-black==3.14.0
|
||||
requests==2.33.0
|
||||
# via
|
||||
# cohere
|
||||
# google-api-core
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
# requests-oauthlib
|
||||
@@ -498,7 +572,9 @@ typing-extensions==4.15.0
|
||||
# celery-types
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# grpcio
|
||||
# huggingface-hub
|
||||
# ipython
|
||||
# mcp
|
||||
|
||||
@@ -87,6 +87,8 @@ discord-py==2.4.0
|
||||
# via onyx
|
||||
distro==1.9.0
|
||||
# via openai
|
||||
docstring-parser==0.17.0
|
||||
# via google-cloud-aiplatform
|
||||
durationpy==0.10
|
||||
# via kubernetes
|
||||
fastapi==0.133.1
|
||||
@@ -103,12 +105,63 @@ frozenlist==1.8.0
|
||||
# aiosignal
|
||||
fsspec==2025.10.0
|
||||
# via huggingface-hub
|
||||
google-api-core==2.28.1
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-cloud-aiplatform==1.133.0
|
||||
# via litellm
|
||||
google-cloud-bigquery==3.41.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.5.1
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
google-cloud-resource-manager==1.17.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==3.10.1
|
||||
# via google-cloud-aiplatform
|
||||
google-crc32c==1.8.0
|
||||
# via
|
||||
# google-cloud-storage
|
||||
# google-resumable-media
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# onyx
|
||||
google-resumable-media==2.8.2
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
googleapis-common-protos==1.72.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpc-google-iam-v1==0.14.4
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.80.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpcio-status==1.80.0
|
||||
# via google-api-core
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
@@ -184,7 +237,10 @@ openai==2.14.0
|
||||
# litellm
|
||||
# onyx
|
||||
packaging==24.2
|
||||
# via huggingface-hub
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# huggingface-hub
|
||||
parameterized==0.9.0
|
||||
# via cohere
|
||||
posthog==3.7.4
|
||||
@@ -198,6 +254,20 @@ propcache==0.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
proto-plus==1.26.1
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
protobuf==6.33.5
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# proto-plus
|
||||
py==1.11.0
|
||||
# via retry
|
||||
pyasn1==0.6.3
|
||||
@@ -213,6 +283,7 @@ pydantic==2.11.7
|
||||
# agent-client-protocol
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# litellm
|
||||
# mcp
|
||||
@@ -231,6 +302,7 @@ python-dateutil==2.8.2
|
||||
# via
|
||||
# aiobotocore
|
||||
# botocore
|
||||
# google-cloud-bigquery
|
||||
# kubernetes
|
||||
# posthog
|
||||
python-dotenv==1.1.1
|
||||
@@ -254,6 +326,9 @@ regex==2025.11.3
|
||||
requests==2.33.0
|
||||
# via
|
||||
# cohere
|
||||
# google-api-core
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
# posthog
|
||||
@@ -318,7 +393,9 @@ typing-extensions==4.15.0
|
||||
# anyio
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# grpcio
|
||||
# huggingface-hub
|
||||
# mcp
|
||||
# openai
|
||||
|
||||
@@ -102,6 +102,8 @@ discord-py==2.4.0
|
||||
# via onyx
|
||||
distro==1.9.0
|
||||
# via openai
|
||||
docstring-parser==0.17.0
|
||||
# via google-cloud-aiplatform
|
||||
durationpy==0.10
|
||||
# via kubernetes
|
||||
einops==0.8.1
|
||||
@@ -125,12 +127,63 @@ fsspec==2025.10.0
|
||||
# via
|
||||
# huggingface-hub
|
||||
# torch
|
||||
google-api-core==2.28.1
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
google-auth==2.48.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-core
|
||||
# google-cloud-resource-manager
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
google-cloud-aiplatform==1.133.0
|
||||
# via litellm
|
||||
google-cloud-bigquery==3.41.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.5.1
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
google-cloud-resource-manager==1.17.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==3.10.1
|
||||
# via google-cloud-aiplatform
|
||||
google-crc32c==1.8.0
|
||||
# via
|
||||
# google-cloud-storage
|
||||
# google-resumable-media
|
||||
google-genai==1.52.0
|
||||
# via onyx
|
||||
# via
|
||||
# google-cloud-aiplatform
|
||||
# onyx
|
||||
google-resumable-media==2.8.2
|
||||
# via
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
googleapis-common-protos==1.72.0
|
||||
# via
|
||||
# google-api-core
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpc-google-iam-v1==0.14.4
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.80.0
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
grpcio-status==1.80.0
|
||||
# via google-api-core
|
||||
h11==0.16.0
|
||||
# via
|
||||
# httpcore
|
||||
@@ -265,6 +318,8 @@ openai==2.14.0
|
||||
packaging==24.2
|
||||
# via
|
||||
# accelerate
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-bigquery
|
||||
# huggingface-hub
|
||||
# kombu
|
||||
# transformers
|
||||
@@ -282,6 +337,20 @@ propcache==0.4.1
|
||||
# via
|
||||
# aiohttp
|
||||
# yarl
|
||||
proto-plus==1.26.1
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
protobuf==6.33.5
|
||||
# via
|
||||
# google-api-core
|
||||
# google-cloud-aiplatform
|
||||
# google-cloud-resource-manager
|
||||
# googleapis-common-protos
|
||||
# grpc-google-iam-v1
|
||||
# grpcio-status
|
||||
# proto-plus
|
||||
psutil==7.1.3
|
||||
# via accelerate
|
||||
py==1.11.0
|
||||
@@ -299,6 +368,7 @@ pydantic==2.11.7
|
||||
# agent-client-protocol
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# litellm
|
||||
# mcp
|
||||
@@ -318,6 +388,7 @@ python-dateutil==2.8.2
|
||||
# aiobotocore
|
||||
# botocore
|
||||
# celery
|
||||
# google-cloud-bigquery
|
||||
# kubernetes
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
@@ -344,6 +415,9 @@ regex==2025.11.3
|
||||
requests==2.33.0
|
||||
# via
|
||||
# cohere
|
||||
# google-api-core
|
||||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# google-genai
|
||||
# kubernetes
|
||||
# requests-oauthlib
|
||||
@@ -437,7 +511,9 @@ typing-extensions==4.15.0
|
||||
# anyio
|
||||
# cohere
|
||||
# fastapi
|
||||
# google-cloud-aiplatform
|
||||
# google-genai
|
||||
# grpcio
|
||||
# huggingface-hub
|
||||
# mcp
|
||||
# openai
|
||||
|
||||
@@ -46,7 +46,7 @@ stop_and_remove_containers
|
||||
# Start the PostgreSQL container with optional volume
|
||||
echo "Starting PostgreSQL container..."
|
||||
if [[ -n "$POSTGRES_VOLUME" ]]; then
|
||||
docker run -p 5432:5432 --name onyx_postgres -e POSTGRES_PASSWORD=password -d -v $POSTGRES_VOLUME:/var/lib/postgresql/data postgres -c max_connections=250
|
||||
docker run -p 5432:5432 --name onyx_postgres -e POSTGRES_PASSWORD=password -d -v "$POSTGRES_VOLUME":/var/lib/postgresql/data postgres -c max_connections=250
|
||||
else
|
||||
docker run -p 5432:5432 --name onyx_postgres -e POSTGRES_PASSWORD=password -d postgres -c max_connections=250
|
||||
fi
|
||||
@@ -54,7 +54,7 @@ fi
|
||||
# Start the Vespa container with optional volume
|
||||
echo "Starting Vespa container..."
|
||||
if [[ -n "$VESPA_VOLUME" ]]; then
|
||||
docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 -v $VESPA_VOLUME:/opt/vespa/var vespaengine/vespa:8
|
||||
docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 -v "$VESPA_VOLUME":/opt/vespa/var vespaengine/vespa:8
|
||||
else
|
||||
docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8
|
||||
fi
|
||||
@@ -85,7 +85,7 @@ docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-en
|
||||
# Start the Redis container with optional volume
|
||||
echo "Starting Redis container..."
|
||||
if [[ -n "$REDIS_VOLUME" ]]; then
|
||||
docker run --detach --name onyx_redis --publish 6379:6379 -v $REDIS_VOLUME:/data redis
|
||||
docker run --detach --name onyx_redis --publish 6379:6379 -v "$REDIS_VOLUME":/data redis
|
||||
else
|
||||
docker run --detach --name onyx_redis --publish 6379:6379 redis
|
||||
fi
|
||||
@@ -93,7 +93,7 @@ fi
|
||||
# Start the MinIO container with optional volume
|
||||
echo "Starting MinIO container..."
|
||||
if [[ -n "$MINIO_VOLUME" ]]; then
|
||||
docker run --detach --name onyx_minio --publish 9004:9000 --publish 9005:9001 -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin -v $MINIO_VOLUME:/data minio/minio server /data --console-address ":9001"
|
||||
docker run --detach --name onyx_minio --publish 9004:9000 --publish 9005:9001 -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin -v "$MINIO_VOLUME":/data minio/minio server /data --console-address ":9001"
|
||||
else
|
||||
docker run --detach --name onyx_minio --publish 9004:9000 --publish 9005:9001 -e MINIO_ROOT_USER=minioadmin -e MINIO_ROOT_PASSWORD=minioadmin minio/minio server /data --console-address ":9001"
|
||||
fi
|
||||
@@ -111,6 +111,7 @@ sleep 1
|
||||
|
||||
# Alembic should be configured in the virtualenv for this repo
|
||||
if [[ -f "../.venv/bin/activate" ]]; then
|
||||
# shellcheck source=/dev/null
|
||||
source ../.venv/bin/activate
|
||||
else
|
||||
echo "Warning: Python virtual environment not found at .venv/bin/activate; alembic may not work."
|
||||
|
||||
@@ -9,8 +9,10 @@ import pytest
|
||||
|
||||
from onyx.configs.constants import BlobType
|
||||
from onyx.connectors.blob.connector import BlobStorageConnector
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import is_tabular_file
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import TabularSection
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.file_processing.extract_file_text import get_file_ext
|
||||
from onyx.file_processing.file_types import OnyxFileExtensions
|
||||
@@ -111,15 +113,18 @@ def test_blob_s3_connector(
|
||||
|
||||
for doc in all_docs:
|
||||
section = doc.sections[0]
|
||||
assert isinstance(section, TextSection)
|
||||
|
||||
file_extension = get_file_ext(doc.semantic_identifier)
|
||||
if file_extension in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS:
|
||||
if is_tabular_file(doc.semantic_identifier):
|
||||
assert isinstance(section, TabularSection)
|
||||
assert len(section.text) > 0
|
||||
continue
|
||||
|
||||
# unknown extension
|
||||
assert len(section.text) == 0
|
||||
assert isinstance(section, TextSection)
|
||||
file_extension = get_file_ext(doc.semantic_identifier)
|
||||
if file_extension in OnyxFileExtensions.TEXT_AND_DOCUMENT_EXTENSIONS:
|
||||
assert len(section.text) > 0
|
||||
else:
|
||||
assert len(section.text) == 0
|
||||
|
||||
|
||||
@patch(
|
||||
|
||||
@@ -1,346 +0,0 @@
|
||||
"""External dependency unit tests for `index_doc_batch_prepare`.
|
||||
|
||||
Validates the file_id lifecycle that runs alongside the document upsert:
|
||||
|
||||
* `document.file_id` is written on insert AND on conflict (upsert path)
|
||||
* Newly-staged files get promoted from INDEXING_STAGING -> CONNECTOR
|
||||
* Replaced files are deleted from both `file_record` and S3
|
||||
* No-op when the file_id is unchanged
|
||||
|
||||
Uses real PostgreSQL + real S3/MinIO via the file store.
|
||||
"""
|
||||
|
||||
from collections.abc import Generator
|
||||
from io import BytesIO
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import FileOrigin
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import IndexAttemptMetadata
|
||||
from onyx.connectors.models import InputType
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.db.enums import AccessType
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
from onyx.db.file_record import get_filerecord_by_file_id_optional
|
||||
from onyx.db.models import Connector
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.db.models import Credential
|
||||
from onyx.db.models import Document as DBDocument
|
||||
from onyx.db.models import FileRecord
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_doc(doc_id: str, file_id: str | None = None) -> Document:
|
||||
"""Minimal Document for indexing-pipeline tests. MOCK_CONNECTOR avoids
|
||||
triggering the hierarchy-node linking branch (NOTION/CONFLUENCE only)."""
|
||||
return Document(
|
||||
id=doc_id,
|
||||
source=DocumentSource.MOCK_CONNECTOR,
|
||||
semantic_identifier=f"semantic-{doc_id}",
|
||||
sections=[TextSection(text="content", link=None)],
|
||||
metadata={},
|
||||
file_id=file_id,
|
||||
)
|
||||
|
||||
|
||||
def _stage_file(content: bytes = b"raw bytes") -> str:
|
||||
"""Write bytes to the file store as INDEXING_STAGING and return the file_id.
|
||||
|
||||
Mirrors what the connector raw_file_callback would do during fetch.
|
||||
"""
|
||||
return get_default_file_store().save_file(
|
||||
content=BytesIO(content),
|
||||
display_name=None,
|
||||
file_origin=FileOrigin.INDEXING_STAGING,
|
||||
file_type="application/octet-stream",
|
||||
file_metadata={"test": True},
|
||||
)
|
||||
|
||||
|
||||
def _get_doc_row(db_session: Session, doc_id: str) -> DBDocument | None:
|
||||
"""Reload the document row fresh from DB so we see post-upsert state."""
|
||||
db_session.expire_all()
|
||||
return db_session.query(DBDocument).filter(DBDocument.id == doc_id).one_or_none()
|
||||
|
||||
|
||||
def _get_filerecord(db_session: Session, file_id: str) -> FileRecord | None:
|
||||
db_session.expire_all()
|
||||
return get_filerecord_by_file_id_optional(file_id=file_id, db_session=db_session)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def cc_pair(
|
||||
db_session: Session,
|
||||
tenant_context: None, # noqa: ARG001
|
||||
initialize_file_store: None, # noqa: ARG001
|
||||
) -> Generator[ConnectorCredentialPair, None, None]:
|
||||
"""Create a connector + credential + cc_pair backing the index attempt."""
|
||||
connector = Connector(
|
||||
name=f"test-connector-{uuid4().hex[:8]}",
|
||||
source=DocumentSource.MOCK_CONNECTOR,
|
||||
input_type=InputType.LOAD_STATE,
|
||||
connector_specific_config={},
|
||||
refresh_freq=None,
|
||||
prune_freq=None,
|
||||
indexing_start=None,
|
||||
)
|
||||
db_session.add(connector)
|
||||
db_session.flush()
|
||||
|
||||
credential = Credential(
|
||||
source=DocumentSource.MOCK_CONNECTOR,
|
||||
credential_json={},
|
||||
)
|
||||
db_session.add(credential)
|
||||
db_session.flush()
|
||||
|
||||
pair = ConnectorCredentialPair(
|
||||
connector_id=connector.id,
|
||||
credential_id=credential.id,
|
||||
name=f"test-cc-pair-{uuid4().hex[:8]}",
|
||||
status=ConnectorCredentialPairStatus.ACTIVE,
|
||||
access_type=AccessType.PUBLIC,
|
||||
auto_sync_options=None,
|
||||
)
|
||||
db_session.add(pair)
|
||||
db_session.commit()
|
||||
db_session.refresh(pair)
|
||||
|
||||
yield pair
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def attempt_metadata(cc_pair: ConnectorCredentialPair) -> IndexAttemptMetadata:
|
||||
return IndexAttemptMetadata(
|
||||
connector_id=cc_pair.connector_id,
|
||||
credential_id=cc_pair.credential_id,
|
||||
attempt_id=None,
|
||||
request_id="test-request",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNewDocuments:
|
||||
"""First-time inserts — no previous file_id to reconcile against."""
|
||||
|
||||
def test_new_doc_without_file_id(
|
||||
self,
|
||||
db_session: Session,
|
||||
attempt_metadata: IndexAttemptMetadata,
|
||||
) -> None:
|
||||
doc = _make_doc(f"doc-{uuid4().hex[:8]}", file_id=None)
|
||||
|
||||
index_doc_batch_prepare(
|
||||
documents=[doc],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
row = _get_doc_row(db_session, doc.id)
|
||||
assert row is not None
|
||||
assert row.file_id is None
|
||||
|
||||
def test_new_doc_with_staged_file_id_promotes_to_connector(
|
||||
self,
|
||||
db_session: Session,
|
||||
attempt_metadata: IndexAttemptMetadata,
|
||||
) -> None:
|
||||
file_id = _stage_file()
|
||||
doc = _make_doc(f"doc-{uuid4().hex[:8]}", file_id=file_id)
|
||||
|
||||
index_doc_batch_prepare(
|
||||
documents=[doc],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
row = _get_doc_row(db_session, doc.id)
|
||||
assert row is not None and row.file_id == file_id
|
||||
|
||||
record = _get_filerecord(db_session, file_id)
|
||||
assert record is not None
|
||||
assert record.file_origin == FileOrigin.CONNECTOR
|
||||
|
||||
|
||||
class TestExistingDocuments:
|
||||
"""Re-index path — a `document` row already exists with some file_id."""
|
||||
|
||||
def test_unchanged_file_id_is_noop(
|
||||
self,
|
||||
db_session: Session,
|
||||
attempt_metadata: IndexAttemptMetadata,
|
||||
) -> None:
|
||||
file_id = _stage_file()
|
||||
doc = _make_doc(f"doc-{uuid4().hex[:8]}", file_id=file_id)
|
||||
|
||||
# First pass: inserts the row + promotes the file.
|
||||
index_doc_batch_prepare(
|
||||
documents=[doc],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
# Second pass with the same file_id — should not delete or re-promote.
|
||||
index_doc_batch_prepare(
|
||||
documents=[doc],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
record = _get_filerecord(db_session, file_id)
|
||||
assert record is not None
|
||||
assert record.file_origin == FileOrigin.CONNECTOR
|
||||
|
||||
row = _get_doc_row(db_session, doc.id)
|
||||
assert row is not None and row.file_id == file_id
|
||||
|
||||
def test_swapping_file_id_promotes_new_and_deletes_old(
|
||||
self,
|
||||
db_session: Session,
|
||||
attempt_metadata: IndexAttemptMetadata,
|
||||
) -> None:
|
||||
old_file_id = _stage_file(content=b"old bytes")
|
||||
doc = _make_doc(f"doc-{uuid4().hex[:8]}", file_id=old_file_id)
|
||||
|
||||
index_doc_batch_prepare(
|
||||
documents=[doc],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
# Re-fetch produces a new staged file_id for the same doc.
|
||||
new_file_id = _stage_file(content=b"new bytes")
|
||||
doc_v2 = _make_doc(doc.id, file_id=new_file_id)
|
||||
|
||||
index_doc_batch_prepare(
|
||||
documents=[doc_v2],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
row = _get_doc_row(db_session, doc.id)
|
||||
assert row is not None and row.file_id == new_file_id
|
||||
|
||||
new_record = _get_filerecord(db_session, new_file_id)
|
||||
assert new_record is not None
|
||||
assert new_record.file_origin == FileOrigin.CONNECTOR
|
||||
|
||||
# Old file_record + S3 object are gone.
|
||||
assert _get_filerecord(db_session, old_file_id) is None
|
||||
|
||||
def test_clearing_file_id_deletes_old_and_nulls_column(
|
||||
self,
|
||||
db_session: Session,
|
||||
attempt_metadata: IndexAttemptMetadata,
|
||||
) -> None:
|
||||
old_file_id = _stage_file()
|
||||
doc = _make_doc(f"doc-{uuid4().hex[:8]}", file_id=old_file_id)
|
||||
|
||||
index_doc_batch_prepare(
|
||||
documents=[doc],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
# Connector opts out on next run — yields the doc without a file_id.
|
||||
doc_v2 = _make_doc(doc.id, file_id=None)
|
||||
|
||||
index_doc_batch_prepare(
|
||||
documents=[doc_v2],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
row = _get_doc_row(db_session, doc.id)
|
||||
assert row is not None and row.file_id is None
|
||||
assert _get_filerecord(db_session, old_file_id) is None
|
||||
|
||||
|
||||
class TestBatchHandling:
|
||||
"""Mixed batches — multiple docs at different lifecycle states in one call."""
|
||||
|
||||
def test_mixed_batch_each_doc_handled_independently(
|
||||
self,
|
||||
db_session: Session,
|
||||
attempt_metadata: IndexAttemptMetadata,
|
||||
) -> None:
|
||||
# Pre-seed an existing doc with a file_id we'll swap.
|
||||
existing_old_id = _stage_file(content=b"existing-old")
|
||||
existing_doc = _make_doc(f"doc-{uuid4().hex[:8]}", file_id=existing_old_id)
|
||||
index_doc_batch_prepare(
|
||||
documents=[existing_doc],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
# Now: swap the existing one, add a brand-new doc with file_id, and a
|
||||
# brand-new doc without file_id.
|
||||
swap_new_id = _stage_file(content=b"existing-new")
|
||||
new_with_file_id = _stage_file(content=b"new-with-file")
|
||||
existing_v2 = _make_doc(existing_doc.id, file_id=swap_new_id)
|
||||
new_with = _make_doc(f"doc-{uuid4().hex[:8]}", file_id=new_with_file_id)
|
||||
new_without = _make_doc(f"doc-{uuid4().hex[:8]}", file_id=None)
|
||||
|
||||
index_doc_batch_prepare(
|
||||
documents=[existing_v2, new_with, new_without],
|
||||
index_attempt_metadata=attempt_metadata,
|
||||
db_session=db_session,
|
||||
ignore_time_skip=True,
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
# Existing doc was swapped: old file gone, new file promoted.
|
||||
existing_row = _get_doc_row(db_session, existing_doc.id)
|
||||
assert existing_row is not None and existing_row.file_id == swap_new_id
|
||||
assert _get_filerecord(db_session, existing_old_id) is None
|
||||
swap_record = _get_filerecord(db_session, swap_new_id)
|
||||
assert swap_record is not None
|
||||
assert swap_record.file_origin == FileOrigin.CONNECTOR
|
||||
|
||||
# New doc with file_id: row exists, file promoted.
|
||||
new_with_row = _get_doc_row(db_session, new_with.id)
|
||||
assert new_with_row is not None and new_with_row.file_id == new_with_file_id
|
||||
new_with_record = _get_filerecord(db_session, new_with_file_id)
|
||||
assert new_with_record is not None
|
||||
assert new_with_record.file_origin == FileOrigin.CONNECTOR
|
||||
|
||||
# New doc without file_id: row exists, no file_record involvement.
|
||||
new_without_row = _get_doc_row(db_session, new_without.id)
|
||||
assert new_without_row is not None and new_without_row.file_id is None
|
||||
@@ -0,0 +1,210 @@
|
||||
"""Tests for `cloud_beat_task_generator`'s tenant work-gating logic.
|
||||
|
||||
Exercises the gate-read path end-to-end against real Redis. The Celery
|
||||
`.app.send_task` is mocked so we can count dispatches without actually
|
||||
sending messages.
|
||||
|
||||
Requires a running Redis instance. Run with::
|
||||
|
||||
python -m dotenv -f .vscode/.env run -- pytest \
|
||||
backend/tests/external_dependency_unit/tenant_work_gating/test_gate_generator.py
|
||||
"""
|
||||
|
||||
from collections.abc import Generator
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from ee.onyx.background.celery.tasks.cloud import tasks as cloud_tasks
|
||||
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
|
||||
from onyx.redis import redis_tenant_work_gating as twg
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.redis.redis_tenant_work_gating import _SET_KEY
|
||||
from onyx.redis.redis_tenant_work_gating import mark_tenant_active
|
||||
|
||||
|
||||
_TENANT_A = "tenant_aaaa0000-0000-0000-0000-000000000001"
|
||||
_TENANT_B = "tenant_bbbb0000-0000-0000-0000-000000000002"
|
||||
_TENANT_C = "tenant_cccc0000-0000-0000-0000-000000000003"
|
||||
_ALL_TEST_TENANTS = [_TENANT_A, _TENANT_B, _TENANT_C]
|
||||
_FANOUT_KEY_PREFIX = cloud_tasks._FULL_FANOUT_TIMESTAMP_KEY_PREFIX
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _multi_tenant_true() -> Generator[None, None, None]:
|
||||
with patch.object(twg, "MULTI_TENANT", True):
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clean_redis() -> Generator[None, None, None]:
|
||||
"""Clear the active set AND the per-task full-fanout timestamp so each
|
||||
test starts fresh."""
|
||||
r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
|
||||
r.delete(_SET_KEY)
|
||||
r.delete(f"{_FANOUT_KEY_PREFIX}:test_task")
|
||||
r.delete("runtime:tenant_work_gating:enabled")
|
||||
r.delete("runtime:tenant_work_gating:enforce")
|
||||
yield
|
||||
r.delete(_SET_KEY)
|
||||
r.delete(f"{_FANOUT_KEY_PREFIX}:test_task")
|
||||
r.delete("runtime:tenant_work_gating:enabled")
|
||||
r.delete("runtime:tenant_work_gating:enforce")
|
||||
|
||||
|
||||
def _invoke_generator(
|
||||
*,
|
||||
work_gated: bool,
|
||||
enabled: bool,
|
||||
enforce: bool,
|
||||
tenant_ids: list[str],
|
||||
full_fanout_interval_seconds: int = 1200,
|
||||
ttl_seconds: int = 1800,
|
||||
) -> MagicMock:
|
||||
"""Helper: call the generator with runtime flags fixed and the Celery
|
||||
app mocked. Returns the mock so callers can assert on send_task calls."""
|
||||
mock_app = MagicMock()
|
||||
# The task binds `self` = the task itself when invoked via `.run()`;
|
||||
# patch its `.app` so `self.app.send_task` routes to our mock.
|
||||
with (
|
||||
patch.object(cloud_tasks.cloud_beat_task_generator, "app", mock_app),
|
||||
patch.object(cloud_tasks, "get_all_tenant_ids", return_value=list(tenant_ids)),
|
||||
patch.object(cloud_tasks, "get_gated_tenants", return_value=set()),
|
||||
patch(
|
||||
"onyx.server.runtime.onyx_runtime.OnyxRuntime.get_tenant_work_gating_enabled",
|
||||
return_value=enabled,
|
||||
),
|
||||
patch(
|
||||
"onyx.server.runtime.onyx_runtime.OnyxRuntime.get_tenant_work_gating_enforce",
|
||||
return_value=enforce,
|
||||
),
|
||||
patch(
|
||||
"onyx.server.runtime.onyx_runtime.OnyxRuntime.get_tenant_work_gating_full_fanout_interval_seconds",
|
||||
return_value=full_fanout_interval_seconds,
|
||||
),
|
||||
patch(
|
||||
"onyx.server.runtime.onyx_runtime.OnyxRuntime.get_tenant_work_gating_ttl_seconds",
|
||||
return_value=ttl_seconds,
|
||||
),
|
||||
):
|
||||
cloud_tasks.cloud_beat_task_generator.run(
|
||||
task_name="test_task",
|
||||
work_gated=work_gated,
|
||||
)
|
||||
return mock_app
|
||||
|
||||
|
||||
def _dispatched_tenants(mock_app: MagicMock) -> list[str]:
|
||||
"""Pull tenant_ids out of each send_task call for assertion."""
|
||||
return [c.kwargs["kwargs"]["tenant_id"] for c in mock_app.send_task.call_args_list]
|
||||
|
||||
|
||||
def _seed_recent_full_fanout_timestamp() -> None:
|
||||
"""Pre-seed the per-task timestamp so the interval-elapsed branch
|
||||
reports False, i.e. the gate enforces normally instead of going into
|
||||
full-fanout on first invocation."""
|
||||
import time as _t
|
||||
|
||||
r = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
|
||||
r.set(f"{_FANOUT_KEY_PREFIX}:test_task", str(int(_t.time() * 1000)))
|
||||
|
||||
|
||||
def test_enforce_skips_unmarked_tenants() -> None:
|
||||
"""With enable+enforce on (interval NOT elapsed), only tenants in the
|
||||
active set get dispatched."""
|
||||
mark_tenant_active(_TENANT_A)
|
||||
_seed_recent_full_fanout_timestamp()
|
||||
|
||||
mock_app = _invoke_generator(
|
||||
work_gated=True,
|
||||
enabled=True,
|
||||
enforce=True,
|
||||
tenant_ids=_ALL_TEST_TENANTS,
|
||||
full_fanout_interval_seconds=3600,
|
||||
)
|
||||
|
||||
dispatched = _dispatched_tenants(mock_app)
|
||||
assert dispatched == [_TENANT_A]
|
||||
|
||||
|
||||
def test_shadow_mode_dispatches_all_tenants() -> None:
|
||||
"""enabled=True, enforce=False: gate computes skip but still dispatches."""
|
||||
mark_tenant_active(_TENANT_A)
|
||||
_seed_recent_full_fanout_timestamp()
|
||||
|
||||
mock_app = _invoke_generator(
|
||||
work_gated=True,
|
||||
enabled=True,
|
||||
enforce=False,
|
||||
tenant_ids=_ALL_TEST_TENANTS,
|
||||
full_fanout_interval_seconds=3600,
|
||||
)
|
||||
|
||||
dispatched = _dispatched_tenants(mock_app)
|
||||
assert set(dispatched) == set(_ALL_TEST_TENANTS)
|
||||
|
||||
|
||||
def test_full_fanout_cycle_dispatches_all_tenants() -> None:
|
||||
"""First invocation (no prior timestamp → interval considered elapsed)
|
||||
counts as full-fanout; every tenant gets dispatched even under enforce."""
|
||||
mark_tenant_active(_TENANT_A)
|
||||
|
||||
mock_app = _invoke_generator(
|
||||
work_gated=True,
|
||||
enabled=True,
|
||||
enforce=True,
|
||||
tenant_ids=_ALL_TEST_TENANTS,
|
||||
)
|
||||
|
||||
dispatched = _dispatched_tenants(mock_app)
|
||||
assert set(dispatched) == set(_ALL_TEST_TENANTS)
|
||||
|
||||
|
||||
def test_redis_unavailable_fails_open() -> None:
|
||||
"""When `get_active_tenants` returns None (simulated Redis outage) the
|
||||
gate treats the invocation as full-fanout and dispatches everyone —
|
||||
even when the interval hasn't elapsed and enforce is on."""
|
||||
mark_tenant_active(_TENANT_A)
|
||||
_seed_recent_full_fanout_timestamp()
|
||||
|
||||
with patch.object(cloud_tasks, "get_active_tenants", return_value=None):
|
||||
mock_app = _invoke_generator(
|
||||
work_gated=True,
|
||||
enabled=True,
|
||||
enforce=True,
|
||||
tenant_ids=_ALL_TEST_TENANTS,
|
||||
full_fanout_interval_seconds=3600,
|
||||
)
|
||||
|
||||
dispatched = _dispatched_tenants(mock_app)
|
||||
assert set(dispatched) == set(_ALL_TEST_TENANTS)
|
||||
|
||||
|
||||
def test_work_gated_false_bypasses_gate_entirely() -> None:
|
||||
"""Beat templates that don't opt in (`work_gated=False`) never consult
|
||||
the set — no matter the flag state."""
|
||||
# Even with enforce on and nothing in the set, all tenants dispatch.
|
||||
mock_app = _invoke_generator(
|
||||
work_gated=False,
|
||||
enabled=True,
|
||||
enforce=True,
|
||||
tenant_ids=_ALL_TEST_TENANTS,
|
||||
)
|
||||
|
||||
dispatched = _dispatched_tenants(mock_app)
|
||||
assert set(dispatched) == set(_ALL_TEST_TENANTS)
|
||||
|
||||
|
||||
def test_gate_disabled_dispatches_everyone_regardless_of_enforce() -> None:
|
||||
"""enabled=False means the gate isn't computed — dispatch is unchanged."""
|
||||
# Intentionally don't add anyone to the set.
|
||||
mock_app = _invoke_generator(
|
||||
work_gated=True,
|
||||
enabled=False,
|
||||
enforce=True,
|
||||
tenant_ids=_ALL_TEST_TENANTS,
|
||||
)
|
||||
|
||||
dispatched = _dispatched_tenants(mock_app)
|
||||
assert set(dispatched) == set(_ALL_TEST_TENANTS)
|
||||
@@ -16,12 +16,14 @@ from mcp import ClientSession
|
||||
from mcp.client.streamable_http import streamablehttp_client
|
||||
from mcp.types import CallToolResult
|
||||
from mcp.types import TextContent
|
||||
from pydantic import AnyUrl
|
||||
|
||||
from onyx.db.enums import AccessType
|
||||
from tests.integration.common_utils.constants import MCP_SERVER_URL
|
||||
from tests.integration.common_utils.managers.api_key import APIKeyManager
|
||||
from tests.integration.common_utils.managers.cc_pair import CCPairManager
|
||||
from tests.integration.common_utils.managers.document import DocumentManager
|
||||
from tests.integration.common_utils.managers.document_set import DocumentSetManager
|
||||
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
|
||||
from tests.integration.common_utils.managers.pat import PATManager
|
||||
from tests.integration.common_utils.managers.user import UserManager
|
||||
@@ -34,6 +36,7 @@ from tests.integration.common_utils.test_models import DATestUser
|
||||
# Constants
|
||||
MCP_SEARCH_TOOL = "search_indexed_documents"
|
||||
INDEXED_SOURCES_RESOURCE_URI = "resource://indexed_sources"
|
||||
DOCUMENT_SETS_RESOURCE_URI = "resource://document_sets"
|
||||
DEFAULT_SEARCH_LIMIT = 5
|
||||
STREAMABLE_HTTP_URL = f"{MCP_SERVER_URL.rstrip('/')}/?transportType=streamable-http"
|
||||
|
||||
@@ -73,19 +76,22 @@ def _extract_tool_payload(result: CallToolResult) -> dict[str, Any]:
|
||||
|
||||
|
||||
def _call_search_tool(
|
||||
headers: dict[str, str], query: str, limit: int = DEFAULT_SEARCH_LIMIT
|
||||
headers: dict[str, str],
|
||||
query: str,
|
||||
limit: int = DEFAULT_SEARCH_LIMIT,
|
||||
document_set_names: list[str] | None = None,
|
||||
) -> CallToolResult:
|
||||
"""Call the search_indexed_documents tool via MCP."""
|
||||
|
||||
async def _action(session: ClientSession) -> CallToolResult:
|
||||
await session.initialize()
|
||||
return await session.call_tool(
|
||||
MCP_SEARCH_TOOL,
|
||||
{
|
||||
"query": query,
|
||||
"limit": limit,
|
||||
},
|
||||
)
|
||||
arguments: dict[str, Any] = {
|
||||
"query": query,
|
||||
"limit": limit,
|
||||
}
|
||||
if document_set_names is not None:
|
||||
arguments["document_set_names"] = document_set_names
|
||||
return await session.call_tool(MCP_SEARCH_TOOL, arguments)
|
||||
|
||||
return _run_with_mcp_session(headers, _action)
|
||||
|
||||
@@ -238,3 +244,106 @@ def test_mcp_search_respects_acl_filters(
|
||||
blocked_payload = _extract_tool_payload(blocked_result)
|
||||
assert blocked_payload["total_results"] == 0
|
||||
assert blocked_payload["documents"] == []
|
||||
|
||||
|
||||
def test_mcp_search_filters_by_document_set(
|
||||
reset: None, # noqa: ARG001
|
||||
admin_user: DATestUser,
|
||||
) -> None:
|
||||
"""Passing document_set_names should scope results to the named set."""
|
||||
LLMProviderManager.create(user_performing_action=admin_user)
|
||||
|
||||
api_key = APIKeyManager.create(user_performing_action=admin_user)
|
||||
cc_pair_in_set = CCPairManager.create_from_scratch(
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
cc_pair_out_of_set = CCPairManager.create_from_scratch(
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
shared_phrase = "document-set-filter-shared-phrase"
|
||||
in_set_content = f"{shared_phrase} inside curated set"
|
||||
out_of_set_content = f"{shared_phrase} outside curated set"
|
||||
|
||||
_seed_document_and_wait_for_indexing(
|
||||
cc_pair=cc_pair_in_set,
|
||||
content=in_set_content,
|
||||
api_key=api_key,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
_seed_document_and_wait_for_indexing(
|
||||
cc_pair=cc_pair_out_of_set,
|
||||
content=out_of_set_content,
|
||||
api_key=api_key,
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
|
||||
doc_set = DocumentSetManager.create(
|
||||
cc_pair_ids=[cc_pair_in_set.id],
|
||||
user_performing_action=admin_user,
|
||||
)
|
||||
DocumentSetManager.wait_for_sync(
|
||||
user_performing_action=admin_user,
|
||||
document_sets_to_check=[doc_set],
|
||||
)
|
||||
|
||||
headers = _auth_headers(admin_user, name="mcp-doc-set-filter")
|
||||
|
||||
# The document_sets resource should surface the newly created set so MCP
|
||||
# clients can discover which values to pass to document_set_names.
|
||||
async def _list_resources(session: ClientSession) -> Any:
|
||||
await session.initialize()
|
||||
resources = await session.list_resources()
|
||||
contents = await session.read_resource(AnyUrl(DOCUMENT_SETS_RESOURCE_URI))
|
||||
return resources, contents
|
||||
|
||||
resources_result, doc_sets_contents = _run_with_mcp_session(
|
||||
headers, _list_resources
|
||||
)
|
||||
resource_uris = {str(resource.uri) for resource in resources_result.resources}
|
||||
assert DOCUMENT_SETS_RESOURCE_URI in resource_uris
|
||||
doc_sets_payload = json.loads(doc_sets_contents.contents[0].text)
|
||||
exposed_names = {entry["name"] for entry in doc_sets_payload}
|
||||
assert doc_set.name in exposed_names
|
||||
|
||||
# Without the filter both documents are visible.
|
||||
unfiltered_payload = _extract_tool_payload(
|
||||
_call_search_tool(headers, shared_phrase, limit=10)
|
||||
)
|
||||
unfiltered_contents = [
|
||||
doc.get("content") or "" for doc in unfiltered_payload["documents"]
|
||||
]
|
||||
assert any(in_set_content in content for content in unfiltered_contents)
|
||||
assert any(out_of_set_content in content for content in unfiltered_contents)
|
||||
|
||||
# With the document set filter only the in-set document is returned.
|
||||
filtered_payload = _extract_tool_payload(
|
||||
_call_search_tool(
|
||||
headers,
|
||||
shared_phrase,
|
||||
limit=10,
|
||||
document_set_names=[doc_set.name],
|
||||
)
|
||||
)
|
||||
filtered_contents = [
|
||||
doc.get("content") or "" for doc in filtered_payload["documents"]
|
||||
]
|
||||
assert filtered_payload["total_results"] >= 1
|
||||
assert any(in_set_content in content for content in filtered_contents)
|
||||
assert all(out_of_set_content not in content for content in filtered_contents)
|
||||
|
||||
# An empty document_set_names should behave like "no filter" (normalized
|
||||
# to None), not "match zero sets".
|
||||
empty_list_payload = _extract_tool_payload(
|
||||
_call_search_tool(
|
||||
headers,
|
||||
shared_phrase,
|
||||
limit=10,
|
||||
document_set_names=[],
|
||||
)
|
||||
)
|
||||
empty_list_contents = [
|
||||
doc.get("content") or "" for doc in empty_list_payload["documents"]
|
||||
]
|
||||
assert any(in_set_content in content for content in empty_list_contents)
|
||||
assert any(out_of_set_content in content for content in empty_list_contents)
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
|
||||
|
||||
def test_time_str_to_utc() -> None:
|
||||
str_to_dt = {
|
||||
"Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime(
|
||||
2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime(
|
||||
2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime(
|
||||
2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"30 Jun 2023 18:45:01 +0300": datetime.datetime(
|
||||
2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime(
|
||||
2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Date: Wed, 27 Aug 2025 11:40:00 +0200": datetime.datetime(
|
||||
2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
}
|
||||
for strptime, expected_datetime in str_to_dt.items():
|
||||
assert time_str_to_utc(strptime) == expected_datetime
|
||||
|
||||
|
||||
def test_time_str_to_utc_recovers_from_concatenated_headers() -> None:
|
||||
# TZ is dropped during recovery, so the expected result is UTC rather
|
||||
# than the original offset.
|
||||
assert time_str_to_utc(
|
||||
'Sat, 3 Nov 2007 14:33:28 -0200To: "jason" <jason@example.net>'
|
||||
) == datetime.datetime(2007, 11, 3, 14, 33, 28, tzinfo=datetime.timezone.utc)
|
||||
|
||||
assert time_str_to_utc(
|
||||
"Fri, 20 Feb 2015 10:30:00 +0500Cc: someone@example.com"
|
||||
) == datetime.datetime(2015, 2, 20, 10, 30, 0, tzinfo=datetime.timezone.utc)
|
||||
|
||||
|
||||
def test_time_str_to_utc_raises_on_impossible_dates() -> None:
|
||||
for bad in (
|
||||
"Wed, 33 Sep 2007 13:42:59 +0100",
|
||||
"Thu, 11 Oct 2007 31:50:55 +0900",
|
||||
"not a date at all",
|
||||
"",
|
||||
):
|
||||
with pytest.raises(ValueError):
|
||||
time_str_to_utc(bad)
|
||||
@@ -1,3 +1,4 @@
|
||||
import copy
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
@@ -8,7 +9,6 @@ from unittest.mock import patch
|
||||
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from onyx.connectors.gmail.connector import _build_time_range_query
|
||||
from onyx.connectors.gmail.connector import GmailCheckpoint
|
||||
from onyx.connectors.gmail.connector import GmailConnector
|
||||
@@ -51,29 +51,43 @@ def test_build_time_range_query() -> None:
|
||||
assert query is None
|
||||
|
||||
|
||||
def test_time_str_to_utc() -> None:
|
||||
str_to_dt = {
|
||||
"Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime(
|
||||
2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime(
|
||||
2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime(
|
||||
2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"30 Jun 2023 18:45:01 +0300": datetime.datetime(
|
||||
2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime(
|
||||
2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
"Date: Wed, 27 Aug 2025 11:40:00 +0200": datetime.datetime(
|
||||
2025, 8, 27, 9, 40, 0, tzinfo=datetime.timezone.utc
|
||||
),
|
||||
}
|
||||
for strptime, expected_datetime in str_to_dt.items():
|
||||
assert time_str_to_utc(strptime) == expected_datetime
|
||||
def _thread_with_date(date_header: str | None) -> dict[str, Any]:
|
||||
"""Load the fixture thread and replace (or strip, if None) its Date header."""
|
||||
json_path = os.path.join(os.path.dirname(__file__), "thread.json")
|
||||
with open(json_path, "r") as f:
|
||||
thread = cast(dict[str, Any], json.load(f))
|
||||
thread = copy.deepcopy(thread)
|
||||
|
||||
for message in thread["messages"]:
|
||||
headers: list[dict[str, str]] = message["payload"]["headers"]
|
||||
if date_header is None:
|
||||
message["payload"]["headers"] = [
|
||||
h for h in headers if h.get("name") != "Date"
|
||||
]
|
||||
continue
|
||||
|
||||
replaced = False
|
||||
for header in headers:
|
||||
if header.get("name") == "Date":
|
||||
header["value"] = date_header
|
||||
replaced = True
|
||||
break
|
||||
if not replaced:
|
||||
headers.append({"name": "Date", "value": date_header})
|
||||
|
||||
return thread
|
||||
|
||||
|
||||
def test_thread_to_document_skips_unparseable_dates() -> None:
|
||||
for bad_date in (
|
||||
"Wed, 33 Sep 2007 13:42:59 +0100",
|
||||
"Thu, 11 Oct 2007 31:50:55 +0900",
|
||||
"total garbage not even close to a date",
|
||||
):
|
||||
doc = thread_to_document(_thread_with_date(bad_date), "admin@example.com")
|
||||
assert isinstance(doc, Document), f"failed for {bad_date!r}"
|
||||
assert doc.doc_updated_at is None
|
||||
assert doc.id == "192edefb315737c3"
|
||||
|
||||
|
||||
def test_gmail_checkpoint_progression() -> None:
|
||||
|
||||
@@ -12,12 +12,14 @@ from unittest.mock import patch
|
||||
|
||||
from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
|
||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||
from onyx.connectors.google_drive.file_retrieval import DriveFileFieldType
|
||||
from onyx.connectors.google_drive.models import DriveRetrievalStage
|
||||
from onyx.connectors.google_drive.models import GoogleDriveCheckpoint
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.interfaces import SlimConnectorWithPermSync
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.utils.threadpool_concurrency import ThreadSafeDict
|
||||
from onyx.utils.threadpool_concurrency import ThreadSafeSet
|
||||
|
||||
|
||||
def _make_done_checkpoint() -> GoogleDriveCheckpoint:
|
||||
@@ -198,3 +200,90 @@ class TestCeleryUtilsRouting:
|
||||
|
||||
mock_slim.assert_called_once()
|
||||
mock_perm_sync.assert_not_called()
|
||||
|
||||
|
||||
class TestFailedFolderIdsByEmail:
|
||||
def _make_failed_map(
|
||||
self, entries: dict[str, set[str]]
|
||||
) -> ThreadSafeDict[str, ThreadSafeSet[str]]:
|
||||
return ThreadSafeDict({k: ThreadSafeSet(v) for k, v in entries.items()})
|
||||
|
||||
def test_skips_api_call_for_known_failed_pair(self) -> None:
|
||||
"""_get_folder_metadata must skip the API call for a (folder, email) pair
|
||||
that previously confirmed no accessible parent."""
|
||||
connector = _make_connector()
|
||||
failed_map = self._make_failed_map(
|
||||
{
|
||||
"retriever@example.com": {"folder1"},
|
||||
"admin@example.com": {"folder1"},
|
||||
}
|
||||
)
|
||||
|
||||
with patch(
|
||||
"onyx.connectors.google_drive.connector.get_folder_metadata"
|
||||
) as mock_api:
|
||||
result = connector._get_folder_metadata(
|
||||
folder_id="folder1",
|
||||
retriever_email="retriever@example.com",
|
||||
field_type=DriveFileFieldType.SLIM,
|
||||
failed_folder_ids_by_email=failed_map,
|
||||
)
|
||||
|
||||
mock_api.assert_not_called()
|
||||
assert result is None
|
||||
|
||||
def test_records_failed_pair_when_no_parents(self) -> None:
|
||||
"""_get_folder_metadata must record (email → folder_id) in the map
|
||||
when the API returns a folder with no parents."""
|
||||
connector = _make_connector()
|
||||
failed_map: ThreadSafeDict[str, ThreadSafeSet[str]] = ThreadSafeDict()
|
||||
folder_no_parents: dict = {"id": "folder1", "name": "Orphaned"}
|
||||
|
||||
with (
|
||||
patch(
|
||||
"onyx.connectors.google_drive.connector.get_drive_service",
|
||||
return_value=MagicMock(),
|
||||
),
|
||||
patch(
|
||||
"onyx.connectors.google_drive.connector.get_folder_metadata",
|
||||
return_value=folder_no_parents,
|
||||
),
|
||||
):
|
||||
connector._get_folder_metadata(
|
||||
folder_id="folder1",
|
||||
retriever_email="retriever@example.com",
|
||||
field_type=DriveFileFieldType.SLIM,
|
||||
failed_folder_ids_by_email=failed_map,
|
||||
)
|
||||
|
||||
assert "folder1" in failed_map.get("retriever@example.com", ThreadSafeSet())
|
||||
assert "folder1" in failed_map.get("admin@example.com", ThreadSafeSet())
|
||||
|
||||
def test_does_not_record_when_parents_found(self) -> None:
|
||||
"""_get_folder_metadata must NOT record a pair when parents are found."""
|
||||
connector = _make_connector()
|
||||
failed_map: ThreadSafeDict[str, ThreadSafeSet[str]] = ThreadSafeDict()
|
||||
folder_with_parents: dict = {
|
||||
"id": "folder1",
|
||||
"name": "Normal",
|
||||
"parents": ["root"],
|
||||
}
|
||||
|
||||
with (
|
||||
patch(
|
||||
"onyx.connectors.google_drive.connector.get_drive_service",
|
||||
return_value=MagicMock(),
|
||||
),
|
||||
patch(
|
||||
"onyx.connectors.google_drive.connector.get_folder_metadata",
|
||||
return_value=folder_with_parents,
|
||||
),
|
||||
):
|
||||
connector._get_folder_metadata(
|
||||
folder_id="folder1",
|
||||
retriever_email="retriever@example.com",
|
||||
field_type=DriveFileFieldType.SLIM,
|
||||
failed_folder_ids_by_email=failed_map,
|
||||
)
|
||||
|
||||
assert len(failed_map) == 0
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
"""Unit tests for SharepointConnector.load_credentials sp_tenant_domain resolution."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from onyx.connectors.sharepoint.connector import SharepointConnector
|
||||
|
||||
SITE_URL = "https://mytenant.sharepoint.com/sites/MySite"
|
||||
EXPECTED_TENANT_DOMAIN = "mytenant"
|
||||
|
||||
CLIENT_SECRET_CREDS = {
|
||||
"authentication_method": "client_secret",
|
||||
"sp_client_id": "fake-client-id",
|
||||
"sp_client_secret": "fake-client-secret",
|
||||
"sp_directory_id": "fake-directory-id",
|
||||
}
|
||||
|
||||
CERTIFICATE_CREDS = {
|
||||
"authentication_method": "certificate",
|
||||
"sp_client_id": "fake-client-id",
|
||||
"sp_directory_id": "fake-directory-id",
|
||||
"sp_private_key": base64.b64encode(b"fake-pfx-data").decode(),
|
||||
"sp_certificate_password": "fake-password",
|
||||
}
|
||||
|
||||
|
||||
def _make_mock_msal() -> MagicMock:
|
||||
mock_app = MagicMock()
|
||||
mock_app.acquire_token_for_client.return_value = {"access_token": "fake-token"}
|
||||
return mock_app
|
||||
|
||||
|
||||
@patch("onyx.connectors.sharepoint.connector.msal.ConfidentialClientApplication")
|
||||
@patch("onyx.connectors.sharepoint.connector.GraphClient")
|
||||
def test_client_secret_with_site_pages_sets_tenant_domain(
|
||||
_mock_graph_client: MagicMock,
|
||||
mock_msal_cls: MagicMock,
|
||||
) -> None:
|
||||
"""client_secret auth + include_site_pages=True must resolve sp_tenant_domain."""
|
||||
mock_msal_cls.return_value = _make_mock_msal()
|
||||
connector = SharepointConnector(sites=[SITE_URL], include_site_pages=True)
|
||||
|
||||
connector.load_credentials(CLIENT_SECRET_CREDS)
|
||||
|
||||
assert connector.sp_tenant_domain == EXPECTED_TENANT_DOMAIN
|
||||
|
||||
|
||||
@patch("onyx.connectors.sharepoint.connector.msal.ConfidentialClientApplication")
|
||||
@patch("onyx.connectors.sharepoint.connector.GraphClient")
|
||||
def test_client_secret_without_site_pages_still_sets_tenant_domain(
|
||||
_mock_graph_client: MagicMock,
|
||||
mock_msal_cls: MagicMock,
|
||||
) -> None:
|
||||
"""client_secret auth + include_site_pages=False must still resolve sp_tenant_domain
|
||||
because _create_rest_client_context is also called for drive items."""
|
||||
mock_msal_cls.return_value = _make_mock_msal()
|
||||
connector = SharepointConnector(sites=[SITE_URL], include_site_pages=False)
|
||||
|
||||
connector.load_credentials(CLIENT_SECRET_CREDS)
|
||||
|
||||
assert connector.sp_tenant_domain == EXPECTED_TENANT_DOMAIN
|
||||
|
||||
|
||||
@patch("onyx.connectors.sharepoint.connector.load_certificate_from_pfx")
|
||||
@patch("onyx.connectors.sharepoint.connector.msal.ConfidentialClientApplication")
|
||||
@patch("onyx.connectors.sharepoint.connector.GraphClient")
|
||||
def test_certificate_with_site_pages_sets_tenant_domain(
|
||||
_mock_graph_client: MagicMock,
|
||||
mock_msal_cls: MagicMock,
|
||||
mock_load_cert: MagicMock,
|
||||
) -> None:
|
||||
"""certificate auth + include_site_pages=True must resolve sp_tenant_domain."""
|
||||
mock_msal_cls.return_value = _make_mock_msal()
|
||||
mock_load_cert.return_value = MagicMock()
|
||||
connector = SharepointConnector(sites=[SITE_URL], include_site_pages=True)
|
||||
|
||||
connector.load_credentials(CERTIFICATE_CREDS)
|
||||
|
||||
assert connector.sp_tenant_domain == EXPECTED_TENANT_DOMAIN
|
||||
|
||||
|
||||
@patch("onyx.connectors.sharepoint.connector.load_certificate_from_pfx")
|
||||
@patch("onyx.connectors.sharepoint.connector.msal.ConfidentialClientApplication")
|
||||
@patch("onyx.connectors.sharepoint.connector.GraphClient")
|
||||
def test_certificate_without_site_pages_sets_tenant_domain(
|
||||
_mock_graph_client: MagicMock,
|
||||
mock_msal_cls: MagicMock,
|
||||
mock_load_cert: MagicMock,
|
||||
) -> None:
|
||||
"""certificate auth + include_site_pages=False must still resolve sp_tenant_domain
|
||||
because _create_rest_client_context is also called for drive items."""
|
||||
mock_msal_cls.return_value = _make_mock_msal()
|
||||
mock_load_cert.return_value = MagicMock()
|
||||
connector = SharepointConnector(sites=[SITE_URL], include_site_pages=False)
|
||||
|
||||
connector.load_credentials(CERTIFICATE_CREDS)
|
||||
|
||||
assert connector.sp_tenant_domain == EXPECTED_TENANT_DOMAIN
|
||||
242
backend/tests/unit/onyx/connectors/web/test_slim_retrieval.py
Normal file
242
backend/tests/unit/onyx/connectors/web/test_slim_retrieval.py
Normal file
@@ -0,0 +1,242 @@
|
||||
"""Unit tests for WebConnector.retrieve_all_slim_docs (slim pruning path)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.connectors.web.connector import WEB_CONNECTOR_VALID_SETTINGS
|
||||
from onyx.connectors.web.connector import WebConnector
|
||||
|
||||
BASE_URL = "http://example.com"
|
||||
|
||||
SINGLE_PAGE_HTML = (
|
||||
"<html><body><p>Content that should not appear in slim output</p></body></html>"
|
||||
)
|
||||
|
||||
RECURSIVE_ROOT_HTML = """
|
||||
<html><body>
|
||||
<a href="/page2">Page 2</a>
|
||||
<a href="/page3">Page 3</a>
|
||||
</body></html>
|
||||
"""
|
||||
|
||||
PAGE2_HTML = "<html><body><p>page 2</p></body></html>"
|
||||
PAGE3_HTML = "<html><body><p>page 3</p></body></html>"
|
||||
|
||||
|
||||
def _make_playwright_context_mock(url_to_html: dict[str, str]) -> MagicMock:
|
||||
"""Return a BrowserContext mock whose pages respond based on goto URL."""
|
||||
context = MagicMock()
|
||||
|
||||
def _new_page() -> MagicMock:
|
||||
page = MagicMock()
|
||||
visited: list[str] = []
|
||||
|
||||
def _goto(url: str, **kwargs: Any) -> MagicMock: # noqa: ARG001
|
||||
visited.append(url)
|
||||
page.url = url
|
||||
response = MagicMock()
|
||||
response.status = 200
|
||||
response.header_value.return_value = None # no cf-ray
|
||||
return response
|
||||
|
||||
def _content() -> str:
|
||||
return url_to_html.get(
|
||||
visited[-1] if visited else "", "<html><body></body></html>"
|
||||
)
|
||||
|
||||
page.goto.side_effect = _goto
|
||||
page.content.side_effect = _content
|
||||
return page
|
||||
|
||||
context.new_page.side_effect = _new_page
|
||||
return context
|
||||
|
||||
|
||||
def _make_playwright_mock() -> MagicMock:
|
||||
playwright = MagicMock()
|
||||
playwright.stop = MagicMock()
|
||||
return playwright
|
||||
|
||||
|
||||
def _make_page_mock(html: str, cf_ray: str | None = None) -> MagicMock:
|
||||
"""Return a Playwright page mock. Set cf_ray to simulate a CloudFlare response."""
|
||||
page = MagicMock()
|
||||
page.url = BASE_URL + "/"
|
||||
response = MagicMock()
|
||||
response.status = 200
|
||||
response.header_value.side_effect = lambda h: cf_ray if h == "cf-ray" else None
|
||||
page.goto.return_value = response
|
||||
page.content.return_value = html
|
||||
return page
|
||||
|
||||
|
||||
@patch("onyx.connectors.web.connector.check_internet_connection")
|
||||
@patch("onyx.connectors.web.connector.requests.head")
|
||||
@patch("onyx.connectors.web.connector.start_playwright")
|
||||
def test_slim_yields_slim_documents(
|
||||
mock_start_playwright: MagicMock,
|
||||
mock_head: MagicMock,
|
||||
_mock_check: MagicMock,
|
||||
) -> None:
|
||||
"""retrieve_all_slim_docs yields SlimDocuments with the correct URL as id."""
|
||||
context = _make_playwright_context_mock({BASE_URL + "/": SINGLE_PAGE_HTML})
|
||||
mock_start_playwright.return_value = (_make_playwright_mock(), context)
|
||||
mock_head.return_value.headers = {"content-type": "text/html"}
|
||||
|
||||
connector = WebConnector(
|
||||
base_url=BASE_URL + "/",
|
||||
web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
|
||||
)
|
||||
|
||||
docs = [doc for batch in connector.retrieve_all_slim_docs() for doc in batch]
|
||||
|
||||
assert len(docs) == 1
|
||||
assert isinstance(docs[0], SlimDocument)
|
||||
assert docs[0].id == BASE_URL + "/"
|
||||
|
||||
|
||||
@patch("onyx.connectors.web.connector.check_internet_connection")
|
||||
@patch("onyx.connectors.web.connector.requests.head")
|
||||
@patch("onyx.connectors.web.connector.start_playwright")
|
||||
def test_slim_skips_content_extraction(
|
||||
mock_start_playwright: MagicMock,
|
||||
mock_head: MagicMock,
|
||||
_mock_check: MagicMock,
|
||||
) -> None:
|
||||
"""web_html_cleanup is never called in slim mode."""
|
||||
context = _make_playwright_context_mock({BASE_URL + "/": SINGLE_PAGE_HTML})
|
||||
mock_start_playwright.return_value = (_make_playwright_mock(), context)
|
||||
mock_head.return_value.headers = {"content-type": "text/html"}
|
||||
|
||||
connector = WebConnector(
|
||||
base_url=BASE_URL + "/",
|
||||
web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
|
||||
)
|
||||
|
||||
with patch("onyx.connectors.web.connector.web_html_cleanup") as mock_cleanup:
|
||||
list(connector.retrieve_all_slim_docs())
|
||||
mock_cleanup.assert_not_called()
|
||||
|
||||
|
||||
@patch("onyx.connectors.web.connector.check_internet_connection")
|
||||
@patch("onyx.connectors.web.connector.requests.head")
|
||||
@patch("onyx.connectors.web.connector.start_playwright")
|
||||
def test_slim_discovers_links_recursively(
|
||||
mock_start_playwright: MagicMock,
|
||||
mock_head: MagicMock,
|
||||
_mock_check: MagicMock,
|
||||
) -> None:
|
||||
"""In RECURSIVE mode, internal <a href> links are followed and all URLs yielded."""
|
||||
url_to_html = {
|
||||
BASE_URL + "/": RECURSIVE_ROOT_HTML,
|
||||
BASE_URL + "/page2": PAGE2_HTML,
|
||||
BASE_URL + "/page3": PAGE3_HTML,
|
||||
}
|
||||
context = _make_playwright_context_mock(url_to_html)
|
||||
mock_start_playwright.return_value = (_make_playwright_mock(), context)
|
||||
mock_head.return_value.headers = {"content-type": "text/html"}
|
||||
|
||||
connector = WebConnector(
|
||||
base_url=BASE_URL + "/",
|
||||
web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value,
|
||||
)
|
||||
|
||||
ids = {
|
||||
doc.id
|
||||
for batch in connector.retrieve_all_slim_docs()
|
||||
for doc in batch
|
||||
if isinstance(doc, SlimDocument)
|
||||
}
|
||||
|
||||
assert ids == {
|
||||
BASE_URL + "/",
|
||||
BASE_URL + "/page2",
|
||||
BASE_URL + "/page3",
|
||||
}
|
||||
|
||||
|
||||
@patch("onyx.connectors.web.connector.check_internet_connection")
|
||||
@patch("onyx.connectors.web.connector.requests.head")
|
||||
@patch("onyx.connectors.web.connector.start_playwright")
|
||||
def test_non_cloudflare_skips_5s_wait(
|
||||
mock_start_playwright: MagicMock,
|
||||
mock_head: MagicMock,
|
||||
_mock_check: MagicMock,
|
||||
) -> None:
|
||||
"""Pages without a cf-ray header never trigger the 5s render wait."""
|
||||
page = _make_page_mock(SINGLE_PAGE_HTML, cf_ray=None)
|
||||
context = MagicMock()
|
||||
context.new_page.return_value = page
|
||||
mock_start_playwright.return_value = (_make_playwright_mock(), context)
|
||||
mock_head.return_value.headers = {"content-type": "text/html"}
|
||||
|
||||
connector = WebConnector(
|
||||
base_url=BASE_URL + "/",
|
||||
web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
|
||||
)
|
||||
|
||||
list(connector.retrieve_all_slim_docs())
|
||||
|
||||
page.wait_for_timeout.assert_not_called()
|
||||
|
||||
|
||||
@patch("onyx.connectors.web.connector.check_internet_connection")
|
||||
@patch("onyx.connectors.web.connector.requests.head")
|
||||
@patch("onyx.connectors.web.connector.start_playwright")
|
||||
def test_cloudflare_applies_5s_wait(
|
||||
mock_start_playwright: MagicMock,
|
||||
mock_head: MagicMock,
|
||||
_mock_check: MagicMock,
|
||||
) -> None:
|
||||
"""Pages with a cf-ray header trigger the 5s wait before networkidle."""
|
||||
page = _make_page_mock(SINGLE_PAGE_HTML, cf_ray="abc123-LAX")
|
||||
context = MagicMock()
|
||||
context.new_page.return_value = page
|
||||
mock_start_playwright.return_value = (_make_playwright_mock(), context)
|
||||
mock_head.return_value.headers = {"content-type": "text/html"}
|
||||
|
||||
connector = WebConnector(
|
||||
base_url=BASE_URL + "/",
|
||||
web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
|
||||
)
|
||||
|
||||
list(connector.retrieve_all_slim_docs())
|
||||
|
||||
page.wait_for_timeout.assert_called_once_with(5000)
|
||||
|
||||
|
||||
@patch("onyx.connectors.web.connector.time")
|
||||
@patch("onyx.connectors.web.connector.check_internet_connection")
|
||||
@patch("onyx.connectors.web.connector.requests.head")
|
||||
@patch("onyx.connectors.web.connector.start_playwright")
|
||||
def test_403_applies_5s_wait(
|
||||
mock_start_playwright: MagicMock,
|
||||
mock_head: MagicMock,
|
||||
_mock_check: MagicMock,
|
||||
_mock_time: MagicMock,
|
||||
) -> None:
|
||||
"""A 403 response triggers the 5s wait (common bot-detection challenge entry point)."""
|
||||
page = _make_page_mock(SINGLE_PAGE_HTML, cf_ray=None)
|
||||
page.goto.return_value.status = 403
|
||||
context = MagicMock()
|
||||
context.new_page.return_value = page
|
||||
mock_start_playwright.return_value = (_make_playwright_mock(), context)
|
||||
mock_head.return_value.headers = {"content-type": "text/html"}
|
||||
|
||||
connector = WebConnector(
|
||||
base_url=BASE_URL + "/",
|
||||
web_connector_type=WEB_CONNECTOR_VALID_SETTINGS.SINGLE.value,
|
||||
)
|
||||
|
||||
# All retries return 403 so no docs are found — that's expected here.
|
||||
# We only care that the 5s wait fired.
|
||||
try:
|
||||
list(connector.retrieve_all_slim_docs())
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
page.wait_for_timeout.assert_called_with(5000)
|
||||
@@ -12,6 +12,10 @@ dependency on pypdf internals (pypdf.generic).
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.file_processing import extract_file_text
|
||||
from onyx.file_processing.extract_file_text import count_pdf_embedded_images
|
||||
from onyx.file_processing.extract_file_text import pdf_to_text
|
||||
from onyx.file_processing.extract_file_text import read_pdf_file
|
||||
from onyx.file_processing.password_validation import is_pdf_protected
|
||||
@@ -96,6 +100,80 @@ class TestReadPdfFile:
|
||||
# Returned list is empty when callback is used
|
||||
assert images == []
|
||||
|
||||
def test_image_cap_skips_images_above_limit(
|
||||
self, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
"""When the embedded-image cap is exceeded, remaining images are skipped.
|
||||
|
||||
The cap protects the user-file-processing worker from OOMing on PDFs
|
||||
with thousands of embedded images. Setting the cap to 0 should yield
|
||||
zero extracted images even though the fixture has one.
|
||||
"""
|
||||
monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)
|
||||
_, _, images = read_pdf_file(_load("with_image.pdf"), extract_images=True)
|
||||
assert images == []
|
||||
|
||||
def test_image_cap_at_limit_extracts_up_to_cap(
|
||||
self, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
"""A cap >= image count behaves identically to the uncapped path."""
|
||||
monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 100)
|
||||
_, _, images = read_pdf_file(_load("with_image.pdf"), extract_images=True)
|
||||
assert len(images) == 1
|
||||
|
||||
def test_image_cap_with_callback_stops_streaming_at_limit(
|
||||
self, monkeypatch: pytest.MonkeyPatch
|
||||
) -> None:
|
||||
"""The cap also short-circuits the streaming callback path."""
|
||||
monkeypatch.setattr(extract_file_text, "MAX_EMBEDDED_IMAGES_PER_FILE", 0)
|
||||
collected: list[tuple[bytes, str]] = []
|
||||
|
||||
def callback(data: bytes, name: str) -> None:
|
||||
collected.append((data, name))
|
||||
|
||||
read_pdf_file(
|
||||
_load("with_image.pdf"), extract_images=True, image_callback=callback
|
||||
)
|
||||
assert collected == []
|
||||
|
||||
|
||||
# ── count_pdf_embedded_images ────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestCountPdfEmbeddedImages:
|
||||
def test_returns_count_for_normal_pdf(self) -> None:
|
||||
assert count_pdf_embedded_images(_load("with_image.pdf"), cap=10) == 1
|
||||
|
||||
def test_short_circuits_above_cap(self) -> None:
|
||||
# with_image.pdf has 1 image. cap=0 means "anything > 0 is over cap" —
|
||||
# function returns on first increment as the over-cap sentinel.
|
||||
assert count_pdf_embedded_images(_load("with_image.pdf"), cap=0) == 1
|
||||
|
||||
def test_returns_zero_for_pdf_without_images(self) -> None:
|
||||
assert count_pdf_embedded_images(_load("simple.pdf"), cap=10) == 0
|
||||
|
||||
def test_returns_zero_for_invalid_pdf(self) -> None:
|
||||
assert count_pdf_embedded_images(BytesIO(b"not a pdf"), cap=10) == 0
|
||||
|
||||
def test_returns_zero_for_password_locked_pdf(self) -> None:
|
||||
# encrypted.pdf has an open password; we can't inspect without it, so
|
||||
# the helper returns 0 — callers rely on the password-protected check
|
||||
# that runs earlier in the upload pipeline.
|
||||
assert count_pdf_embedded_images(_load("encrypted.pdf"), cap=10) == 0
|
||||
|
||||
def test_inspects_owner_password_only_pdf(self) -> None:
|
||||
# owner_protected.pdf is encrypted but has no open password. It should
|
||||
# decrypt with an empty string and count images normally. The fixture
|
||||
# has zero images, so 0 is a real count (not the "bail on encrypted"
|
||||
# path).
|
||||
assert count_pdf_embedded_images(_load("owner_protected.pdf"), cap=10) == 0
|
||||
|
||||
def test_preserves_file_position(self) -> None:
|
||||
pdf = _load("with_image.pdf")
|
||||
pdf.seek(42)
|
||||
count_pdf_embedded_images(pdf, cap=10)
|
||||
assert pdf.tell() == 42
|
||||
|
||||
|
||||
# ── pdf_to_text ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import io
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import openpyxl
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
@@ -321,6 +322,17 @@ class TestXlsxSheetExtraction:
|
||||
sheets = xlsx_sheet_extraction(bad_file, file_name="~$temp.xlsx")
|
||||
assert sheets == []
|
||||
|
||||
def test_known_openpyxl_bug_max_value_returns_empty(self) -> None:
|
||||
"""openpyxl's strict descriptor validation rejects font family
|
||||
values >14 with 'Max value is 14'. Treat as a known openpyxl bug
|
||||
and skip the file rather than fail the whole connector batch."""
|
||||
with patch(
|
||||
"onyx.file_processing.extract_file_text.openpyxl.load_workbook",
|
||||
side_effect=ValueError("Max value is 14"),
|
||||
):
|
||||
sheets = xlsx_sheet_extraction(io.BytesIO(b""), file_name="bad_font.xlsx")
|
||||
assert sheets == []
|
||||
|
||||
def test_csv_content_matches_xlsx_to_text_per_sheet(self) -> None:
|
||||
"""For a single-sheet workbook, xlsx_to_text output should equal
|
||||
the csv_text from xlsx_sheet_extraction — they share the same
|
||||
|
||||
@@ -129,12 +129,36 @@ class TestWorkerHealthCollector:
|
||||
up = families[1]
|
||||
assert up.name == "onyx_celery_worker_up"
|
||||
assert len(up.samples) == 3
|
||||
# Labels use short names (before @)
|
||||
labels = {s.labels["worker"] for s in up.samples}
|
||||
assert labels == {"primary", "docfetching", "monitoring"}
|
||||
label_pairs = {
|
||||
(s.labels["worker_type"], s.labels["hostname"]) for s in up.samples
|
||||
}
|
||||
assert label_pairs == {
|
||||
("primary", "host1"),
|
||||
("docfetching", "host1"),
|
||||
("monitoring", "host1"),
|
||||
}
|
||||
for sample in up.samples:
|
||||
assert sample.value == 1
|
||||
|
||||
def test_replicas_of_same_worker_type_are_distinct(self) -> None:
|
||||
"""Regression: ``docprocessing@pod-1`` and ``docprocessing@pod-2`` must
|
||||
produce separate samples, not collapse into one duplicate-timestamp
|
||||
series.
|
||||
"""
|
||||
monitor = WorkerHeartbeatMonitor(MagicMock())
|
||||
monitor._on_heartbeat({"hostname": "docprocessing@pod-1"})
|
||||
monitor._on_heartbeat({"hostname": "docprocessing@pod-2"})
|
||||
monitor._on_heartbeat({"hostname": "docprocessing@pod-3"})
|
||||
|
||||
collector = WorkerHealthCollector(cache_ttl=0)
|
||||
collector.set_monitor(monitor)
|
||||
|
||||
up = collector.collect()[1]
|
||||
assert len(up.samples) == 3
|
||||
hostnames = {s.labels["hostname"] for s in up.samples}
|
||||
assert hostnames == {"pod-1", "pod-2", "pod-3"}
|
||||
assert all(s.labels["worker_type"] == "docprocessing" for s in up.samples)
|
||||
|
||||
def test_reports_dead_worker(self) -> None:
|
||||
monitor = WorkerHeartbeatMonitor(MagicMock())
|
||||
monitor._on_heartbeat({"hostname": "primary@host1"})
|
||||
@@ -151,9 +175,9 @@ class TestWorkerHealthCollector:
|
||||
assert active.samples[0].value == 1
|
||||
|
||||
up = families[1]
|
||||
samples_by_name = {s.labels["worker"]: s.value for s in up.samples}
|
||||
assert samples_by_name["primary"] == 1
|
||||
assert samples_by_name["monitoring"] == 0
|
||||
samples_by_type = {s.labels["worker_type"]: s.value for s in up.samples}
|
||||
assert samples_by_type["primary"] == 1
|
||||
assert samples_by_type["monitoring"] == 0
|
||||
|
||||
def test_empty_monitor_returns_zero(self) -> None:
|
||||
monitor = WorkerHeartbeatMonitor(MagicMock())
|
||||
|
||||
@@ -58,8 +58,7 @@ SERVICE_ORDER=(
|
||||
validate_template() {
|
||||
local template_file=$1
|
||||
echo "Validating template: $template_file..."
|
||||
aws cloudformation validate-template --template-body file://"$template_file" --region "$AWS_REGION" > /dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! aws cloudformation validate-template --template-body file://"$template_file" --region "$AWS_REGION" > /dev/null; then
|
||||
echo "Error: Validation failed for $template_file. Exiting."
|
||||
exit 1
|
||||
fi
|
||||
@@ -108,13 +107,15 @@ deploy_stack() {
|
||||
fi
|
||||
|
||||
# Create temporary parameters file for this template
|
||||
local temp_params_file=$(create_parameters_from_json "$template_file")
|
||||
local temp_params_file
|
||||
temp_params_file=$(create_parameters_from_json "$template_file")
|
||||
|
||||
# Special handling for SubnetIDs parameter if needed
|
||||
if grep -q "SubnetIDs" "$template_file"; then
|
||||
echo "Template uses SubnetIDs parameter, ensuring it's properly formatted..."
|
||||
# Make sure we're passing SubnetIDs as a comma-separated list
|
||||
local subnet_ids=$(remove_comments "$CONFIG_FILE" | jq -r '.SubnetIDs // empty')
|
||||
local subnet_ids
|
||||
subnet_ids=$(remove_comments "$CONFIG_FILE" | jq -r '.SubnetIDs // empty')
|
||||
if [ -n "$subnet_ids" ]; then
|
||||
echo "Using SubnetIDs from config: $subnet_ids"
|
||||
else
|
||||
@@ -123,15 +124,13 @@ deploy_stack() {
|
||||
fi
|
||||
|
||||
echo "Deploying stack: $stack_name with template: $template_file and generated config from: $CONFIG_FILE..."
|
||||
aws cloudformation deploy \
|
||||
if ! aws cloudformation deploy \
|
||||
--stack-name "$stack_name" \
|
||||
--template-file "$template_file" \
|
||||
--parameter-overrides file://"$temp_params_file" \
|
||||
--capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM CAPABILITY_AUTO_EXPAND \
|
||||
--region "$AWS_REGION" \
|
||||
--no-cli-auto-prompt > /dev/null
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
--no-cli-auto-prompt > /dev/null; then
|
||||
echo "Error: Deployment failed for $stack_name. Exiting."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -52,11 +52,9 @@ delete_stack() {
|
||||
--region "$AWS_REGION"
|
||||
|
||||
echo "Waiting for stack $stack_name to be deleted..."
|
||||
aws cloudformation wait stack-delete-complete \
|
||||
if aws cloudformation wait stack-delete-complete \
|
||||
--stack-name "$stack_name" \
|
||||
--region "$AWS_REGION"
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
--region "$AWS_REGION"; then
|
||||
echo "Stack $stack_name deleted successfully."
|
||||
sleep 10
|
||||
else
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
#!/bin/sh
|
||||
# fill in the template
|
||||
export ONYX_BACKEND_API_HOST="${ONYX_BACKEND_API_HOST:-api_server}"
|
||||
export ONYX_WEB_SERVER_HOST="${ONYX_WEB_SERVER_HOST:-web_server}"
|
||||
@@ -16,12 +17,15 @@ echo "Using web server host: $ONYX_WEB_SERVER_HOST"
|
||||
echo "Using MCP server host: $ONYX_MCP_SERVER_HOST"
|
||||
echo "Using nginx proxy timeouts - connect: ${NGINX_PROXY_CONNECT_TIMEOUT}s, send: ${NGINX_PROXY_SEND_TIMEOUT}s, read: ${NGINX_PROXY_READ_TIMEOUT}s"
|
||||
|
||||
# shellcheck disable=SC2016
|
||||
envsubst '$DOMAIN $SSL_CERT_FILE_NAME $SSL_CERT_KEY_FILE_NAME $ONYX_BACKEND_API_HOST $ONYX_WEB_SERVER_HOST $ONYX_MCP_SERVER_HOST $NGINX_PROXY_CONNECT_TIMEOUT $NGINX_PROXY_SEND_TIMEOUT $NGINX_PROXY_READ_TIMEOUT' < "/etc/nginx/conf.d/$1" > /etc/nginx/conf.d/app.conf
|
||||
|
||||
# Conditionally create MCP server configuration
|
||||
if [ "${MCP_SERVER_ENABLED}" = "True" ] || [ "${MCP_SERVER_ENABLED}" = "true" ]; then
|
||||
echo "MCP server is enabled, creating MCP configuration..."
|
||||
# shellcheck disable=SC2016
|
||||
envsubst '$ONYX_MCP_SERVER_HOST' < "/etc/nginx/conf.d/mcp_upstream.conf.inc.template" > /etc/nginx/conf.d/mcp_upstream.conf.inc
|
||||
# shellcheck disable=SC2016
|
||||
envsubst '$ONYX_MCP_SERVER_HOST' < "/etc/nginx/conf.d/mcp.conf.inc.template" > /etc/nginx/conf.d/mcp.conf.inc
|
||||
else
|
||||
echo "MCP server is disabled, removing MCP configuration..."
|
||||
|
||||
@@ -12,7 +12,7 @@ dependencies = [
|
||||
"cohere==5.6.1",
|
||||
"fastapi==0.133.1",
|
||||
"google-genai==1.52.0",
|
||||
"litellm==1.81.6",
|
||||
"litellm[google]==1.81.6",
|
||||
"openai==2.14.0",
|
||||
"pydantic==2.11.7",
|
||||
"prometheus_client>=0.21.1",
|
||||
@@ -69,7 +69,7 @@ backend = [
|
||||
"langchain-core==1.2.28",
|
||||
"lazy_imports==1.0.1",
|
||||
"lxml==5.3.0",
|
||||
"Mako==1.2.4",
|
||||
"Mako==1.3.11",
|
||||
# NOTE: Do not update without understanding the patching behavior in
|
||||
# get_markitdown_converter in
|
||||
# backend/onyx/file_processing/extract_file_text.py and what impacts
|
||||
@@ -96,7 +96,7 @@ backend = [
|
||||
"python-gitlab==5.6.0",
|
||||
"python-pptx==0.6.23",
|
||||
"pypandoc_binary==1.16.2",
|
||||
"pypdf==6.10.0",
|
||||
"pypdf==6.10.2",
|
||||
"pytest-mock==3.12.0",
|
||||
"pytest-playwright==0.7.2",
|
||||
"python-docx==1.1.2",
|
||||
|
||||
@@ -48,6 +48,19 @@ func runWebScript(args []string) {
|
||||
log.Fatalf("Failed to find web directory: %v", err)
|
||||
}
|
||||
|
||||
nodeModules := filepath.Join(webDir, "node_modules")
|
||||
if _, err := os.Stat(nodeModules); os.IsNotExist(err) {
|
||||
log.Info("node_modules not found, running npm install --no-save...")
|
||||
installCmd := exec.Command("npm", "install", "--no-save")
|
||||
installCmd.Dir = webDir
|
||||
installCmd.Stdout = os.Stdout
|
||||
installCmd.Stderr = os.Stderr
|
||||
installCmd.Stdin = os.Stdin
|
||||
if err := installCmd.Run(); err != nil {
|
||||
log.Fatalf("Failed to run npm install: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
scriptName := args[0]
|
||||
scriptArgs := args[1:]
|
||||
if len(scriptArgs) > 0 && scriptArgs[0] == "--" {
|
||||
|
||||
251
uv.lock
generated
251
uv.lock
generated
@@ -447,14 +447,14 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "authlib"
|
||||
version = "1.6.9"
|
||||
version = "1.6.11"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cryptography" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/af/98/00d3dd826d46959ad8e32af2dbb2398868fd9fd0683c26e56d0789bd0e68/authlib-1.6.9.tar.gz", hash = "sha256:d8f2421e7e5980cc1ddb4e32d3f5fa659cfaf60d8eaf3281ebed192e4ab74f04", size = 165134, upload-time = "2026-03-02T07:44:01.998Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/28/10/b325d58ffe86815b399334a101e63bc6fa4e1953921cb23703b48a0a0220/authlib-1.6.11.tar.gz", hash = "sha256:64db35b9b01aeccb4715a6c9a6613a06f2bd7be2ab9d2eb89edd1dfc7580a38f", size = 165359, upload-time = "2026-04-16T07:22:50.279Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/53/23/b65f568ed0c22f1efacb744d2db1a33c8068f384b8c9b482b52ebdbc3ef6/authlib-1.6.9-py2.py3-none-any.whl", hash = "sha256:f08b4c14e08f0861dc18a32357b33fbcfd2ea86cfe3fe149484b4d764c4a0ac3", size = 244197, upload-time = "2026-03-02T07:44:00.307Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/57/2f/55fca558f925a51db046e5b929deb317ddb05afed74b22d89f4eca578980/authlib-1.6.11-py2.py3-none-any.whl", hash = "sha256:c8687a9a26451c51a34a06fa17bb97cb15bba46a6a626755e2d7f50da8bff3e3", size = 244469, upload-time = "2026-04-16T07:22:48.413Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2124,6 +2124,12 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/d4/90197b416cb61cefd316964fd9e7bd8324bcbafabf40eef14a9f20b81974/google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c", size = 173706, upload-time = "2025-10-28T21:34:50.151Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
grpc = [
|
||||
{ name = "grpcio" },
|
||||
{ name = "grpcio-status" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-api-python-client"
|
||||
version = "2.86.0"
|
||||
@@ -2181,6 +2187,124 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4a/07/8d9a8186e6768b55dfffeb57c719bc03770cf8a970a074616ae6f9e26a57/google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb", size = 18926, upload-time = "2023-02-07T20:53:18.837Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-aiplatform"
|
||||
version = "1.133.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "docstring-parser" },
|
||||
{ name = "google-api-core", extra = ["grpc"] },
|
||||
{ name = "google-auth" },
|
||||
{ name = "google-cloud-bigquery" },
|
||||
{ name = "google-cloud-resource-manager" },
|
||||
{ name = "google-cloud-storage" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "packaging" },
|
||||
{ name = "proto-plus" },
|
||||
{ name = "protobuf" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d4/be/31ce7fd658ddebafbe5583977ddee536b2bacc491ad10b5a067388aec66f/google_cloud_aiplatform-1.133.0.tar.gz", hash = "sha256:3a6540711956dd178daaab3c2c05db476e46d94ac25912b8cf4f59b00b058ae0", size = 9921309, upload-time = "2026-01-08T22:11:25.079Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/01/5b/ef74ff65aebb74eaba51078e33ddd897247ba0d1197fd5a7953126205519/google_cloud_aiplatform-1.133.0-py2.py3-none-any.whl", hash = "sha256:dfc81228e987ca10d1c32c7204e2131b3c8d6b7c8e0b4e23bf7c56816bc4c566", size = 8184595, upload-time = "2026-01-08T22:11:22.067Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-bigquery"
|
||||
version = "3.41.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core", extra = ["grpc"] },
|
||||
{ name = "google-auth" },
|
||||
{ name = "google-cloud-core" },
|
||||
{ name = "google-resumable-media" },
|
||||
{ name = "packaging" },
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ce/13/6515c7aab55a4a0cf708ffd309fb9af5bab54c13e32dc22c5acd6497193c/google_cloud_bigquery-3.41.0.tar.gz", hash = "sha256:2217e488b47ed576360c9b2cc07d59d883a54b83167c0ef37f915c26b01a06fe", size = 513434, upload-time = "2026-03-30T22:50:55.347Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/40/33/1d3902efadef9194566d499d61507e1f038454e0b55499d2d7f8ab2a4fee/google_cloud_bigquery-3.41.0-py3-none-any.whl", hash = "sha256:2a5b5a737b401cbd824a6e5eac7554100b878668d908e6548836b5d8aaa4dcaa", size = 262343, upload-time = "2026-03-30T22:48:45.444Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-core"
|
||||
version = "2.5.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core" },
|
||||
{ name = "google-auth" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/dc/24/6ca08b0a03c7b0c620427503ab00353a4ae806b848b93bcea18b6b76fde6/google_cloud_core-2.5.1.tar.gz", hash = "sha256:3dc94bdec9d05a31d9f355045ed0f369fbc0d8c665076c734f065d729800f811", size = 36078, upload-time = "2026-03-30T22:50:08.057Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/73/d9/5bb050cb32826466aa9b25f79e2ca2879fe66cb76782d4ed798dd7506151/google_cloud_core-2.5.1-py3-none-any.whl", hash = "sha256:ea62cdf502c20e3e14be8a32c05ed02113d7bef454e40ff3fab6fe1ec9f1f4e7", size = 29452, upload-time = "2026-03-30T22:48:31.567Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-resource-manager"
|
||||
version = "1.17.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core", extra = ["grpc"] },
|
||||
{ name = "google-auth" },
|
||||
{ name = "grpc-google-iam-v1" },
|
||||
{ name = "grpcio" },
|
||||
{ name = "proto-plus" },
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b2/1a/13060cabf553d52d151d2afc26b39561e82853380d499dd525a0d422d9f0/google_cloud_resource_manager-1.17.0.tar.gz", hash = "sha256:0f486b62e2c58ff992a3a50fa0f4a96eef7750aa6c971bb373398ccb91828660", size = 464971, upload-time = "2026-03-26T22:17:29.204Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/f7/661d7a9023e877a226b5683429c3662f75a29ef45cb1464cf39adb689218/google_cloud_resource_manager-1.17.0-py3-none-any.whl", hash = "sha256:e479baf4b014a57f298e01b8279e3290b032e3476d69c8e5e1427af8f82739a5", size = 404403, upload-time = "2026-03-26T22:15:26.57Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-storage"
|
||||
version = "3.10.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core" },
|
||||
{ name = "google-auth" },
|
||||
{ name = "google-cloud-core" },
|
||||
{ name = "google-crc32c" },
|
||||
{ name = "google-resumable-media" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4c/47/205eb8e9a1739b5345843e5a425775cbdc472cc38e7eda082ba5b8d02450/google_cloud_storage-3.10.1.tar.gz", hash = "sha256:97db9aa4460727982040edd2bd13ff3d5e2260b5331ad22895802da1fc2a5286", size = 17309950, upload-time = "2026-03-23T09:35:23.409Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/ff/ca9ab2417fa913d75aae38bf40bf856bb2749a604b2e0f701b37cfcd23cc/google_cloud_storage-3.10.1-py3-none-any.whl", hash = "sha256:a72f656759b7b99bda700f901adcb3425a828d4a29f911bc26b3ea79c5b1217f", size = 324453, upload-time = "2026-03-23T09:35:21.368Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-crc32c"
|
||||
version = "1.8.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/ef/21ccfaab3d5078d41efe8612e0ed0bfc9ce22475de074162a91a25f7980d/google_crc32c-1.8.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:014a7e68d623e9a4222d663931febc3033c5c7c9730785727de2a81f87d5bab8", size = 31298, upload-time = "2025-12-16T00:20:32.241Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/b8/f8413d3f4b676136e965e764ceedec904fe38ae8de0cdc52a12d8eb1096e/google_crc32c-1.8.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:86cfc00fe45a0ac7359e5214a1704e51a99e757d0272554874f419f79838c5f7", size = 30872, upload-time = "2025-12-16T00:33:58.785Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/fd/33aa4ec62b290477181c55bb1c9302c9698c58c0ce9a6ab4874abc8b0d60/google_crc32c-1.8.0-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:19b40d637a54cb71e0829179f6cb41835f0fbd9e8eb60552152a8b52c36cbe15", size = 33243, upload-time = "2025-12-16T00:40:21.46Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/03/4820b3bd99c9653d1a5210cb32f9ba4da9681619b4d35b6a052432df4773/google_crc32c-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:17446feb05abddc187e5441a45971b8394ea4c1b6efd88ab0af393fd9e0a156a", size = 33608, upload-time = "2025-12-16T00:40:22.204Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/43/acf61476a11437bf9733fb2f70599b1ced11ec7ed9ea760fdd9a77d0c619/google_crc32c-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:71734788a88f551fbd6a97be9668a0020698e07b2bf5b3aa26a36c10cdfb27b2", size = 34439, upload-time = "2025-12-16T00:35:20.458Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300, upload-time = "2025-12-16T00:21:56.723Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867, upload-time = "2025-12-16T00:38:31.302Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364, upload-time = "2025-12-16T00:40:22.96Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740, upload-time = "2025-12-16T00:40:23.96Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437, upload-time = "2025-12-16T00:35:21.395Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/db/000f15b41724589b0e7bc24bc7a8967898d8d3bc8caf64c513d91ef1f6c0/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b", size = 31297, upload-time = "2025-12-16T00:23:20.709Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d7/0d/8ebed0c39c53a7e838e2a486da8abb0e52de135f1b376ae2f0b160eb4c1a/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27", size = 30867, upload-time = "2025-12-16T00:43:14.628Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/42/b468aec74a0354b34c8cbf748db20d6e350a68a2b0912e128cabee49806c/google_crc32c-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa", size = 33344, upload-time = "2025-12-16T00:40:24.742Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/e8/b33784d6fc77fb5062a8a7854e43e1e618b87d5ddf610a88025e4de6226e/google_crc32c-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8", size = 33694, upload-time = "2025-12-16T00:40:25.505Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/92/b1/d3cbd4d988afb3d8e4db94ca953df429ed6db7282ed0e700d25e6c7bfc8d/google_crc32c-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f", size = 34435, upload-time = "2025-12-16T00:35:22.107Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/21/88/8ecf3c2b864a490b9e7010c84fd203ec8cf3b280651106a3a74dd1b0ca72/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697", size = 31301, upload-time = "2025-12-16T00:24:48.527Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/36/c6/f7ff6c11f5ca215d9f43d3629163727a272eabc356e5c9b2853df2bfe965/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651", size = 30868, upload-time = "2025-12-16T00:48:12.163Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/56/15/c25671c7aad70f8179d858c55a6ae8404902abe0cdcf32a29d581792b491/google_crc32c-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2", size = 33381, upload-time = "2025-12-16T00:40:26.268Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/42/fa/f50f51260d7b0ef5d4898af122d8a7ec5a84e2984f676f746445f783705f/google_crc32c-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21", size = 33734, upload-time = "2025-12-16T00:40:27.028Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/a5/7b059810934a09fb3ccb657e0843813c1fee1183d3bc2c8041800374aa2c/google_crc32c-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2", size = 34878, upload-time = "2025-12-16T00:35:23.142Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/52/c5/c171e4d8c44fec1422d801a6d2e5d7ddabd733eeda505c79730ee9607f07/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:87fa445064e7db928226b2e6f0d5304ab4cd0339e664a4e9a25029f384d9bb93", size = 28615, upload-time = "2025-12-16T00:40:29.298Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/97/7d75fe37a7a6ed171a2cf17117177e7aab7e6e0d115858741b41e9dd4254/google_crc32c-1.8.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f639065ea2042d5c034bf258a9f085eaa7af0cd250667c0635a3118e8f92c69c", size = 28800, upload-time = "2025-12-16T00:40:30.322Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-genai"
|
||||
version = "1.52.0"
|
||||
@@ -2200,6 +2324,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-resumable-media"
|
||||
version = "2.8.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-crc32c" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/3f/d1/b1ea14b93b6b78f57fc580125de44e9f593ab88dd2460f1a8a8d18f74754/google_resumable_media-2.8.2.tar.gz", hash = "sha256:f3354a182ebd193ae3f42e3ef95e6c9b10f128320de23ac7637236713b1acd70", size = 2164510, upload-time = "2026-03-30T23:34:25.369Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5e/f8/50bfaf4658431ff9de45c5c3935af7ab01157a4903c603cd0eee6e78e087/google_resumable_media-2.8.2-py3-none-any.whl", hash = "sha256:82b6d8ccd11765268cdd2a2123f417ec806b8eef3000a9a38dfe3033da5fb220", size = 81511, upload-time = "2026-03-30T23:34:09.671Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "googleapis-common-protos"
|
||||
version = "1.72.0"
|
||||
@@ -2212,6 +2348,11 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
grpc = [
|
||||
{ name = "grpcio" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "greenlet"
|
||||
version = "3.2.4"
|
||||
@@ -2262,6 +2403,85 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpc-google-iam-v1"
|
||||
version = "0.14.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "googleapis-common-protos", extra = ["grpc"] },
|
||||
{ name = "grpcio" },
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/44/4f/d098419ad0bfc06c9ce440575f05aa22d8973b6c276e86ac7890093d3c37/grpc_google_iam_v1-0.14.4.tar.gz", hash = "sha256:392b3796947ed6334e61171d9ab06bf7eb357f554e5fc7556ad7aab6d0e17038", size = 23706, upload-time = "2026-04-01T01:57:49.813Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/89/22/c2dd50c09bf679bd38173656cd4402d2511e563b33bc88f90009cf50613c/grpc_google_iam_v1-0.14.4-py3-none-any.whl", hash = "sha256:412facc320fcbd94034b4df3d557662051d4d8adfa86e0ddb4dca70a3f739964", size = 32675, upload-time = "2026-04-01T01:57:47.69Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio"
|
||||
version = "1.80.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/db/1d56e5f5823257b291962d6c0ce106146c6447f405b60b234c4f222a7cde/grpcio-1.80.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a", size = 6055009, upload-time = "2026-03-30T08:46:46.265Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6e/18/c83f3cad64c5ca63bca7e91e5e46b0d026afc5af9d0a9972472ceba294b3/grpcio-1.80.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5c07e82e822e1161354e32da2662f741a4944ea955f9f580ec8fb409dd6f6060", size = 12035295, upload-time = "2026-03-30T08:46:49.099Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0f/8e/e14966b435be2dda99fbe89db9525ea436edc79780431a1c2875a3582644/grpcio-1.80.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2", size = 6610297, upload-time = "2026-03-30T08:46:52.123Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cc/26/d5eb38f42ce0e3fdc8174ea4d52036ef8d58cc4426cb800f2610f625dd75/grpcio-1.80.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3cb8130ba457d2aa09fa6b7c3ed6b6e4e6a2685fce63cb803d479576c4d80e21", size = 7300208, upload-time = "2026-03-30T08:46:54.859Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/25/51/bd267c989f85a17a5b3eea65a6feb4ff672af41ca614e5a0279cc0ea381c/grpcio-1.80.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e5e478b3d14afd23f12e49e8b44c8684ac3c5f08561c43a5b9691c54d136ab", size = 6813442, upload-time = "2026-03-30T08:46:57.056Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9e/d9/d80eef735b19e9169e30164bbf889b46f9df9127598a83d174eb13a48b26/grpcio-1.80.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:00168469238b022500e486c1c33916acf2f2a9b2c022202cf8a1885d2e3073c1", size = 7414743, upload-time = "2026-03-30T08:46:59.682Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/de/f2/567f5bd5054398ed6b0509b9a30900376dcf2786bd936812098808b49d8d/grpcio-1.80.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8502122a3cc1714038e39a0b071acb1207ca7844208d5ea0d091317555ee7106", size = 8426046, upload-time = "2026-03-30T08:47:02.474Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/29/73ef0141b4732ff5eacd68430ff2512a65c004696997f70476a83e548e7e/grpcio-1.80.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6", size = 7851641, upload-time = "2026-03-30T08:47:05.462Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/46/69/abbfa360eb229a8623bab5f5a4f8105e445bd38ce81a89514ba55d281ad0/grpcio-1.80.0-cp311-cp311-win32.whl", hash = "sha256:51b4a7189b0bef2aa30adce3c78f09c83526cf3dddb24c6a96555e3b97340440", size = 4154368, upload-time = "2026-03-30T08:47:08.027Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/d4/ae92206d01183b08613e846076115f5ac5991bae358d2a749fa864da5699/grpcio-1.80.0-cp311-cp311-win_amd64.whl", hash = "sha256:02e64bb0bb2da14d947a49e6f120a75e947250aebe65f9629b62bb1f5c14e6e9", size = 4894235, upload-time = "2026-03-30T08:47:10.839Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio-status"
|
||||
version = "1.80.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "googleapis-common-protos" },
|
||||
{ name = "grpcio" },
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b1/ed/105f619bdd00cb47a49aa2feea6232ea2bbb04199d52a22cc6a7d603b5cb/grpcio_status-1.80.0.tar.gz", hash = "sha256:df73802a4c89a3ea88aa2aff971e886fccce162bc2e6511408b3d67a144381cd", size = 13901, upload-time = "2026-03-30T08:54:34.784Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/76/80/58cd2dfc19a07d022abe44bde7c365627f6c7cb6f692ada6c65ca437d09a/grpcio_status-1.80.0-py3-none-any.whl", hash = "sha256:4b56990363af50dbf2c2ebb80f1967185c07d87aa25aa2bea45ddb75fc181dbe", size = 14638, upload-time = "2026-03-30T08:54:01.569Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.16.0"
|
||||
@@ -3164,6 +3384,11 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e6/05/3516cc7386b220d388aa0bd833308c677e94eceb82b2756dd95e06f6a13f/litellm-1.81.6-py3-none-any.whl", hash = "sha256:573206ba194d49a1691370ba33f781671609ac77c35347f8a0411d852cf6341a", size = 12224343, upload-time = "2026-02-01T04:02:23.704Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
google = [
|
||||
{ name = "google-cloud-aiplatform" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "locket"
|
||||
version = "1.0.0"
|
||||
@@ -3278,14 +3503,14 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "mako"
|
||||
version = "1.2.4"
|
||||
version = "1.3.11"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "markupsafe" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/05/5f/2ba6e026d33a0e6ddc1dddf9958677f76f5f80c236bd65309d280b166d3e/Mako-1.2.4.tar.gz", hash = "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34", size = 497021, upload-time = "2022-11-15T14:37:51.327Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/59/8a/805404d0c0b9f3d7a326475ca008db57aea9c5c9f2e1e39ed0faa335571c/mako-1.3.11.tar.gz", hash = "sha256:071eb4ab4c5010443152255d77db7faa6ce5916f35226eb02dc34479b6858069", size = 399811, upload-time = "2026-04-14T20:19:51.493Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/03/3b/68690a035ba7347860f1b8c0cde853230ba69ff41df5884ea7d89fe68cd3/Mako-1.2.4-py3-none-any.whl", hash = "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818", size = 78672, upload-time = "2022-11-15T14:37:53.675Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/68/a5/19d7aaa7e433713ffe881df33705925a196afb9532efc8475d26593921a6/mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77", size = 78503, upload-time = "2026-04-14T20:19:53.233Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4204,7 +4429,7 @@ dependencies = [
|
||||
{ name = "fastapi" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "kubernetes" },
|
||||
{ name = "litellm" },
|
||||
{ name = "litellm", extra = ["google"] },
|
||||
{ name = "openai" },
|
||||
{ name = "prometheus-client" },
|
||||
{ name = "prometheus-fastapi-instrumentator" },
|
||||
@@ -4377,7 +4602,7 @@ requires-dist = [
|
||||
{ name = "fastapi", specifier = "==0.133.1" },
|
||||
{ name = "google-genai", specifier = "==1.52.0" },
|
||||
{ name = "kubernetes", specifier = ">=31.0.0" },
|
||||
{ name = "litellm", specifier = "==1.81.6" },
|
||||
{ name = "litellm", extras = ["google"], specifier = "==1.81.6" },
|
||||
{ name = "openai", specifier = "==2.14.0" },
|
||||
{ name = "prometheus-client", specifier = ">=0.21.1" },
|
||||
{ name = "prometheus-fastapi-instrumentator", specifier = "==7.1.0" },
|
||||
@@ -4430,7 +4655,7 @@ backend = [
|
||||
{ name = "langfuse", specifier = "==3.10.0" },
|
||||
{ name = "lazy-imports", specifier = "==1.0.1" },
|
||||
{ name = "lxml", specifier = "==5.3.0" },
|
||||
{ name = "mako", specifier = "==1.2.4" },
|
||||
{ name = "mako", specifier = "==1.3.11" },
|
||||
{ name = "markitdown", extras = ["pdf", "docx", "pptx", "xlsx", "xls"], specifier = "==0.1.2" },
|
||||
{ name = "mcp", extras = ["cli"], specifier = "==1.26.0" },
|
||||
{ name = "mistune", specifier = "==3.2.0" },
|
||||
@@ -4453,7 +4678,7 @@ backend = [
|
||||
{ name = "pygithub", specifier = "==2.5.0" },
|
||||
{ name = "pympler", specifier = "==1.1" },
|
||||
{ name = "pypandoc-binary", specifier = "==1.16.2" },
|
||||
{ name = "pypdf", specifier = "==6.10.0" },
|
||||
{ name = "pypdf", specifier = "==6.10.2" },
|
||||
{ name = "pytest-mock", specifier = "==3.12.0" },
|
||||
{ name = "pytest-playwright", specifier = "==0.7.2" },
|
||||
{ name = "python-dateutil", specifier = "==2.8.2" },
|
||||
@@ -5703,11 +5928,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pypdf"
|
||||
version = "6.10.0"
|
||||
version = "6.10.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b8/9f/ca96abf18683ca12602065e4ed2bec9050b672c87d317f1079abc7b6d993/pypdf-6.10.0.tar.gz", hash = "sha256:4c5a48ba258c37024ec2505f7e8fd858525f5502784a2e1c8d415604af29f6ef", size = 5314833, upload-time = "2026-04-10T09:34:57.102Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7b/3f/9f2167401c2e94833ca3b69535bad89e533b5de75fefe4197a2c224baec2/pypdf-6.10.2.tar.gz", hash = "sha256:7d09ce108eff6bf67465d461b6ef352dcb8d84f7a91befc02f904455c6eea11d", size = 5315679, upload-time = "2026-04-15T16:37:36.978Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/55/f2/7ebe366f633f30a6ad105f650f44f24f98cb1335c4157d21ae47138b3482/pypdf-6.10.0-py3-none-any.whl", hash = "sha256:90005e959e1596c6e6c84c8b0ad383285b3e17011751cedd17f2ce8fcdfc86de", size = 334459, upload-time = "2026-04-10T09:34:54.966Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0c/d6/1d5c60cc17bbdf37c1552d9c03862fc6d32c5836732a0415b2d637edc2d0/pypdf-6.10.2-py3-none-any.whl", hash = "sha256:aa53be9826655b51c96741e5d7983ca224d898ac0a77896e64636810517624aa", size = 336308, upload-time = "2026-04-15T16:37:34.851Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -82,7 +82,10 @@ ARG NODE_OPTIONS
|
||||
# SENTRY_AUTH_TOKEN is injected via BuildKit secret mount so it is never written
|
||||
# to any image layer, build cache, or registry manifest.
|
||||
# Use NODE_OPTIONS in the build command
|
||||
RUN --mount=type=secret,id=sentry_auth_token,env=SENTRY_AUTH_TOKEN \
|
||||
RUN --mount=type=secret,id=sentry_auth_token \
|
||||
if [ -f /run/secrets/sentry_auth_token ]; then \
|
||||
export SENTRY_AUTH_TOKEN="$(cat /run/secrets/sentry_auth_token)"; \
|
||||
fi && \
|
||||
NODE_OPTIONS="${NODE_OPTIONS}" npx next build
|
||||
|
||||
# Step 2. Production image, copy all the files and run next
|
||||
|
||||
@@ -68,9 +68,7 @@ SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
|
||||
|
||||
# Run the conversion into a temp file so a failed run doesn't destroy an existing .tsx
|
||||
TMPFILE="${BASE_NAME}.tsx.tmp"
|
||||
bunx @svgr/cli "$SVG_FILE" --typescript --svgo-config "$SVGO_CONFIG" --template "${SCRIPT_DIR}/icon-template.js" > "$TMPFILE"
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
if bunx @svgr/cli "$SVG_FILE" --typescript --svgo-config "$SVGO_CONFIG" --template "${SCRIPT_DIR}/icon-template.js" > "$TMPFILE"; then
|
||||
# Verify the temp file has content before replacing the destination
|
||||
if [ ! -s "$TMPFILE" ]; then
|
||||
rm -f "$TMPFILE"
|
||||
@@ -84,16 +82,14 @@ if [ $? -eq 0 ]; then
|
||||
# Using perl for cross-platform compatibility (works on macOS, Linux, Windows with WSL)
|
||||
# Note: perl -i returns 0 even on some failures, so we validate the output
|
||||
|
||||
perl -i -pe 's/<svg/<svg width={size} height={size}/g' "${BASE_NAME}.tsx"
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! perl -i -pe 's/<svg/<svg width={size} height={size}/g' "${BASE_NAME}.tsx"; then
|
||||
echo "Error: Failed to add width/height attributes" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Icons additionally get stroke="currentColor"
|
||||
if [ "$MODE" = "icon" ]; then
|
||||
perl -i -pe 's/\{\.\.\.props\}/stroke="currentColor" {...props}/g' "${BASE_NAME}.tsx"
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! perl -i -pe 's/\{\.\.\.props\}/stroke="currentColor" {...props}/g' "${BASE_NAME}.tsx"; then
|
||||
echo "Error: Failed to add stroke attribute" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
import React from "react";
|
||||
import type { Meta, StoryObj } from "@storybook/react";
|
||||
import { LinkButton } from "@opal/components";
|
||||
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
|
||||
|
||||
const meta: Meta<typeof LinkButton> = {
|
||||
title: "opal/components/LinkButton",
|
||||
component: LinkButton,
|
||||
tags: ["autodocs"],
|
||||
decorators: [
|
||||
(Story) => (
|
||||
<TooltipPrimitive.Provider>
|
||||
<Story />
|
||||
</TooltipPrimitive.Provider>
|
||||
),
|
||||
],
|
||||
};
|
||||
|
||||
export default meta;
|
||||
type Story = StoryObj<typeof LinkButton>;
|
||||
|
||||
// ─── Anchor mode ────────────────────────────────────────────────────────────
|
||||
|
||||
export const Default: Story = {
|
||||
render: () => <LinkButton href="/">Home</LinkButton>,
|
||||
};
|
||||
|
||||
export const ExternalLink: Story = {
|
||||
render: () => (
|
||||
<LinkButton href="https://onyx.app" target="_blank">
|
||||
Onyx
|
||||
</LinkButton>
|
||||
),
|
||||
};
|
||||
|
||||
export const LongLabel: Story = {
|
||||
render: () => (
|
||||
<LinkButton href="https://docs.onyx.app" target="_blank">
|
||||
Go read the full Onyx documentation site
|
||||
</LinkButton>
|
||||
),
|
||||
};
|
||||
|
||||
// ─── Button mode ────────────────────────────────────────────────────────────
|
||||
|
||||
export const AsButton: Story = {
|
||||
render: () => (
|
||||
<LinkButton onClick={() => alert("clicked")}>Click me</LinkButton>
|
||||
),
|
||||
};
|
||||
|
||||
// ─── Disabled ───────────────────────────────────────────────────────────────
|
||||
|
||||
export const DisabledLink: Story = {
|
||||
render: () => (
|
||||
<LinkButton href="/" disabled>
|
||||
Disabled link
|
||||
</LinkButton>
|
||||
),
|
||||
};
|
||||
|
||||
export const DisabledButton: Story = {
|
||||
render: () => (
|
||||
<LinkButton onClick={() => alert("should not fire")} disabled>
|
||||
Disabled button
|
||||
</LinkButton>
|
||||
),
|
||||
};
|
||||
|
||||
// ─── Tooltip ────────────────────────────────────────────────────────────────
|
||||
|
||||
export const Tooltip: Story = {
|
||||
render: () => (
|
||||
<LinkButton href="/" tooltip="This is a tooltip">
|
||||
Hover me
|
||||
</LinkButton>
|
||||
),
|
||||
};
|
||||
|
||||
export const TooltipSides: Story = {
|
||||
render: () => (
|
||||
<div className="flex flex-col gap-8 p-16">
|
||||
<LinkButton href="/" tooltip="Tooltip on top" tooltipSide="top">
|
||||
top
|
||||
</LinkButton>
|
||||
<LinkButton href="/" tooltip="Tooltip on right" tooltipSide="right">
|
||||
right
|
||||
</LinkButton>
|
||||
<LinkButton href="/" tooltip="Tooltip on bottom" tooltipSide="bottom">
|
||||
bottom
|
||||
</LinkButton>
|
||||
<LinkButton href="/" tooltip="Tooltip on left" tooltipSide="left">
|
||||
left
|
||||
</LinkButton>
|
||||
</div>
|
||||
),
|
||||
};
|
||||
|
||||
// ─── Inline in prose ────────────────────────────────────────────────────────
|
||||
|
||||
export const InlineInProse: Story = {
|
||||
render: () => (
|
||||
<p style={{ maxWidth: "36rem", lineHeight: 1.7 }}>
|
||||
Modifying embedding settings requires a full re-index of all documents and
|
||||
may take hours or days depending on corpus size.{" "}
|
||||
<LinkButton href="https://docs.onyx.app" target="_blank">
|
||||
Learn more
|
||||
</LinkButton>
|
||||
.
|
||||
</p>
|
||||
),
|
||||
};
|
||||
60
web/lib/opal/src/components/buttons/link-button/README.md
Normal file
60
web/lib/opal/src/components/buttons/link-button/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# LinkButton
|
||||
|
||||
**Import:** `import { LinkButton, type LinkButtonProps } from "@opal/components";`
|
||||
|
||||
A compact, anchor-styled link with an underlined label and a trailing external-link glyph. Intended for **inline references** — "Pricing", "Docs", "Learn more" — not for interactive surfaces that need hover backgrounds or prominence tiers. Use [`Button`](../button/README.md) for those.
|
||||
|
||||
## Architecture
|
||||
|
||||
Deliberately **does not** use `Interactive.Stateless` / `Interactive.Container`. Those primitives come with height, rounding, padding, and a colour matrix designed for clickable surfaces — all wrong for an inline text link.
|
||||
|
||||
The component renders a plain `<a>` (when given `href`) or `<button>` (when given `onClick`) with:
|
||||
- `inline-flex` so the label + icon track naturally next to surrounding prose
|
||||
- `text-text-03` that shifts to `text-text-05` on hover
|
||||
- `underline` on the label only (the icon stays non-underlined)
|
||||
- `data-disabled` driven opacity + `cursor-not-allowed` for the disabled state
|
||||
|
||||
## Props
|
||||
|
||||
| Prop | Type | Default | Description |
|
||||
|------|------|---------|-------------|
|
||||
| `children` | `string` | — | Visible label (required) |
|
||||
| `href` | `string` | — | Destination URL. Renders the component as `<a>`. |
|
||||
| `target` | `string` | — | Anchor target (e.g. `"_blank"`). Adds `rel="noopener noreferrer"` automatically when `"_blank"`. |
|
||||
| `onClick` | `() => void` | — | Click handler. Without `href`, renders the component as `<button>`. |
|
||||
| `disabled` | `boolean` | `false` | Applies disabled styling + suppresses navigation / clicks |
|
||||
| `tooltip` | `string \| RichStr` | — | Hover tooltip text. Pass `markdown(...)` for inline markdown. |
|
||||
| `tooltipSide` | `TooltipSide` | `"top"` | Tooltip placement |
|
||||
|
||||
Exactly one of `href` / `onClick` is expected. Passing both is allowed but only `href` takes effect (renders as an anchor).
|
||||
|
||||
## Usage
|
||||
|
||||
```tsx
|
||||
import { LinkButton } from "@opal/components";
|
||||
|
||||
// External link — automatic rel="noopener noreferrer"
|
||||
<LinkButton href="https://docs.onyx.app" target="_blank">
|
||||
Read the docs
|
||||
</LinkButton>
|
||||
|
||||
// Internal link
|
||||
<LinkButton href="/admin/settings">Settings</LinkButton>
|
||||
|
||||
// Button-mode (no href)
|
||||
<LinkButton onClick={openModal}>Learn more</LinkButton>
|
||||
|
||||
// Disabled
|
||||
<LinkButton href="/" disabled>
|
||||
Not available
|
||||
</LinkButton>
|
||||
|
||||
// With a tooltip
|
||||
<LinkButton
|
||||
href="/docs/pricing"
|
||||
tooltip="See plan details"
|
||||
tooltipSide="bottom"
|
||||
>
|
||||
Pricing
|
||||
</LinkButton>
|
||||
```
|
||||
117
web/lib/opal/src/components/buttons/link-button/components.tsx
Normal file
117
web/lib/opal/src/components/buttons/link-button/components.tsx
Normal file
@@ -0,0 +1,117 @@
|
||||
import "@opal/components/buttons/link-button/styles.css";
|
||||
import type { RichStr } from "@opal/types";
|
||||
import type { TooltipSide } from "@opal/components/tooltip/components";
|
||||
|
||||
// Direct file imports to avoid circular resolution through the @opal/components
|
||||
// and @opal/icons barrels, which break CJS-based test runners (jest).
|
||||
import { Tooltip } from "@opal/components/tooltip/components";
|
||||
import SvgExternalLink from "@opal/icons/external-link";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface LinkButtonProps {
|
||||
/** Visible label. Always rendered as underlined link text. */
|
||||
children: string;
|
||||
|
||||
/** Destination URL. When provided, the component renders as an `<a>`. */
|
||||
href?: string;
|
||||
|
||||
/** Anchor `target` attribute (e.g. `"_blank"`). Only meaningful with `href`. */
|
||||
target?: string;
|
||||
|
||||
/** Click handler. When provided without `href`, the component renders as a `<button>`. */
|
||||
onClick?: () => void;
|
||||
|
||||
/** Applies disabled styling + suppresses navigation/clicks. */
|
||||
disabled?: boolean;
|
||||
|
||||
/** Tooltip text shown on hover. Pass `markdown(...)` for inline markdown. */
|
||||
tooltip?: string | RichStr;
|
||||
|
||||
/** Which side the tooltip appears on. @default "top" */
|
||||
tooltipSide?: TooltipSide;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// LinkButton
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* A bare, anchor-styled link with a trailing external-link glyph. Renders
|
||||
* as `<a>` when given `href`, or `<button>` when given `onClick`. Intended
|
||||
* for inline references — "Pricing", "Docs", etc. — not for interactive
|
||||
* surfaces that need hover backgrounds or prominence tiers (use `Button`
|
||||
* for those).
|
||||
*
|
||||
* Deliberately does NOT use `Interactive.Stateless` / `Interactive.Container`
|
||||
* — those come with height/rounding/padding and a colour matrix that are
|
||||
* wrong for an inline text link. Styling is kept to: underlined label,
|
||||
* small external-link icon, a subtle color shift on hover, and disabled
|
||||
* opacity.
|
||||
*/
|
||||
function LinkButton({
|
||||
children,
|
||||
href,
|
||||
target,
|
||||
onClick,
|
||||
disabled,
|
||||
tooltip,
|
||||
tooltipSide = "top",
|
||||
}: LinkButtonProps) {
|
||||
const inner = (
|
||||
<>
|
||||
<span className="opal-link-button-label font-secondary-body">
|
||||
{children}
|
||||
</span>
|
||||
<SvgExternalLink size={12} />
|
||||
</>
|
||||
);
|
||||
|
||||
// Always stop propagation so clicks don't bubble to interactive ancestors
|
||||
// (cards, list rows, etc. that commonly wrap a LinkButton). If disabled,
|
||||
// also preventDefault on anchors so the browser doesn't navigate.
|
||||
const handleAnchorClick = (e: React.MouseEvent<HTMLAnchorElement>) => {
|
||||
e.stopPropagation();
|
||||
if (disabled) e.preventDefault();
|
||||
};
|
||||
|
||||
const handleButtonClick = (e: React.MouseEvent<HTMLButtonElement>) => {
|
||||
e.stopPropagation();
|
||||
if (disabled) return;
|
||||
onClick?.();
|
||||
};
|
||||
|
||||
const element = href ? (
|
||||
<a
|
||||
className="opal-link-button"
|
||||
href={disabled ? undefined : href}
|
||||
target={target}
|
||||
rel={target === "_blank" ? "noopener noreferrer" : undefined}
|
||||
aria-disabled={disabled || undefined}
|
||||
data-disabled={disabled || undefined}
|
||||
onClick={handleAnchorClick}
|
||||
>
|
||||
{inner}
|
||||
</a>
|
||||
) : (
|
||||
<button
|
||||
type="button"
|
||||
className="opal-link-button"
|
||||
onClick={handleButtonClick}
|
||||
disabled={disabled}
|
||||
data-disabled={disabled || undefined}
|
||||
>
|
||||
{inner}
|
||||
</button>
|
||||
);
|
||||
|
||||
return (
|
||||
<Tooltip tooltip={tooltip} side={tooltipSide}>
|
||||
{element}
|
||||
</Tooltip>
|
||||
);
|
||||
}
|
||||
|
||||
export { LinkButton, type LinkButtonProps };
|
||||
31
web/lib/opal/src/components/buttons/link-button/styles.css
Normal file
31
web/lib/opal/src/components/buttons/link-button/styles.css
Normal file
@@ -0,0 +1,31 @@
|
||||
/* ============================================================================
|
||||
LinkButton — a bare anchor-style link with a trailing external-link icon.
|
||||
|
||||
Intentionally does NOT use `Interactive.Stateless` / `Interactive.Container`.
|
||||
Styling is minimal: inline-flex, underlined label, subtle color shift on
|
||||
hover, disabled opacity. The icon inherits the parent's text color via
|
||||
`currentColor` so `text-text-03` on the root cascades through.
|
||||
============================================================================ */
|
||||
|
||||
.opal-link-button {
|
||||
@apply inline-flex flex-row items-center gap-0.5 text-text-03;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
padding: 0;
|
||||
transition: color 150ms ease-out;
|
||||
}
|
||||
|
||||
.opal-link-button:hover:not([data-disabled]) {
|
||||
@apply text-text-05;
|
||||
}
|
||||
|
||||
.opal-link-button[data-disabled] {
|
||||
@apply opacity-50 cursor-not-allowed;
|
||||
}
|
||||
|
||||
/* `font-secondary-body` is a plain CSS class defined in globals.css (not a
|
||||
Tailwind utility), so `@apply` can't consume it — other Opal components
|
||||
attach it via `className` on the JSX element, and we do the same here. */
|
||||
.opal-link-button-label {
|
||||
@apply underline;
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { Meta, StoryObj } from "@storybook/react";
|
||||
import { Card } from "@opal/components";
|
||||
import { useState } from "react";
|
||||
import { Button, Card } from "@opal/components";
|
||||
|
||||
const BACKGROUND_VARIANTS = ["none", "light", "heavy"] as const;
|
||||
const BORDER_VARIANTS = ["none", "dashed", "solid"] as const;
|
||||
@@ -100,3 +101,83 @@ export const AllCombinations: Story = {
|
||||
</div>
|
||||
),
|
||||
};
|
||||
|
||||
// ─── Expandable mode ─────────────────────────────────────────────────────────
|
||||
|
||||
export const Expandable: Story = {
|
||||
render: function ExpandableStory() {
|
||||
const [open, setOpen] = useState(false);
|
||||
return (
|
||||
<div className="w-96">
|
||||
<Card
|
||||
expandable
|
||||
expanded={open}
|
||||
border="solid"
|
||||
expandedContent={
|
||||
<div className="flex flex-col gap-2">
|
||||
<p>First model</p>
|
||||
<p>Second model</p>
|
||||
<p>Third model</p>
|
||||
</div>
|
||||
}
|
||||
>
|
||||
<Button
|
||||
prominence="tertiary"
|
||||
width="full"
|
||||
onClick={() => setOpen((v) => !v)}
|
||||
>
|
||||
Toggle (expanded={String(open)})
|
||||
</Button>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
export const ExpandableNoContent: Story = {
|
||||
render: function ExpandableNoContentStory() {
|
||||
const [open, setOpen] = useState(false);
|
||||
return (
|
||||
<div className="w-96">
|
||||
<Card expandable expanded={open} border="solid">
|
||||
<Button
|
||||
prominence="tertiary"
|
||||
width="full"
|
||||
onClick={() => setOpen((v) => !v)}
|
||||
>
|
||||
Toggle (no content — renders like a plain card)
|
||||
</Button>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
export const ExpandableRoundingVariants: Story = {
|
||||
render: function ExpandableRoundingStory() {
|
||||
const [openKey, setOpenKey] =
|
||||
useState<(typeof ROUNDING_VARIANTS)[number]>("md");
|
||||
return (
|
||||
<div className="flex flex-col gap-4 w-96">
|
||||
{ROUNDING_VARIANTS.map((rounding) => (
|
||||
<Card
|
||||
key={rounding}
|
||||
expandable
|
||||
expanded={openKey === rounding}
|
||||
rounding={rounding}
|
||||
border="solid"
|
||||
expandedContent={<p>content for rounding={rounding}</p>}
|
||||
>
|
||||
<Button
|
||||
prominence="tertiary"
|
||||
width="full"
|
||||
onClick={() => setOpenKey(rounding)}
|
||||
>
|
||||
rounding={rounding} (click to expand)
|
||||
</Button>
|
||||
</Card>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
@@ -2,11 +2,36 @@
|
||||
|
||||
**Import:** `import { Card, type CardProps } from "@opal/components";`
|
||||
|
||||
A plain container component with configurable background, border, padding, and rounding. Uses a simple `<div>` internally with `overflow-clip`.
|
||||
A container component with configurable background, border, padding, and rounding. Has two mutually-exclusive modes:
|
||||
|
||||
## Architecture
|
||||
- **Plain** (default) — renders children inside a single styled `<div>`.
|
||||
- **Expandable** (`expandable: true`) — renders children as an always-visible header plus an `expandedContent` prop that animates open/closed.
|
||||
|
||||
Padding and rounding are controlled independently:
|
||||
## Plain mode
|
||||
|
||||
Default behavior — a plain container.
|
||||
|
||||
```tsx
|
||||
import { Card } from "@opal/components";
|
||||
|
||||
<Card padding="md" border="solid">
|
||||
<p>Hello</p>
|
||||
</Card>
|
||||
```
|
||||
|
||||
### Plain mode props
|
||||
|
||||
| Prop | Type | Default | Description |
|
||||
|------|------|---------|-------------|
|
||||
| `padding` | `PaddingVariants` | `"md"` | Padding preset |
|
||||
| `rounding` | `RoundingVariants` | `"md"` | Border-radius preset |
|
||||
| `background` | `"none" \| "light" \| "heavy"` | `"light"` | Background fill intensity |
|
||||
| `border` | `"none" \| "dashed" \| "solid"` | `"none"` | Border style |
|
||||
| `borderColor` | `StatusVariants` | `"default"` | Status-palette border color (needs `border` ≠ `"none"`) |
|
||||
| `ref` | `React.Ref<HTMLDivElement>` | — | Ref forwarded to the root div |
|
||||
| `children` | `React.ReactNode` | — | Card content |
|
||||
|
||||
### Padding scale
|
||||
|
||||
| `padding` | Class |
|
||||
|-----------|---------|
|
||||
@@ -17,6 +42,8 @@ Padding and rounding are controlled independently:
|
||||
| `"2xs"` | `p-0.5` |
|
||||
| `"fit"` | `p-0` |
|
||||
|
||||
### Rounding scale
|
||||
|
||||
| `rounding` | Class |
|
||||
|------------|--------------|
|
||||
| `"xs"` | `rounded-04` |
|
||||
@@ -24,40 +51,92 @@ Padding and rounding are controlled independently:
|
||||
| `"md"` | `rounded-12` |
|
||||
| `"lg"` | `rounded-16` |
|
||||
|
||||
## Props
|
||||
## Expandable mode
|
||||
|
||||
| Prop | Type | Default | Description |
|
||||
|------|------|---------|-------------|
|
||||
| `padding` | `PaddingVariants` | `"sm"` | Padding preset |
|
||||
| `rounding` | `RoundingVariants` | `"md"` | Border-radius preset |
|
||||
| `background` | `"none" \| "light" \| "heavy"` | `"light"` | Background fill intensity |
|
||||
| `border` | `"none" \| "dashed" \| "solid"` | `"none"` | Border style |
|
||||
| `ref` | `React.Ref<HTMLDivElement>` | — | Ref forwarded to the root div |
|
||||
| `children` | `React.ReactNode` | — | Card content |
|
||||
|
||||
## Usage
|
||||
Enabled by passing `expandable: true`. The type is a discriminated union — `expanded` and `expandedContent` are only available (and type-checked) when `expandable: true`.
|
||||
|
||||
```tsx
|
||||
import { Card } from "@opal/components";
|
||||
import { useState } from "react";
|
||||
|
||||
// Default card (light background, no border, sm padding, md rounding)
|
||||
<Card>
|
||||
<h2>Card Title</h2>
|
||||
<p>Card content</p>
|
||||
</Card>
|
||||
function ProviderCard() {
|
||||
const [open, setOpen] = useState(false);
|
||||
|
||||
// Large padding + rounding with solid border
|
||||
<Card padding="lg" rounding="lg" border="solid">
|
||||
<p>Spacious card</p>
|
||||
</Card>
|
||||
|
||||
// Compact card with solid border
|
||||
<Card padding="xs" rounding="sm" border="solid">
|
||||
<p>Compact card</p>
|
||||
</Card>
|
||||
|
||||
// Empty state card
|
||||
<Card background="none" border="dashed">
|
||||
<p>No items yet</p>
|
||||
</Card>
|
||||
return (
|
||||
<Card
|
||||
expandable
|
||||
expanded={open}
|
||||
expandedContent={<ModelList />}
|
||||
border="solid"
|
||||
rounding="lg"
|
||||
>
|
||||
{/* always visible — the header region */}
|
||||
<div
|
||||
onClick={() => setOpen((v) => !v)}
|
||||
className="flex items-center justify-between cursor-pointer"
|
||||
>
|
||||
<ProviderInfo />
|
||||
<SvgChevronDown
|
||||
className={cn("transition-transform", open && "rotate-180")}
|
||||
/>
|
||||
</div>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
### Expandable mode props
|
||||
|
||||
Everything from plain mode, **plus**:
|
||||
|
||||
| Prop | Type | Default | Description |
|
||||
|------|------|---------|-------------|
|
||||
| `expandable` | `true` | — | Required to enable the expandable variant |
|
||||
| `expanded` | `boolean` | `false` | Controlled expanded state. Card never mutates this. |
|
||||
| `expandedContent` | `React.ReactNode` | — | The body that animates open/closed below the header |
|
||||
|
||||
### Behavior
|
||||
|
||||
- **No trigger baked in.** Card does not attach any click handlers. Callers wire their own `onClick` / keyboard / button / etc. to toggle state. This keeps `padding` semantics consistent across modes and avoids surprises with interactive children.
|
||||
- **Always controlled.** `expanded` is a pure one-way visual prop. There is no `defaultExpanded` or `onExpandChange` — the caller owns state entirely (`useState` at the call site).
|
||||
- **No React context.** The component renders a flat tree; there are no compound sub-components (`Card.Header` / `Card.Content`) and no exported context hooks.
|
||||
- **Rounding adapts automatically.** When `expanded && expandedContent !== undefined`, the header's bottom corners flatten and the content's top corners flatten so they meet seamlessly. When collapsed (or when `expandedContent` is undefined), the header is fully rounded.
|
||||
- **Content background is always transparent.** The `background` prop applies to the header only; the content slot never fills its own background so the page shows through and keeps the two regions visually distinct.
|
||||
- **Content has no intrinsic padding.** The `padding` prop applies to the header only. Callers own any padding inside whatever they pass to `expandedContent` — wrap it in a `<div className="p-4">` (or whatever) if you want spacing.
|
||||
- **Animation.** Content uses a pure CSS grid `0fr ↔ 1fr` animation with an opacity fade (~200ms ease-out). No `@radix-ui/react-collapsible` dependency.
|
||||
|
||||
### Accessibility
|
||||
|
||||
Because Card doesn't own the trigger, it also doesn't generate IDs or ARIA attributes. Consumers are responsible for wiring `aria-expanded`, `aria-controls`, `aria-labelledby`, etc. on their trigger element.
|
||||
|
||||
## Complete prop reference
|
||||
|
||||
```ts
|
||||
type CardBaseProps = {
|
||||
padding?: PaddingVariants;
|
||||
rounding?: RoundingVariants;
|
||||
background?: "none" | "light" | "heavy";
|
||||
border?: "none" | "dashed" | "solid";
|
||||
borderColor?: StatusVariants;
|
||||
ref?: React.Ref<HTMLDivElement>;
|
||||
children?: React.ReactNode;
|
||||
};
|
||||
|
||||
type CardPlainProps = CardBaseProps & { expandable?: false };
|
||||
|
||||
type CardExpandableProps = CardBaseProps & {
|
||||
expandable: true;
|
||||
expanded?: boolean;
|
||||
expandedContent?: React.ReactNode;
|
||||
};
|
||||
|
||||
type CardProps = CardPlainProps | CardExpandableProps;
|
||||
```
|
||||
|
||||
The discriminated union enforces:
|
||||
|
||||
```tsx
|
||||
<Card expanded>…</Card> // ❌ TS error — `expanded` not in plain mode
|
||||
<Card expandable expandedContent={…}>…</Card> // ✅ expandable mode
|
||||
<Card border="solid">…</Card> // ✅ plain mode
|
||||
```
|
||||
|
||||
@@ -1,6 +1,17 @@
|
||||
import "@opal/components/cards/shared.css";
|
||||
import "@opal/components/cards/card/styles.css";
|
||||
import type { PaddingVariants, RoundingVariants } from "@opal/types";
|
||||
import { paddingVariants, cardRoundingVariants } from "@opal/shared";
|
||||
import type {
|
||||
PaddingVariants,
|
||||
RoundingVariants,
|
||||
SizeVariants,
|
||||
StatusVariants,
|
||||
} from "@opal/types";
|
||||
import {
|
||||
paddingVariants,
|
||||
cardRoundingVariants,
|
||||
cardTopRoundingVariants,
|
||||
cardBottomRoundingVariants,
|
||||
} from "@opal/shared";
|
||||
import { cn } from "@opal/utils";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -10,7 +21,10 @@ import { cn } from "@opal/utils";
|
||||
type BackgroundVariant = "none" | "light" | "heavy";
|
||||
type BorderVariant = "none" | "dashed" | "solid";
|
||||
|
||||
type CardProps = {
|
||||
/**
|
||||
* Props shared by both plain and expandable Card modes.
|
||||
*/
|
||||
type CardBaseProps = {
|
||||
/**
|
||||
* Padding preset.
|
||||
*
|
||||
@@ -23,6 +37,10 @@ type CardProps = {
|
||||
* | `"2xs"` | `p-0.5` |
|
||||
* | `"fit"` | `p-0` |
|
||||
*
|
||||
* In expandable mode, applied **only** to the header region. The
|
||||
* `expandedContent` slot has no intrinsic padding — callers own any padding
|
||||
* inside the content they pass in.
|
||||
*
|
||||
* @default "md"
|
||||
*/
|
||||
padding?: PaddingVariants;
|
||||
@@ -37,6 +55,10 @@ type CardProps = {
|
||||
* | `"md"` | `rounded-12` |
|
||||
* | `"lg"` | `rounded-16` |
|
||||
*
|
||||
* In expandable mode when expanded, rounding applies only to the header's
|
||||
* top corners and the expandedContent's bottom corners so the two join seamlessly.
|
||||
* When collapsed, rounding applies to all four corners of the header.
|
||||
*
|
||||
* @default "md"
|
||||
*/
|
||||
rounding?: RoundingVariants;
|
||||
@@ -61,35 +83,177 @@ type CardProps = {
|
||||
*/
|
||||
border?: BorderVariant;
|
||||
|
||||
/**
|
||||
* Border color, drawn from the same status palette as {@link MessageCard}.
|
||||
* Has no visual effect when `border="none"`.
|
||||
*
|
||||
* @default "default"
|
||||
*/
|
||||
borderColor?: StatusVariants;
|
||||
|
||||
/** Ref forwarded to the root `<div>`. */
|
||||
ref?: React.Ref<HTMLDivElement>;
|
||||
|
||||
/**
|
||||
* In plain mode, the card body. In expandable mode, the always-visible
|
||||
* header region (the part that stays put whether expanded or collapsed).
|
||||
*/
|
||||
children?: React.ReactNode;
|
||||
};
|
||||
|
||||
type CardPlainProps = CardBaseProps & {
|
||||
/**
|
||||
* When `false` (or omitted), renders a plain card — same behavior as before
|
||||
* this prop existed. No fold behavior, no `expandedContent` slot.
|
||||
*
|
||||
* @default false
|
||||
*/
|
||||
expandable?: false;
|
||||
};
|
||||
|
||||
type CardExpandableProps = CardBaseProps & {
|
||||
/**
|
||||
* Enables the expandable variant. Renders `children` as the always-visible
|
||||
* header and `expandedContent` as the body that animates open/closed based on
|
||||
* `expanded`.
|
||||
*/
|
||||
expandable: true;
|
||||
|
||||
/**
|
||||
* Controlled expanded state. The caller owns the state and any trigger
|
||||
* (click-to-toggle) — Card is purely visual and never mutates this value.
|
||||
*
|
||||
* @default false
|
||||
*/
|
||||
expanded?: boolean;
|
||||
|
||||
/**
|
||||
* The expandable body. Rendered below the header, animating open/closed
|
||||
* when `expanded` changes. If `undefined`, the card behaves visually like
|
||||
* a plain card (no divider, no bottom slot).
|
||||
*/
|
||||
expandedContent?: React.ReactNode;
|
||||
|
||||
/**
|
||||
* Max-height constraint on the expandable content area.
|
||||
* - `"md"` (default): caps at 20rem with vertical scroll.
|
||||
* - `"fit"`: no max-height — content takes its natural height.
|
||||
*
|
||||
* @default "md"
|
||||
*/
|
||||
expandableContentHeight?: Extract<SizeVariants, "md" | "fit">;
|
||||
};
|
||||
|
||||
type CardProps = CardPlainProps | CardExpandableProps;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Card
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function Card({
|
||||
padding: paddingProp = "md",
|
||||
rounding: roundingProp = "md",
|
||||
background = "light",
|
||||
border = "none",
|
||||
ref,
|
||||
children,
|
||||
}: CardProps) {
|
||||
/**
|
||||
* A container with configurable background, border, padding, and rounding.
|
||||
*
|
||||
* Has two mutually-exclusive modes:
|
||||
*
|
||||
* - **Plain** (default): renders `children` inside a single styled `<div>`.
|
||||
* Same shape as the original Card.
|
||||
*
|
||||
* - **Expandable** (`expandable: true`): renders `children` as the header
|
||||
* region and the `expandedContent` prop as an animating body below. Fold state is
|
||||
* fully controlled via the `expanded` prop — Card does not own state and
|
||||
* does not wire a click trigger. Callers attach their own
|
||||
* `onClick={() => setExpanded(v => !v)}` to whatever element they want to
|
||||
* act as the toggle.
|
||||
*
|
||||
* @example Plain
|
||||
* ```tsx
|
||||
* <Card padding="md" border="solid">
|
||||
* <p>Hello</p>
|
||||
* </Card>
|
||||
* ```
|
||||
*
|
||||
* @example Expandable, controlled
|
||||
* ```tsx
|
||||
* const [open, setOpen] = useState(false);
|
||||
* <Card
|
||||
* expandable
|
||||
* expanded={open}
|
||||
* expandedContent={<ModelList />}
|
||||
* border="solid"
|
||||
* >
|
||||
* <button onClick={() => setOpen(v => !v)}>Toggle</button>
|
||||
* </Card>
|
||||
* ```
|
||||
*/
|
||||
function Card(props: CardProps) {
|
||||
const {
|
||||
padding: paddingProp = "md",
|
||||
rounding: roundingProp = "md",
|
||||
background = "light",
|
||||
border = "none",
|
||||
borderColor = "default",
|
||||
ref,
|
||||
children,
|
||||
} = props;
|
||||
|
||||
const padding = paddingVariants[paddingProp];
|
||||
const rounding = cardRoundingVariants[roundingProp];
|
||||
|
||||
// Plain mode — unchanged behavior
|
||||
if (!props.expandable) {
|
||||
return (
|
||||
<div
|
||||
ref={ref}
|
||||
className={cn("opal-card", padding, cardRoundingVariants[roundingProp])}
|
||||
data-background={background}
|
||||
data-border={border}
|
||||
data-opal-status-border={borderColor}
|
||||
>
|
||||
{children}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Expandable mode
|
||||
const {
|
||||
expanded = false,
|
||||
expandedContent,
|
||||
expandableContentHeight = "md",
|
||||
} = props;
|
||||
const showContent = expanded && expandedContent !== undefined;
|
||||
const headerRounding = showContent
|
||||
? cardTopRoundingVariants[roundingProp]
|
||||
: cardRoundingVariants[roundingProp];
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={ref}
|
||||
className={cn("opal-card", padding, rounding)}
|
||||
data-background={background}
|
||||
data-border={border}
|
||||
>
|
||||
{children}
|
||||
<div ref={ref} className="opal-card-expandable">
|
||||
<div
|
||||
className={cn("opal-card-expandable-header", padding, headerRounding)}
|
||||
data-background={background}
|
||||
data-border={border}
|
||||
data-opal-status-border={borderColor}
|
||||
>
|
||||
{children}
|
||||
</div>
|
||||
{expandedContent !== undefined && (
|
||||
<div
|
||||
className="opal-card-expandable-wrapper"
|
||||
data-expanded={showContent ? "true" : "false"}
|
||||
>
|
||||
<div className="opal-card-expandable-inner">
|
||||
<div
|
||||
className={cn(
|
||||
"opal-card-expandable-body",
|
||||
cardBottomRoundingVariants[roundingProp]
|
||||
)}
|
||||
data-border={border}
|
||||
data-opal-status-border={borderColor}
|
||||
data-content-height={expandableContentHeight}
|
||||
>
|
||||
{expandedContent}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
/* ============================================================================
|
||||
Plain Card
|
||||
============================================================================ */
|
||||
|
||||
.opal-card {
|
||||
@apply w-full overflow-clip;
|
||||
}
|
||||
@@ -15,7 +19,8 @@
|
||||
@apply bg-background-tint-01;
|
||||
}
|
||||
|
||||
/* Border variants */
|
||||
/* Border variants. Border *color* lives in `cards/shared.css` and is keyed
|
||||
off the `data-opal-status-border` attribute. */
|
||||
.opal-card[data-border="none"] {
|
||||
border: none;
|
||||
}
|
||||
@@ -27,3 +32,101 @@
|
||||
.opal-card[data-border="solid"] {
|
||||
@apply border;
|
||||
}
|
||||
|
||||
/* ============================================================================
|
||||
Expandable Card
|
||||
|
||||
Structure:
|
||||
.opal-card-expandable (flex-col wrapper, no styling)
|
||||
.opal-card-expandable-header (bg + border + padding + rounding)
|
||||
.opal-card-expandable-wrapper (grid animation, overflow clip)
|
||||
.opal-card-expandable-inner (min-h:0, overflow clip for grid)
|
||||
.opal-card-expandable-body (bg + border-minus-top + padding)
|
||||
|
||||
Animation: pure CSS grid `0fr ↔ 1fr` with opacity fade on the wrapper.
|
||||
No JS state machine, no Radix.
|
||||
============================================================================ */
|
||||
|
||||
.opal-card-expandable {
|
||||
@apply w-full flex flex-col;
|
||||
}
|
||||
|
||||
/* ── Header ──────────────────────────────────────────────────────────── */
|
||||
|
||||
.opal-card-expandable-header {
|
||||
@apply w-full overflow-clip transition-[border-radius] duration-200 ease-out;
|
||||
}
|
||||
|
||||
.opal-card-expandable-header[data-background="none"] {
|
||||
@apply bg-transparent;
|
||||
}
|
||||
|
||||
.opal-card-expandable-header[data-background="light"] {
|
||||
@apply bg-background-tint-00;
|
||||
}
|
||||
|
||||
.opal-card-expandable-header[data-background="heavy"] {
|
||||
@apply bg-background-tint-01;
|
||||
}
|
||||
|
||||
.opal-card-expandable-header[data-border="none"] {
|
||||
border: none;
|
||||
}
|
||||
|
||||
.opal-card-expandable-header[data-border="dashed"] {
|
||||
@apply border border-dashed;
|
||||
}
|
||||
|
||||
.opal-card-expandable-header[data-border="solid"] {
|
||||
@apply border;
|
||||
}
|
||||
|
||||
/* ── Content wrapper: grid 0fr↔1fr animation ─────────────────────────── */
|
||||
|
||||
.opal-card-expandable-wrapper {
|
||||
display: grid;
|
||||
grid-template-rows: 0fr;
|
||||
opacity: 0;
|
||||
overflow: hidden;
|
||||
transition:
|
||||
grid-template-rows 200ms ease-out,
|
||||
opacity 200ms ease-out;
|
||||
}
|
||||
|
||||
.opal-card-expandable-wrapper[data-expanded="true"] {
|
||||
grid-template-rows: 1fr;
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
/* ── Content inner: clips the grid child so it collapses to 0 cleanly ── */
|
||||
|
||||
.opal-card-expandable-inner {
|
||||
@apply overflow-hidden;
|
||||
min-height: 0;
|
||||
}
|
||||
|
||||
/* ── Content body: carries border + padding. Background is always
|
||||
transparent so the page background shows through the content slot,
|
||||
keeping it visually distinct from the header. ─ */
|
||||
|
||||
.opal-card-expandable-body {
|
||||
@apply w-full bg-transparent;
|
||||
}
|
||||
|
||||
.opal-card-expandable-body[data-content-height="md"] {
|
||||
@apply max-h-[20rem] overflow-y-auto;
|
||||
}
|
||||
|
||||
/* "fit" = no constraint; natural height, no scroll. */
|
||||
|
||||
.opal-card-expandable-body[data-border="none"] {
|
||||
border: none;
|
||||
}
|
||||
|
||||
.opal-card-expandable-body[data-border="dashed"] {
|
||||
@apply border border-t-0 border-dashed;
|
||||
}
|
||||
|
||||
.opal-card-expandable-body[data-border="solid"] {
|
||||
@apply border border-t-0;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { Meta, StoryObj } from "@storybook/react";
|
||||
import { EmptyMessageCard } from "@opal/components";
|
||||
import { SvgSparkle, SvgUsers } from "@opal/icons";
|
||||
import { SvgActions, SvgServer, SvgSparkle, SvgUsers } from "@opal/icons";
|
||||
|
||||
const PADDING_VARIANTS = ["fit", "2xs", "xs", "sm", "md", "lg"] as const;
|
||||
|
||||
@@ -26,6 +26,22 @@ export const WithCustomIcon: Story = {
|
||||
},
|
||||
};
|
||||
|
||||
export const MainUi: Story = {
|
||||
args: {
|
||||
sizePreset: "main-ui",
|
||||
title: "No Actions Found",
|
||||
icon: SvgActions,
|
||||
description: "Provide OpenAPI schema to preview actions here.",
|
||||
},
|
||||
};
|
||||
|
||||
export const MainUiNoDescription: Story = {
|
||||
args: {
|
||||
sizePreset: "main-ui",
|
||||
title: "No Knowledge",
|
||||
},
|
||||
};
|
||||
|
||||
export const PaddingVariants: Story = {
|
||||
render: () => (
|
||||
<div className="flex flex-col gap-4 w-96">
|
||||
@@ -46,6 +62,12 @@ export const Multiple: Story = {
|
||||
<EmptyMessageCard title="No models available." />
|
||||
<EmptyMessageCard icon={SvgSparkle} title="No agents selected." />
|
||||
<EmptyMessageCard icon={SvgUsers} title="No groups added." />
|
||||
<EmptyMessageCard
|
||||
sizePreset="main-ui"
|
||||
icon={SvgServer}
|
||||
title="No Discord servers configured yet"
|
||||
description="Create a server configuration to get started."
|
||||
/>
|
||||
</div>
|
||||
),
|
||||
};
|
||||
|
||||
@@ -6,25 +6,44 @@ A pre-configured Card for empty states. Renders a transparent card with a dashed
|
||||
|
||||
## Props
|
||||
|
||||
| Prop | Type | Default | Description |
|
||||
| --------- | --------------------------- | ---------- | -------------------------------- |
|
||||
| `icon` | `IconFunctionComponent` | `SvgEmpty` | Icon displayed alongside the title |
|
||||
| `title` | `string` | — | Primary message text (required) |
|
||||
| `padding` | `PaddingVariants` | `"sm"` | Padding preset for the card |
|
||||
| `ref` | `React.Ref<HTMLDivElement>` | — | Ref forwarded to the root div |
|
||||
### Base props (all presets)
|
||||
|
||||
| Prop | Type | Default | Description |
|
||||
| ------------ | ----------------------------- | ------------- | ---------------------------------- |
|
||||
| `sizePreset` | `"secondary" \| "main-ui"` | `"secondary"` | Controls layout and text sizing |
|
||||
| `icon` | `IconFunctionComponent` | `SvgEmpty` | Icon displayed alongside the title |
|
||||
| `title` | `string \| RichStr` | — | Primary message text (required) |
|
||||
| `padding` | `PaddingVariants` | `"md"` | Padding preset for the card |
|
||||
| `ref` | `React.Ref<HTMLDivElement>` | — | Ref forwarded to the root div |
|
||||
|
||||
### `sizePreset="main-ui"` only
|
||||
|
||||
| Prop | Type | Default | Description |
|
||||
| ------------- | ------------------- | ------- | ------------------------ |
|
||||
| `description` | `string \| RichStr` | — | Optional description text |
|
||||
|
||||
> `description` is **not accepted** when `sizePreset` is `"secondary"` (the default).
|
||||
|
||||
## Usage
|
||||
|
||||
```tsx
|
||||
import { EmptyMessageCard } from "@opal/components";
|
||||
import { SvgSparkle, SvgFileText } from "@opal/icons";
|
||||
import { SvgSparkle, SvgFileText, SvgActions } from "@opal/icons";
|
||||
|
||||
// Default empty state
|
||||
// Default empty state (secondary)
|
||||
<EmptyMessageCard title="No items yet." />
|
||||
|
||||
// With custom icon
|
||||
<EmptyMessageCard icon={SvgSparkle} title="No agents selected." />
|
||||
|
||||
// With custom padding
|
||||
// main-ui with description
|
||||
<EmptyMessageCard
|
||||
sizePreset="main-ui"
|
||||
icon={SvgActions}
|
||||
title="No Actions Found"
|
||||
description="Provide OpenAPI schema to preview actions here."
|
||||
/>
|
||||
|
||||
// Custom padding
|
||||
<EmptyMessageCard padding="xs" icon={SvgFileText} title="No documents available." />
|
||||
```
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Card } from "@opal/components/cards/card/components";
|
||||
import { Content, SizePreset } from "@opal/layouts";
|
||||
import { Content } from "@opal/layouts";
|
||||
import { SvgEmpty } from "@opal/icons";
|
||||
import type {
|
||||
IconFunctionComponent,
|
||||
@@ -32,7 +32,7 @@ type EmptyMessageCardProps =
|
||||
})
|
||||
| (EmptyMessageCardBaseProps & {
|
||||
sizePreset: "main-ui";
|
||||
/** Description text. Only supported when `sizePreset` is `"main-ui"`. */
|
||||
/** Optional description text. */
|
||||
description?: string | RichStr;
|
||||
});
|
||||
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
import "@opal/components/cards/shared.css";
|
||||
import "@opal/components/cards/message-card/styles.css";
|
||||
import { cn } from "@opal/utils";
|
||||
import type { RichStr, IconFunctionComponent } from "@opal/types";
|
||||
import type {
|
||||
IconFunctionComponent,
|
||||
PaddingVariants,
|
||||
RichStr,
|
||||
StatusVariants,
|
||||
} from "@opal/types";
|
||||
import { paddingVariants } from "@opal/shared";
|
||||
import { ContentAction } from "@opal/layouts";
|
||||
import { Button, Divider } from "@opal/components";
|
||||
import {
|
||||
@@ -15,11 +22,9 @@ import {
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type MessageCardVariant = "default" | "info" | "success" | "warning" | "error";
|
||||
|
||||
interface MessageCardBaseProps {
|
||||
/** Visual variant controlling background, border, and icon. @default "default" */
|
||||
variant?: MessageCardVariant;
|
||||
variant?: StatusVariants;
|
||||
|
||||
/** Override the default variant icon. */
|
||||
icon?: IconFunctionComponent;
|
||||
@@ -30,6 +35,9 @@ interface MessageCardBaseProps {
|
||||
/** Optional description below the title. */
|
||||
description?: string | RichStr;
|
||||
|
||||
/** Padding preset. @default "sm" */
|
||||
padding?: Extract<PaddingVariants, "sm" | "xs">;
|
||||
|
||||
/**
|
||||
* Content rendered below a divider, under the main content area.
|
||||
* When provided, a `Divider` is inserted between the `ContentAction` and this node.
|
||||
@@ -59,7 +67,7 @@ type MessageCardProps = MessageCardBaseProps &
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const VARIANT_CONFIG: Record<
|
||||
MessageCardVariant,
|
||||
StatusVariants,
|
||||
{ icon: IconFunctionComponent; iconClass: string }
|
||||
> = {
|
||||
default: { icon: SvgAlertCircle, iconClass: "stroke-text-03" },
|
||||
@@ -113,6 +121,7 @@ function MessageCard({
|
||||
icon: iconOverride,
|
||||
title,
|
||||
description,
|
||||
padding = "sm",
|
||||
bottomChildren,
|
||||
rightChildren,
|
||||
onClose,
|
||||
@@ -134,7 +143,11 @@ function MessageCard({
|
||||
);
|
||||
|
||||
return (
|
||||
<div className="opal-message-card" data-variant={variant} ref={ref}>
|
||||
<div
|
||||
className={cn("opal-message-card", paddingVariants[padding])}
|
||||
data-variant={variant}
|
||||
ref={ref}
|
||||
>
|
||||
<ContentAction
|
||||
icon={(props) => (
|
||||
<Icon {...props} className={cn(props.className, iconClass)} />
|
||||
@@ -143,7 +156,7 @@ function MessageCard({
|
||||
description={description}
|
||||
sizePreset="main-ui"
|
||||
variant="section"
|
||||
paddingVariant="lg"
|
||||
paddingVariant="md"
|
||||
rightChildren={right}
|
||||
/>
|
||||
|
||||
@@ -157,4 +170,4 @@ function MessageCard({
|
||||
);
|
||||
}
|
||||
|
||||
export { MessageCard, type MessageCardProps, type MessageCardVariant };
|
||||
export { MessageCard, type MessageCardProps };
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
.opal-message-card {
|
||||
@apply flex flex-col self-stretch rounded-16 border p-2;
|
||||
@apply flex flex-col w-full self-stretch rounded-16 border;
|
||||
}
|
||||
|
||||
/* Variant colors */
|
||||
|
||||
32
web/lib/opal/src/components/cards/shared.css
Normal file
32
web/lib/opal/src/components/cards/shared.css
Normal file
@@ -0,0 +1,32 @@
|
||||
/* ============================================================================
|
||||
Shared status-border palette for card components.
|
||||
|
||||
Sets the `border-color` per `StatusVariants` value. Components opt in by
|
||||
setting the `data-opal-status-border` attribute on their root element
|
||||
(and, separately, declaring an actual border via Tailwind's `border` /
|
||||
`border-dashed` utility).
|
||||
|
||||
Used by:
|
||||
- Card (`borderColor` prop)
|
||||
- MessageCard (`variant` prop, in addition to its own background)
|
||||
============================================================================ */
|
||||
|
||||
[data-opal-status-border="default"] {
|
||||
@apply border-border-01;
|
||||
}
|
||||
|
||||
[data-opal-status-border="info"] {
|
||||
@apply border-status-info-02;
|
||||
}
|
||||
|
||||
[data-opal-status-border="success"] {
|
||||
@apply border-status-success-02;
|
||||
}
|
||||
|
||||
[data-opal-status-border="warning"] {
|
||||
@apply border-status-warning-02;
|
||||
}
|
||||
|
||||
[data-opal-status-border="error"] {
|
||||
@apply border-status-error-02;
|
||||
}
|
||||
@@ -42,6 +42,12 @@ export {
|
||||
type SidebarTabProps,
|
||||
} from "@opal/components/buttons/sidebar-tab/components";
|
||||
|
||||
/* LinkButton */
|
||||
export {
|
||||
LinkButton,
|
||||
type LinkButtonProps,
|
||||
} from "@opal/components/buttons/link-button/components";
|
||||
|
||||
/* Text */
|
||||
export {
|
||||
Text,
|
||||
@@ -87,7 +93,6 @@ export {
|
||||
export {
|
||||
MessageCard,
|
||||
type MessageCardProps,
|
||||
type MessageCardVariant,
|
||||
} from "@opal/components/cards/message-card/components";
|
||||
|
||||
/* Pagination */
|
||||
|
||||
@@ -1,48 +1,62 @@
|
||||
import { Content, type ContentProps } from "@opal/layouts/content/components";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type CardHeaderProps = ContentProps & {
|
||||
/** Content rendered to the right of the Content block. */
|
||||
rightChildren?: React.ReactNode;
|
||||
interface CardHeaderProps {
|
||||
/** Content rendered in the top-left header slot — typically a {@link Content} block. */
|
||||
headerChildren?: React.ReactNode;
|
||||
|
||||
/** Content rendered below `rightChildren` in the same column. */
|
||||
/** Content rendered to the right of `headerChildren` (top of right column). */
|
||||
topRightChildren?: React.ReactNode;
|
||||
|
||||
/** Content rendered below `topRightChildren`, in the same column. */
|
||||
bottomRightChildren?: React.ReactNode;
|
||||
|
||||
/**
|
||||
* Content rendered below the header row, full-width.
|
||||
* Use for expandable sections, search bars, or any content
|
||||
* that should appear beneath the icon/title/actions row.
|
||||
* Content rendered below the entire header (left + right columns),
|
||||
* spanning the full width. Use for expandable sections, search bars, or
|
||||
* any content that should appear beneath the icon/title/actions row.
|
||||
*/
|
||||
children?: React.ReactNode;
|
||||
};
|
||||
bottomChildren?: React.ReactNode;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Card.Header
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* A card header layout that pairs a {@link Content} block (with `p-2`)
|
||||
* with a right-side column.
|
||||
* A card header layout with three optional slots arranged in two independent
|
||||
* columns, plus a full-width `bottomChildren` slot below.
|
||||
*
|
||||
* The right column contains two vertically stacked slots —
|
||||
* `rightChildren` on top, `bottomRightChildren` below — with no
|
||||
* padding or gap between them.
|
||||
* ```
|
||||
* +------------------+----------------+
|
||||
* | headerChildren | topRight |
|
||||
* + +----------------+
|
||||
* | | bottomRight |
|
||||
* +------------------+----------------+
|
||||
* | bottomChildren (full width) |
|
||||
* +-----------------------------------+
|
||||
* ```
|
||||
*
|
||||
* The optional `children` slot renders below the full header row,
|
||||
* spanning the entire width.
|
||||
* The left column grows to fill available space; the right column shrinks
|
||||
* to fit its content. The two columns are independent in height.
|
||||
*
|
||||
* For the typical icon/title/description pattern, pass a {@link Content}
|
||||
* (or {@link ContentAction}) into `headerChildren`.
|
||||
*
|
||||
* @example
|
||||
* ```tsx
|
||||
* <Card.Header
|
||||
* icon={SvgGlobe}
|
||||
* title="Google"
|
||||
* description="Search engine"
|
||||
* sizePreset="main-ui"
|
||||
* variant="section"
|
||||
* rightChildren={<Button>Connect</Button>}
|
||||
* headerChildren={
|
||||
* <Content
|
||||
* icon={SvgGlobe}
|
||||
* title="Google"
|
||||
* description="Search engine"
|
||||
* sizePreset="main-ui"
|
||||
* variant="section"
|
||||
* />
|
||||
* }
|
||||
* topRightChildren={<Button>Connect</Button>}
|
||||
* bottomRightChildren={
|
||||
* <>
|
||||
* <Button icon={SvgUnplug} size="sm" prominence="tertiary" />
|
||||
@@ -53,29 +67,29 @@ type CardHeaderProps = ContentProps & {
|
||||
* ```
|
||||
*/
|
||||
function Header({
|
||||
rightChildren,
|
||||
headerChildren,
|
||||
topRightChildren,
|
||||
bottomRightChildren,
|
||||
children,
|
||||
...contentProps
|
||||
bottomChildren,
|
||||
}: CardHeaderProps) {
|
||||
const hasRight = rightChildren || bottomRightChildren;
|
||||
const hasRight = topRightChildren != null || bottomRightChildren != null;
|
||||
|
||||
return (
|
||||
<div className="flex flex-col w-full">
|
||||
<div className="flex flex-row items-stretch w-full">
|
||||
<div className="flex-1 min-w-0 self-start p-2">
|
||||
<Content {...contentProps} />
|
||||
</div>
|
||||
<div className="flex flex-row items-start w-full">
|
||||
{headerChildren != null && (
|
||||
<div className="self-start p-2 grow min-w-0">{headerChildren}</div>
|
||||
)}
|
||||
{hasRight && (
|
||||
<div className="flex flex-col items-end shrink-0">
|
||||
{rightChildren && <div className="flex-1">{rightChildren}</div>}
|
||||
{bottomRightChildren && (
|
||||
{topRightChildren != null && <div>{topRightChildren}</div>}
|
||||
{bottomRightChildren != null && (
|
||||
<div className="flex flex-row">{bottomRightChildren}</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{children && <div className="w-full">{children}</div>}
|
||||
{bottomChildren != null && <div className="w-full">{bottomChildren}</div>}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -134,6 +134,20 @@ const cardRoundingVariants: Record<RoundingVariants, string> = {
|
||||
xs: "rounded-04",
|
||||
};
|
||||
|
||||
const cardTopRoundingVariants: Record<RoundingVariants, string> = {
|
||||
lg: "rounded-t-16",
|
||||
md: "rounded-t-12",
|
||||
sm: "rounded-t-08",
|
||||
xs: "rounded-t-04",
|
||||
};
|
||||
|
||||
const cardBottomRoundingVariants: Record<RoundingVariants, string> = {
|
||||
lg: "rounded-b-16",
|
||||
md: "rounded-b-12",
|
||||
sm: "rounded-b-08",
|
||||
xs: "rounded-b-04",
|
||||
};
|
||||
|
||||
export {
|
||||
type ExtremaSizeVariants,
|
||||
type ContainerSizeVariants,
|
||||
@@ -144,6 +158,8 @@ export {
|
||||
paddingXVariants,
|
||||
paddingYVariants,
|
||||
cardRoundingVariants,
|
||||
cardTopRoundingVariants,
|
||||
cardBottomRoundingVariants,
|
||||
widthVariants,
|
||||
heightVariants,
|
||||
};
|
||||
|
||||
@@ -81,6 +81,22 @@ export type ExtremaSizeVariants = Extract<SizeVariants, "fit" | "full">;
|
||||
*/
|
||||
export type OverridableExtremaSizeVariants = ExtremaSizeVariants | number;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Status Variants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Severity / status variants used by alert-style components (e.g. {@link
|
||||
* MessageCard}, {@link Card}'s `borderColor`). Each variant maps to a
|
||||
* dedicated background/border/icon palette in the design system.
|
||||
*/
|
||||
export type StatusVariants =
|
||||
| "default"
|
||||
| "info"
|
||||
| "success"
|
||||
| "warning"
|
||||
| "error";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Icon Props
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -152,6 +152,11 @@ const nextConfig = {
|
||||
destination: "/ee/agents/:path*",
|
||||
permanent: true,
|
||||
},
|
||||
{
|
||||
source: "/admin/configuration/llm",
|
||||
destination: "/admin/configuration/language-models",
|
||||
permanent: true,
|
||||
},
|
||||
];
|
||||
},
|
||||
};
|
||||
|
||||
@@ -5,7 +5,6 @@ import { useSearchParams, useRouter } from "next/navigation";
|
||||
import { mutate } from "swr";
|
||||
import * as SettingsLayouts from "@/layouts/settings-layouts";
|
||||
import { Section } from "@/layouts/general-layouts";
|
||||
import Button from "@/refresh-components/buttons/Button";
|
||||
import Text from "@/refresh-components/texts/Text";
|
||||
import { SvgArrowUpCircle, SvgWallet } from "@opal/icons";
|
||||
import type { IconProps } from "@opal/types";
|
||||
@@ -19,7 +18,7 @@ import {
|
||||
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
|
||||
import { SWR_KEYS } from "@/lib/swr-keys";
|
||||
import { useUser } from "@/providers/UserProvider";
|
||||
import { MessageCard } from "@opal/components";
|
||||
import { LinkButton, MessageCard } from "@opal/components";
|
||||
|
||||
import PlansView from "./PlansView";
|
||||
import CheckoutView from "./CheckoutView";
|
||||
@@ -72,25 +71,10 @@ function FooterLinks({
|
||||
<Text secondaryBody text03>
|
||||
Have a license key?
|
||||
</Text>
|
||||
{/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
|
||||
<Button action tertiary onClick={onActivateLicense}>
|
||||
<Text secondaryBody text05 className="underline">
|
||||
{licenseText}
|
||||
</Text>
|
||||
</Button>
|
||||
<LinkButton onClick={onActivateLicense}>{licenseText}</LinkButton>
|
||||
</>
|
||||
)}
|
||||
{/* TODO(@raunakab): migrate to opal Button once className/iconClassName is resolved */}
|
||||
<Button
|
||||
action
|
||||
tertiary
|
||||
href={billingHelpHref}
|
||||
className="billing-text-link"
|
||||
>
|
||||
<Text secondaryBody text03 className="underline">
|
||||
Billing Help
|
||||
</Text>
|
||||
</Button>
|
||||
<LinkButton href={billingHelpHref}>Billing Help</LinkButton>
|
||||
</Section>
|
||||
);
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user