Compare commits

..

19 Commits

Author      SHA1        Message                  Date
Dane Urban  037540225c  .                        2026-03-05 15:18:39 -08:00
Dane Urban  1be71fd2af  comment                  2026-03-05 15:05:23 -08:00
Dane Urban  8bef7ab8fb  .                        2026-03-05 15:00:57 -08:00
Dane Urban  8ca7c1af5a  .                        2026-03-05 14:55:50 -08:00
Dane Urban  089fc6ed3e  .:                       2026-03-05 14:52:41 -08:00
Dane Urban  a25079ac23  .                        2026-03-05 14:48:08 -08:00
Dane Urban  00dca7c3ec  .                        2026-03-05 14:08:23 -08:00
Dane Urban  a8c7b322cb  .                        2026-03-05 13:51:52 -08:00
Dane Urban  e549944bce  .                        2026-03-05 13:05:48 -08:00
Dane Urban  723637f379  .                        2026-03-05 13:03:47 -08:00
Dane Urban  e9913876c0  Comment                  2026-03-05 13:00:55 -08:00
Dane Urban  20948e2ea3  Fixes                    2026-03-05 12:53:05 -08:00
Dane Urban  5dcbc91643  .                        2026-03-05 12:42:38 -08:00
Dane Urban  c8e874df49  Make stateless           2026-03-05 11:19:38 -08:00
Dane Urban  b1a6a08eed  Remove comments          2026-03-05 10:46:37 -08:00
Dane Urban  ec3e571a7f  Fixes                    2026-03-05 10:44:05 -08:00
Dane Urban  b03a0f8cac  Fixes                    2026-03-05 10:43:50 -08:00
Dane Urban  8163ca704a  .                        2026-03-05 10:37:57 -08:00
Dane Urban  b7abf3991a  Stream argument packets  2026-03-05 10:10:54 -08:00
36 changed files with 1180 additions and 2126 deletions

15  .vscode/launch.json vendored

@@ -485,6 +485,21 @@
"group": "3"
}
},
{
"name": "Clear and Restart OpenSearch Container",
// Generic debugger type, required arg but has no bearing on bash.
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
"name": "Eval CLI",
"type": "debugpy",


@@ -39,13 +39,9 @@ CT = TypeVar("CT", bound=ConnectorCheckpoint)
class SlimConnectorExtractionResult(BaseModel):
"""Result of extracting document IDs and hierarchy nodes from a connector.
"""Result of extracting document IDs and hierarchy nodes from a connector."""
raw_id_to_parent maps document ID → parent_hierarchy_raw_node_id (or None).
Use raw_id_to_parent.keys() wherever the old set of IDs was needed.
"""
raw_id_to_parent: dict[str, str | None]
doc_ids: set[str]
hierarchy_nodes: list[HierarchyNode]
@@ -97,37 +93,30 @@ def _get_failure_id(failure: ConnectorFailure) -> str | None:
return None
class BatchResult(BaseModel):
raw_id_to_parent: dict[str, str | None]
hierarchy_nodes: list[HierarchyNode]
def _extract_from_batch(
doc_list: Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure],
) -> BatchResult:
"""Separate a batch into document IDs (with parent mapping) and hierarchy nodes.
) -> tuple[set[str], list[HierarchyNode]]:
"""Separate a batch into document IDs and hierarchy nodes.
ConnectorFailure items have their failed document/entity IDs added to the
ID dict so that failed-to-retrieve documents are not accidentally pruned.
ID set so that failed-to-retrieve documents are not accidentally pruned.
"""
ids: dict[str, str | None] = {}
ids: set[str] = set()
hierarchy_nodes: list[HierarchyNode] = []
for item in doc_list:
if isinstance(item, HierarchyNode):
hierarchy_nodes.append(item)
if item.raw_node_id not in ids:
ids[item.raw_node_id] = None
ids.add(item.raw_node_id)
elif isinstance(item, ConnectorFailure):
failed_id = _get_failure_id(item)
if failed_id:
ids[failed_id] = None
ids.add(failed_id)
logger.warning(
f"Failed to retrieve document {failed_id}: " f"{item.failure_message}"
)
else:
parent_raw = getattr(item, "parent_hierarchy_raw_node_id", None)
ids[item.id] = parent_raw
return BatchResult(raw_id_to_parent=ids, hierarchy_nodes=hierarchy_nodes)
ids.add(item.id)
return ids, hierarchy_nodes
def extract_ids_from_runnable_connector(
@@ -143,7 +132,7 @@ def extract_ids_from_runnable_connector(
Optionally, a callback can be passed to handle the length of each document batch.
"""
all_raw_id_to_parent: dict[str, str | None] = {}
all_connector_doc_ids: set[str] = set()
all_hierarchy_nodes: list[HierarchyNode] = []
# Sequence (covariant) lets all the specific list[...] iterator types unify here
@@ -188,20 +177,15 @@ def extract_ids_from_runnable_connector(
"extract_ids_from_runnable_connector: Stop signal detected"
)
batch_result = _extract_from_batch(doc_list)
batch_ids = batch_result.raw_id_to_parent
batch_nodes = batch_result.hierarchy_nodes
doc_batch_processing_func(batch_ids)
for k, v in batch_ids.items():
if v is not None or k not in all_raw_id_to_parent:
all_raw_id_to_parent[k] = v
batch_ids, batch_nodes = _extract_from_batch(doc_list)
all_connector_doc_ids.update(doc_batch_processing_func(batch_ids))
all_hierarchy_nodes.extend(batch_nodes)
if callback:
callback.progress("extract_ids_from_runnable_connector", len(batch_ids))
return SlimConnectorExtractionResult(
raw_id_to_parent=all_raw_id_to_parent,
doc_ids=all_connector_doc_ids,
hierarchy_nodes=all_hierarchy_nodes,
)
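
The hunk above interleaves the removed raw_id_to_parent plumbing with the restored set-based version. As a minimal standalone sketch of the post-change behavior (stub dataclasses stand in for the real onyx models, and the stub ConnectorFailure.failed_id replaces the real _get_failure_id lookup):

import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)

# Stub stand-ins for the real onyx models referenced in the hunk.
@dataclass
class HierarchyNode:
    raw_node_id: str

@dataclass
class SlimDocument:
    id: str

@dataclass
class ConnectorFailure:
    failed_id: str | None
    failure_message: str = ""

def _extract_from_batch(doc_list: list) -> tuple[set[str], list[HierarchyNode]]:
    """Separate a batch into document IDs and hierarchy nodes (post-change)."""
    ids: set[str] = set()
    hierarchy_nodes: list[HierarchyNode] = []
    for item in doc_list:
        if isinstance(item, HierarchyNode):
            hierarchy_nodes.append(item)
            ids.add(item.raw_node_id)  # node IDs join the set so nodes aren't pruned
        elif isinstance(item, ConnectorFailure):
            if item.failed_id:
                ids.add(item.failed_id)  # failed docs must not be pruned either
                logger.warning("Failed to retrieve document %s: %s", item.failed_id, item.failure_message)
        else:
            ids.add(item.id)
    return ids, hierarchy_nodes

batch = [SlimDocument(id="msg-1"), HierarchyNode(raw_node_id="chan-A"), ConnectorFailure(failed_id="msg-2")]
ids, nodes = _extract_from_batch(batch)
assert ids == {"msg-1", "chan-A", "msg-2"} and len(nodes) == 1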


@@ -29,7 +29,6 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -48,8 +47,6 @@ from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import ConnectorCredentialPair
from onyx.db.sync_record import insert_sync_record
@@ -60,8 +57,6 @@ from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_prune import RedisConnectorPrunePayload
from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
from onyx.redis.redis_hierarchy import ensure_source_node_exists
from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
@@ -118,38 +113,6 @@ class PruneCallback(IndexingCallbackBase):
super().progress(tag, amount)
def _resolve_and_update_document_parents(
db_session: Session,
redis_client: Redis,
source: DocumentSource,
raw_id_to_parent: dict[str, str | None],
) -> None:
"""Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id for
each document and bulk-update the DB. Mirrors the resolution logic in
run_docfetching.py."""
source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)
resolved: dict[str, int | None] = {}
for doc_id, raw_parent_id in raw_id_to_parent.items():
if raw_parent_id is None:
continue
node_id, found = get_node_id_from_raw_id(redis_client, source, raw_parent_id)
resolved[doc_id] = node_id if found else source_node_id
if not resolved:
return
update_document_parent_hierarchy_nodes(
db_session=db_session,
doc_parent_map=resolved,
commit=True,
)
task_logger.info(
f"Pruning: resolved and updated parent hierarchy for "
f"{len(resolved)} documents (source={source.value})"
)
"""Jobs / utils for kicking off pruning tasks."""
@@ -572,22 +535,22 @@ def connector_pruning_generator_task(
extraction_result = extract_ids_from_runnable_connector(
runnable_connector, callback
)
all_connector_doc_ids = extraction_result.raw_id_to_parent
all_connector_doc_ids = extraction_result.doc_ids
# Process hierarchy nodes (same as docfetching):
# upsert to Postgres and cache in Redis
source = cc_pair.connector.source
redis_client = get_redis_client(tenant_id=tenant_id)
if extraction_result.hierarchy_nodes:
is_connector_public = cc_pair.access_type == AccessType.PUBLIC
ensure_source_node_exists(redis_client, db_session, source)
redis_client = get_redis_client(tenant_id=tenant_id)
ensure_source_node_exists(
redis_client, db_session, cc_pair.connector.source
)
upserted_nodes = upsert_hierarchy_nodes_batch(
db_session=db_session,
nodes=extraction_result.hierarchy_nodes,
source=source,
source=cc_pair.connector.source,
commit=True,
is_connector_public=is_connector_public,
)
@@ -598,7 +561,7 @@ def connector_pruning_generator_task(
]
cache_hierarchy_nodes_batch(
redis_client=redis_client,
source=source,
source=cc_pair.connector.source,
entries=cache_entries,
)
@@ -607,26 +570,6 @@ def connector_pruning_generator_task(
f"hierarchy nodes for cc_pair={cc_pair_id}"
)
ensure_source_node_exists(redis_client, db_session, source)
# Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id
# and bulk-update documents, mirroring the docfetching resolution
_resolve_and_update_document_parents(
db_session=db_session,
redis_client=redis_client,
source=source,
raw_id_to_parent=all_connector_doc_ids,
)
# Link hierarchy nodes to documents for sources where pages can be
# both hierarchy nodes AND documents (e.g. Notion, Confluence)
all_doc_id_list = list(all_connector_doc_ids.keys())
link_hierarchy_nodes_to_documents(
db_session=db_session,
document_ids=all_doc_id_list,
source=source,
commit=True,
)
# a list of docs in our local index
all_indexed_document_ids = {
doc.id
@@ -638,9 +581,7 @@ def connector_pruning_generator_task(
}
# generate list of docs to remove (no longer in the source)
doc_ids_to_remove = list(
all_indexed_document_ids - all_connector_doc_ids.keys()
)
doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
task_logger.info(
"Pruning set collected: "


@@ -15,6 +15,7 @@ from onyx.chat.citation_processor import DynamicCitationProcessor
from onyx.chat.emitter import Emitter
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import LlmStepResult
from onyx.chat.tool_call_args_streaming import maybe_emit_argument_delta
from onyx.configs.app_configs import LOG_ONYX_MODEL_INTERACTIONS
from onyx.configs.app_configs import PROMPT_CACHE_CHAT_HISTORY
from onyx.configs.constants import MessageType
@@ -1018,6 +1019,7 @@ def run_llm_step_pkt_generator(
)
id_to_tool_call_map: dict[int, dict[str, Any]] = {}
arg_scan_offsets: dict[int, int] = {}
reasoning_start = False
answer_start = False
accumulated_reasoning = ""
@@ -1224,7 +1226,14 @@ def run_llm_step_pkt_generator(
yield from _close_reasoning_if_active()
for tool_call_delta in delta.tool_calls:
# maybe_emit_argument_delta depends on _update_tool_call_with_delta being called first and attaching the delta
_update_tool_call_with_delta(id_to_tool_call_map, tool_call_delta)
yield from maybe_emit_argument_delta(
tool_calls_in_progress=id_to_tool_call_map,
tool_call_delta=tool_call_delta,
placement=_current_placement(),
scan_offsets=arg_scan_offsets,
)
# Flush any tail text buffered while checking for split "<function_calls" markers.
filtered_content_tail = xml_tool_call_content_filter.flush()


@@ -0,0 +1,236 @@
import json
from collections.abc import Generator
from collections.abc import Mapping
from typing import Any
from typing import NamedTuple
from typing import Type
from onyx.llm.model_response import ChatCompletionDeltaToolCall
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta
from onyx.tools.built_in_tools import TOOL_NAME_TO_CLASS
from onyx.tools.interface import Tool
from onyx.utils.logger import setup_logger
logger = setup_logger()
def _get_tool_class(
tool_calls_in_progress: Mapping[int, Mapping[str, Any]],
tool_call_delta: ChatCompletionDeltaToolCall,
) -> Type[Tool] | None:
"""Look up the Tool subclass for a streaming tool call delta."""
tool_name = tool_calls_in_progress.get(tool_call_delta.index, {}).get("name")
if not tool_name:
return None
return TOOL_NAME_TO_CLASS.get(tool_name)
class _Token(NamedTuple):
"""A parsed JSON string with position info."""
value: str # raw content between the quotes
start: int # index of first char inside the quotes
end: int # index of closing quote, or len(text) if incomplete
complete: bool # whether the closing quote was found
def _parse_json_string(text: str, pos: int) -> _Token:
"""Parse a JSON string starting at the opening quote at ``pos``."""
i = pos + 1
while i < len(text):
if text[i] == "\\":
i += 2
elif text[i] == '"':
return _Token(text[pos + 1 : i], pos + 1, i, complete=True)
else:
i += 1
return _Token(text[pos + 1 :], pos + 1, len(text), complete=False)
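# Illustration (hypothetical input): for text = '{"msg": "a\\"b' the value
# quote sits at index 8, so _parse_json_string(text, 8) skips the escaped
# quote and returns _Token(value='a\\"b', start=9, end=13, complete=False)
# -- raw escapes intact, incomplete because no closing quote has arrived.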
def _skip_json_value(text: str, pos: int) -> int:
"""Skip past a non-string JSON value (number, bool, null, array, object).
Tracks ``[]`` / ``{}`` nesting depth and skips over embedded strings so
that internal commas and braces don't terminate the scan early. Stops
at the next top-level ``,`` or ``}`` (not consumed).
"""
depth = 0
while pos < len(text):
ch = text[pos]
if ch == '"':
tok = _parse_json_string(text, pos)
pos = tok.end + 1 if tok.complete else tok.end
continue
if ch in ("{", "["):
depth += 1
elif ch in ("}", "]"):
if depth == 0:
break
depth -= 1
elif ch == "," and depth == 0:
break
pos += 1
return pos
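# Illustration: called at the "[" in '{"a": [1, {"b": 2}], "c": 3}',
# _skip_json_value tracks nesting so the inner "," and "}" don't stop the
# scan; it returns the index of the top-level "," just before "c".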
def _skip(text: str, pos: int, chars: str = " \t\n\r,") -> int:
"""Advance ``pos`` past any characters in ``chars``."""
while pos < len(text) and text[pos] in chars:
pos += 1
return pos
def _decode_partial_json_string(raw: str) -> str:
"""Decode JSON escapes (``\\n`` → newline) from a possibly incomplete value.
Progressively trims up to 6 trailing chars to handle partial escape
sequences (the longest JSON escape is ``\\uXXXX``).
"""
for trim in range(min(7, len(raw) + 1)):
candidate = raw[: len(raw) - trim] if trim else raw
try:
result = json.loads('"' + candidate + '"')
if trim > 0 and not result and raw:
logger.warning(
"Dropped %d chars from partial JSON string value (trim=%d)",
len(raw),
trim,
)
return result
except (json.JSONDecodeError, ValueError):
continue
logger.warning(
"Failed to decode partial JSON string value; dropping %d chars", len(raw)
)
return ""
def _extract_delta_args(
pre: str, delta: str, scan_offset: int = 0
) -> tuple[dict[str, str], int]:
"""Extract decoded argument values contributed by ``delta``.
Walks ``pre + delta`` as a partial JSON object (``{"k": "v", ...}``),
and for each string value returns only the decoded content that falls
within the ``delta`` portion. Escape sequences that straddle the
boundary are handled correctly.
Returns ``(argument_deltas, next_scan_offset)`` where
``next_scan_offset`` should be passed to the next call to skip
completed key-value pairs, reducing cost from O(accumulated) to
O(delta) per call.
"""
full = pre + delta
delta_start = len(pre)
result: dict[str, str] = {}
if scan_offset > 0:
pos = scan_offset
else:
pos = full.find("{")
if pos == -1:
return result, 0
pos += 1
resume = pos
while pos < len(full):
pos = _skip(full, pos)
if pos >= len(full) or full[pos] == "}":
break
resume = pos # remember start of this key-value pair
# Key
if full[pos] != '"':
break
key = _parse_json_string(full, pos)
if not key.complete:
break
pos = key.end + 1
# Colon
pos = _skip(full, pos, " \t\n\r")
if pos >= len(full) or full[pos] != ":":
break
pos += 1
# Value
pos = _skip(full, pos, " \t\n\r")
if pos >= len(full):
break
if full[pos] != '"':
# Skip non-string values (number, boolean, null, array, object).
# They are available in the final tool-call kickoff packet;
# emitting them here as strings would be ambiguous for consumers
# (e.g. the number 30 vs the string "30").
pos = _skip_json_value(full, pos)
continue
val = _parse_json_string(full, pos)
# Only include the portion of this value that overlaps with delta
lo = max(val.start, delta_start)
hi = val.end
if lo < hi:
# Decode from value start through both boundaries so escape
# sequences straddling the delta edge are handled correctly.
decoded_before = _decode_partial_json_string(full[val.start : lo])
decoded_through = _decode_partial_json_string(full[val.start : hi])
new_content = decoded_through[len(decoded_before) :]
if new_content:
result[key.value] = new_content
if not val.complete:
break
pos = val.end + 1
return result, resume
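# Illustration: with pre='{"query": "hel' and delta='lo wor', only the text
# the delta contributed is returned, with resume pointing back at the key:
#   _extract_delta_args('{"query": "hel', 'lo wor')  ->  ({'query': 'lo wor'}, 1)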
def maybe_emit_argument_delta(
tool_calls_in_progress: Mapping[int, Mapping[str, Any]],
tool_call_delta: ChatCompletionDeltaToolCall,
placement: Placement,
scan_offsets: dict[int, int],
) -> Generator[Packet, None, None]:
"""Emit decoded tool-call argument deltas to the frontend.
NOTE: Currently skips non-string arguments
``scan_offsets`` is a mutable dict keyed by tool-call index that allows
each call to skip past already-processed key-value pairs, reducing
per-call cost from O(accumulated) to O(delta).
"""
tool_cls = _get_tool_class(tool_calls_in_progress, tool_call_delta)
if not tool_cls or not tool_cls.do_emit_argument_deltas():
return
fn = tool_call_delta.function
delta_fragment = fn.arguments if fn else None
if not delta_fragment:
return
tc_data = tool_calls_in_progress[tool_call_delta.index]
accumulated_args = tc_data["arguments"]
prev_args = accumulated_args[: -len(delta_fragment)]
idx = tool_call_delta.index
offset = scan_offsets.get(idx, 0)
argument_deltas, new_offset = _extract_delta_args(prev_args, delta_fragment, offset)
scan_offsets[idx] = new_offset
if not argument_deltas:
return
yield Packet(
placement=placement,
obj=ToolCallArgumentDelta(
tool_type=tc_data.get("name", ""),
tool_id=tc_data.get("id", ""),
argument_deltas=argument_deltas,
),
)
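
To make the incremental scanning concrete, a small driver sketch that reuses _extract_delta_args exactly as defined above and feeds it an invented three-chunk argument stream:

fragments = ['{"query": "hel', 'lo wor', 'ld"}']
accumulated = ""
offset = 0
emitted: list[dict[str, str]] = []
for frag in fragments:
    pre = accumulated
    accumulated += frag
    deltas, offset = _extract_delta_args(pre, frag, offset)
    if deltas:
        emitted.append(deltas)
assert emitted == [{"query": "hel"}, {"query": "lo wor"}, {"query": "ld"}]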


@@ -943,9 +943,6 @@ class ConfluenceConnector(
if include_permissions
else None
),
parent_hierarchy_raw_node_id=self._get_parent_hierarchy_raw_id(
page
),
)
)
@@ -995,7 +992,6 @@ class ConfluenceConnector(
if include_permissions
else None
),
parent_hierarchy_raw_node_id=page_id,
)
)


@@ -781,5 +781,4 @@ def build_slim_document(
return SlimDocument(
id=onyx_document_id_from_drive_file(file),
external_access=external_access,
parent_hierarchy_raw_node_id=(file.get("parents") or [None])[0],
)


@@ -902,11 +902,6 @@ class JiraConnector(
external_access=self._get_project_permissions(
project_key, add_prefix=False
),
parent_hierarchy_raw_node_id=(
self._get_parent_hierarchy_raw_node_id(issue, project_key)
if project_key
else None
),
)
)
current_offset += 1


@@ -385,7 +385,6 @@ class IndexingDocument(Document):
class SlimDocument(BaseModel):
id: str
external_access: ExternalAccess | None = None
parent_hierarchy_raw_node_id: str | None = None
class HierarchyNode(BaseModel):


@@ -772,7 +772,6 @@ def _convert_driveitem_to_slim_document(
drive_name: str,
ctx: ClientContext,
graph_client: GraphClient,
parent_hierarchy_raw_node_id: str | None = None,
) -> SlimDocument:
if driveitem.id is None:
raise ValueError("DriveItem ID is required")
@@ -788,15 +787,11 @@ def _convert_driveitem_to_slim_document(
return SlimDocument(
id=driveitem.id,
external_access=external_access,
parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
)
def _convert_sitepage_to_slim_document(
site_page: dict[str, Any],
ctx: ClientContext | None,
graph_client: GraphClient,
parent_hierarchy_raw_node_id: str | None = None,
site_page: dict[str, Any], ctx: ClientContext | None, graph_client: GraphClient
) -> SlimDocument:
"""Convert a SharePoint site page to a SlimDocument object."""
if site_page.get("id") is None:
@@ -813,7 +808,6 @@ def _convert_sitepage_to_slim_document(
return SlimDocument(
id=id,
external_access=external_access,
parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
)
@@ -1600,22 +1594,12 @@ class SharepointConnector(
)
)
parent_hierarchy_url: str | None = None
if drive_web_url:
parent_hierarchy_url = self._get_parent_hierarchy_url(
site_url, drive_web_url, drive_name, driveitem
)
try:
logger.debug(f"Processing: {driveitem.web_url}")
ctx = self._create_rest_client_context(site_descriptor.url)
doc_batch.append(
_convert_driveitem_to_slim_document(
driveitem,
drive_name,
ctx,
self.graph_client,
parent_hierarchy_raw_node_id=parent_hierarchy_url,
driveitem, drive_name, ctx, self.graph_client
)
)
except Exception as e:
@@ -1635,10 +1619,7 @@ class SharepointConnector(
ctx = self._create_rest_client_context(site_descriptor.url)
doc_batch.append(
_convert_sitepage_to_slim_document(
site_page,
ctx,
self.graph_client,
parent_hierarchy_raw_node_id=site_descriptor.url,
site_page, ctx, self.graph_client
)
)
if len(doc_batch) >= SLIM_BATCH_SIZE:


@@ -565,7 +565,6 @@ def _get_all_doc_ids(
channel_id=channel_id, thread_ts=message["ts"]
),
external_access=external_access,
parent_hierarchy_raw_node_id=channel_id,
)
)


@@ -1,7 +1,5 @@
"""CRUD operations for HierarchyNode."""
from collections import defaultdict
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -527,53 +525,6 @@ def get_document_parent_hierarchy_node_ids(
return {doc_id: parent_id for doc_id, parent_id in results}
def update_document_parent_hierarchy_nodes(
db_session: Session,
doc_parent_map: dict[str, int | None],
commit: bool = True,
) -> int:
"""Bulk-update Document.parent_hierarchy_node_id for multiple documents.
Only updates rows whose current value differs from the desired value to
avoid unnecessary writes.
Args:
db_session: SQLAlchemy session
doc_parent_map: Mapping of document_id → desired parent_hierarchy_node_id
commit: Whether to commit the transaction
Returns:
Number of documents actually updated
"""
if not doc_parent_map:
return 0
doc_ids = list(doc_parent_map.keys())
existing = get_document_parent_hierarchy_node_ids(db_session, doc_ids)
by_parent: dict[int | None, list[str]] = defaultdict(list)
for doc_id, desired_parent_id in doc_parent_map.items():
current = existing.get(doc_id)
if current == desired_parent_id or doc_id not in existing:
continue
by_parent[desired_parent_id].append(doc_id)
updated = 0
for desired_parent_id, ids in by_parent.items():
db_session.query(Document).filter(Document.id.in_(ids)).update(
{Document.parent_hierarchy_node_id: desired_parent_id},
synchronize_session=False,
)
updated += len(ids)
if commit:
db_session.commit()
elif updated:
db_session.flush()
return updated
def update_hierarchy_node_permissions(
db_session: Session,
raw_node_id: str,


@@ -129,7 +129,7 @@ def get_current_search_settings(db_session: Session) -> SearchSettings:
latest_settings = result.scalars().first()
if not latest_settings:
raise RuntimeError("No search settings specified; DB is not in a valid state.")
raise RuntimeError("No search settings specified, DB is not in a valid state")
return latest_settings


@@ -32,6 +32,9 @@ def get_multipass_config(search_settings: SearchSettings) -> MultipassConfig:
Determines whether to enable multipass and large chunks by examining
the current search settings and the embedder configuration.
"""
if not search_settings:
return MultipassConfig(multipass_indexing=False, enable_large_chunks=False)
multipass = should_use_multipass(search_settings)
enable_large_chunks = SearchSettings.can_use_large_chunks(
multipass, search_settings.model_name, search_settings.provider_type


@@ -26,10 +26,11 @@ def get_default_document_index(
To be used for retrieval only. Indexing should be done through both indices
until Vespa is deprecated.
Pre-existing docstring for this function, although secondary indices are not
currently supported:
Primary index is the index that is used for querying/updating etc. Secondary
index is for when both the currently used index and the upcoming index both
need to be updated. Updates are applied to both indices.
WARNING: In that case, get_all_document_indices should be used.
need to be updated, updates are applied to both indices.
"""
if DISABLE_VECTOR_DB:
return DisabledDocumentIndex(
@@ -50,26 +51,11 @@ def get_default_document_index(
opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
if opensearch_retrieval_enabled:
indexing_setting = IndexingSetting.from_db_model(search_settings)
secondary_indexing_setting = (
IndexingSetting.from_db_model(secondary_search_settings)
if secondary_search_settings
else None
)
return OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=secondary_index_name,
secondary_embedding_dim=(
secondary_indexing_setting.final_embedding_dim
if secondary_indexing_setting
else None
),
secondary_embedding_precision=(
secondary_indexing_setting.embedding_precision
if secondary_indexing_setting
else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=secondary_large_chunks_enabled,
multitenant=MULTI_TENANT,
@@ -100,7 +86,8 @@ def get_all_document_indices(
Used for indexing only. Until Vespa is deprecated we will index into both
document indices. Retrieval is done through only one index however.
Large chunks are not currently supported so we hardcode appropriate values.
Large chunks and secondary indices are not currently supported so we
hardcode appropriate values.
NOTE: Make sure the Vespa index object is returned first. In the rare event
that there is some conflict between indexing and the migration task, it is
@@ -136,36 +123,13 @@ def get_all_document_indices(
opensearch_document_index: OpenSearchOldDocumentIndex | None = None
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
indexing_setting = IndexingSetting.from_db_model(search_settings)
secondary_indexing_setting = (
IndexingSetting.from_db_model(secondary_search_settings)
if secondary_search_settings
else None
)
opensearch_document_index = OpenSearchOldDocumentIndex(
index_name=search_settings.index_name,
embedding_dim=indexing_setting.final_embedding_dim,
embedding_precision=indexing_setting.embedding_precision,
secondary_index_name=(
secondary_search_settings.index_name
if secondary_search_settings
else None
),
secondary_embedding_dim=(
secondary_indexing_setting.final_embedding_dim
if secondary_indexing_setting
else None
),
secondary_embedding_precision=(
secondary_indexing_setting.embedding_precision
if secondary_indexing_setting
else None
),
large_chunks_enabled=search_settings.large_chunks_enabled,
secondary_large_chunks_enabled=(
secondary_search_settings.large_chunks_enabled
if secondary_search_settings
else None
),
secondary_index_name=None,
large_chunks_enabled=False,
secondary_large_chunks_enabled=None,
multitenant=MULTI_TENANT,
httpx_client=httpx_client,
)


@@ -271,9 +271,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
embedding_dim: int,
embedding_precision: EmbeddingPrecision,
secondary_index_name: str | None,
secondary_embedding_dim: int | None,
secondary_embedding_precision: EmbeddingPrecision | None,
# NOTE: We do not support large chunks right now.
large_chunks_enabled: bool, # noqa: ARG002
secondary_large_chunks_enabled: bool | None, # noqa: ARG002
multitenant: bool = False,
@@ -289,25 +286,12 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
f"Expected {MULTI_TENANT}, got {multitenant}."
)
tenant_id = get_current_tenant_id()
tenant_state = TenantState(tenant_id=tenant_id, multitenant=multitenant)
self._real_index = OpenSearchDocumentIndex(
tenant_state=tenant_state,
tenant_state=TenantState(tenant_id=tenant_id, multitenant=multitenant),
index_name=index_name,
embedding_dim=embedding_dim,
embedding_precision=embedding_precision,
)
self._secondary_real_index: OpenSearchDocumentIndex | None = None
if self.secondary_index_name:
if secondary_embedding_dim is None or secondary_embedding_precision is None:
raise ValueError(
"Bug: Secondary index embedding dimension and precision are not set."
)
self._secondary_real_index = OpenSearchDocumentIndex(
tenant_state=tenant_state,
index_name=self.secondary_index_name,
embedding_dim=secondary_embedding_dim,
embedding_precision=secondary_embedding_precision,
)
@staticmethod
def register_multitenant_indices(
@@ -323,38 +307,19 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
self,
primary_embedding_dim: int,
primary_embedding_precision: EmbeddingPrecision,
secondary_index_embedding_dim: int | None,
secondary_index_embedding_precision: EmbeddingPrecision | None,
secondary_index_embedding_dim: int | None, # noqa: ARG002
secondary_index_embedding_precision: EmbeddingPrecision | None, # noqa: ARG002
) -> None:
self._real_index.verify_and_create_index_if_necessary(
# Only handle primary index for now, ignore secondary.
return self._real_index.verify_and_create_index_if_necessary(
primary_embedding_dim, primary_embedding_precision
)
if self.secondary_index_name:
if (
secondary_index_embedding_dim is None
or secondary_index_embedding_precision is None
):
raise ValueError(
"Bug: Secondary index embedding dimension and precision are not set."
)
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
self._secondary_real_index.verify_and_create_index_if_necessary(
secondary_index_embedding_dim, secondary_index_embedding_precision
)
def index(
self,
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
NOTE: Do NOT consider the secondary index here. A separate indexing
pipeline will be responsible for indexing to the secondary index. This
design is not ideal and we should reconsider this when revamping index
swapping.
"""
# Convert IndexBatchParams to IndexingMetadata.
chunk_counts: dict[str, IndexingMetadata.ChunkCounts] = {}
for doc_id in index_batch_params.doc_id_to_new_chunk_cnt:
@@ -386,20 +351,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
tenant_id: str, # noqa: ARG002
chunk_count: int | None,
) -> int:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for deleting chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
total_chunks_deleted = self._real_index.delete(doc_id, chunk_count)
if self.secondary_index_name:
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
total_chunks_deleted += self._secondary_real_index.delete(
doc_id, chunk_count
)
return total_chunks_deleted
return self._real_index.delete(doc_id, chunk_count)
def update_single(
self,
@@ -410,11 +362,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
fields: VespaDocumentFields | None,
user_fields: VespaDocumentUserFields | None,
) -> None:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for updating chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
if fields is None and user_fields is None:
logger.warning(
f"Tried to update document {doc_id} with no updated fields or user fields."
@@ -445,11 +392,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
try:
self._real_index.update([update_request])
if self.secondary_index_name:
assert (
self._secondary_real_index is not None
), "Bug: Secondary index is not initialized."
self._secondary_real_index.update([update_request])
except NotFoundError:
logger.exception(
f"Tried to update document {doc_id} but at least one of its chunks was not found in OpenSearch. "


@@ -465,12 +465,6 @@ class VespaIndex(DocumentIndex):
chunks: list[DocMetadataAwareIndexChunk],
index_batch_params: IndexBatchParams,
) -> set[OldDocumentInsertionRecord]:
"""
NOTE: Do NOT consider the secondary index here. A separate indexing
pipeline will be responsible for indexing to the secondary index. This
design is not ideal and we should reconsider this when revamping index
swapping.
"""
if len(index_batch_params.doc_id_to_previous_chunk_cnt) != len(
index_batch_params.doc_id_to_new_chunk_cnt
):
@@ -665,10 +659,6 @@ class VespaIndex(DocumentIndex):
"""Note: if the document id does not exist, the update will be a no-op and the
function will complete with no errors or exceptions.
Handle other exceptions if you wish to implement retry behavior
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for updating chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
if fields is None and user_fields is None:
logger.warning(
@@ -689,6 +679,13 @@ class VespaIndex(DocumentIndex):
f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
)
vespa_document_index = VespaDocumentIndex(
index_name=self.index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.large_chunks_enabled,
httpx_client=self.httpx_client,
)
project_ids: set[int] | None = None
if user_fields is not None and user_fields.user_projects is not None:
project_ids = set(user_fields.user_projects)
@@ -708,20 +705,7 @@ class VespaIndex(DocumentIndex):
persona_ids=persona_ids,
)
indices = [self.index_name]
if self.secondary_index_name:
indices.append(self.secondary_index_name)
for index_name in indices:
vespa_document_index = VespaDocumentIndex(
index_name=index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.index_to_large_chunks_enabled.get(
index_name, False
),
httpx_client=self.httpx_client,
)
vespa_document_index.update([update_request])
vespa_document_index.update([update_request])
def delete_single(
self,
@@ -730,11 +714,6 @@ class VespaIndex(DocumentIndex):
tenant_id: str,
chunk_count: int | None,
) -> int:
"""
NOTE: Remember to handle the secondary index here. There is no separate
pipeline for deleting chunks in the secondary index. This design is not
ideal and we should reconsider this when revamping index swapping.
"""
tenant_state = TenantState(
tenant_id=get_current_tenant_id(),
multitenant=MULTI_TENANT,
@@ -747,25 +726,13 @@ class VespaIndex(DocumentIndex):
raise ValueError(
f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
)
indices = [self.index_name]
if self.secondary_index_name:
indices.append(self.secondary_index_name)
total_chunks_deleted = 0
for index_name in indices:
vespa_document_index = VespaDocumentIndex(
index_name=index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.index_to_large_chunks_enabled.get(
index_name, False
),
httpx_client=self.httpx_client,
)
total_chunks_deleted += vespa_document_index.delete(
document_id=doc_id, chunk_count=chunk_count
)
return total_chunks_deleted
vespa_document_index = VespaDocumentIndex(
index_name=self.index_name,
tenant_state=tenant_state,
large_chunks_enabled=self.large_chunks_enabled,
httpx_client=self.httpx_client,
)
return vespa_document_index.delete(document_id=doc_id, chunk_count=chunk_count)
def id_based_retrieval(
self,


@@ -6,11 +6,8 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.connector_credential_pair import resync_cc_pair
from onyx.db.engine.sql_engine import get_session
from onyx.db.index_attempt import expire_index_attempts
from onyx.db.llm import fetch_existing_llm_provider
@@ -18,25 +15,20 @@ from onyx.db.llm import update_default_contextual_model
from onyx.db.llm import update_no_default_contextual_rag_provider
from onyx.db.models import IndexModelStatus
from onyx.db.models import User
from onyx.db.search_settings import create_search_settings
from onyx.db.search_settings import delete_search_settings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_embedding_provider_from_provider_type
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.search_settings import update_current_search_settings
from onyx.db.search_settings import update_search_settings_status
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.file_processing.unstructured import delete_unstructured_api_key
from onyx.file_processing.unstructured import get_unstructured_api_key
from onyx.file_processing.unstructured import update_unstructured_api_key
from onyx.natural_language_processing.search_nlp_models import clean_model_name
from onyx.server.manage.embedding.models import SearchSettingsDeleteRequest
from onyx.server.manage.models import FullModelVersionResponse
from onyx.server.models import IdReturn
from onyx.server.utils_vector_db import require_vector_db
from onyx.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
from shared_configs.configs import MULTI_TENANT
router = APIRouter(prefix="/search-settings")
@@ -49,99 +41,110 @@ def set_new_search_settings(
_: User = Depends(current_admin_user),
db_session: Session = Depends(get_session), # noqa: ARG001
) -> IdReturn:
"""Creates a new EmbeddingModel row and cancels the previous secondary indexing if any
Gives an error if the same model name is used as the current or secondary index
"""
Creates a new SearchSettings row and cancels the previous secondary indexing
if any exists.
"""
if search_settings_new.index_name:
logger.warning("Index name was specified by request, this is not suggested")
# Disallow contextual RAG for cloud deployments.
if MULTI_TENANT and search_settings_new.enable_contextual_rag:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Contextual RAG disabled in Onyx Cloud",
)
# Validate cloud provider exists or create new LiteLLM provider.
if search_settings_new.provider_type is not None:
cloud_provider = get_embedding_provider_from_provider_type(
db_session, provider_type=search_settings_new.provider_type
)
if cloud_provider is None:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
)
validate_contextual_rag_model(
provider_name=search_settings_new.contextual_rag_llm_provider,
model_name=search_settings_new.contextual_rag_llm_name,
db_session=db_session,
# TODO(andrei): Re-enable.
# NOTE: Enable the integration tests with external dependencies in
# test_search_settings.py when this is re-enabled; they are currently skipped.
logger.error("Setting new search settings is temporarily disabled.")
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="Setting new search settings is temporarily disabled.",
)
# if search_settings_new.index_name:
# logger.warning("Index name was specified by request, this is not suggested")
search_settings = get_current_search_settings(db_session)
# # Disallow contextual RAG for cloud deployments
# if MULTI_TENANT and search_settings_new.enable_contextual_rag:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail="Contextual RAG disabled in Onyx Cloud",
# )
if search_settings_new.index_name is None:
# We define index name here.
index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
if (
search_settings_new.model_name == search_settings.model_name
and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
):
index_name += ALT_INDEX_SUFFIX
search_values = search_settings_new.model_dump()
search_values["index_name"] = index_name
new_search_settings_request = SavedSearchSettings(**search_values)
else:
new_search_settings_request = SavedSearchSettings(
**search_settings_new.model_dump()
)
# # Validate cloud provider exists or create new LiteLLM provider
# if search_settings_new.provider_type is not None:
# cloud_provider = get_embedding_provider_from_provider_type(
# db_session, provider_type=search_settings_new.provider_type
# )
secondary_search_settings = get_secondary_search_settings(db_session)
# if cloud_provider is None:
# raise HTTPException(
# status_code=status.HTTP_400_BAD_REQUEST,
# detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
# )
if secondary_search_settings:
# Cancel any background indexing jobs.
expire_index_attempts(
search_settings_id=secondary_search_settings.id, db_session=db_session
)
# validate_contextual_rag_model(
# provider_name=search_settings_new.contextual_rag_llm_provider,
# model_name=search_settings_new.contextual_rag_llm_name,
# db_session=db_session,
# )
# Mark previous model as a past model directly.
update_search_settings_status(
search_settings=secondary_search_settings,
new_status=IndexModelStatus.PAST,
db_session=db_session,
)
# search_settings = get_current_search_settings(db_session)
new_search_settings = create_search_settings(
search_settings=new_search_settings_request, db_session=db_session
)
# if search_settings_new.index_name is None:
# # We define index name here
# index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
# if (
# search_settings_new.model_name == search_settings.model_name
# and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
# ):
# index_name += ALT_INDEX_SUFFIX
# search_values = search_settings_new.model_dump()
# search_values["index_name"] = index_name
# new_search_settings_request = SavedSearchSettings(**search_values)
# else:
# new_search_settings_request = SavedSearchSettings(
# **search_settings_new.model_dump()
# )
# Ensure the document indices have the new index immediately.
document_indices = get_all_document_indices(search_settings, new_search_settings)
for document_index in document_indices:
document_index.ensure_indices_exist(
primary_embedding_dim=search_settings.final_embedding_dim,
primary_embedding_precision=search_settings.embedding_precision,
secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
secondary_index_embedding_precision=new_search_settings.embedding_precision,
)
# secondary_search_settings = get_secondary_search_settings(db_session)
# Pause index attempts for the currently in-use index to preserve resources.
if DISABLE_INDEX_UPDATE_ON_SWAP:
expire_index_attempts(
search_settings_id=search_settings.id, db_session=db_session
)
for cc_pair in get_connector_credential_pairs(db_session):
resync_cc_pair(
cc_pair=cc_pair,
search_settings_id=new_search_settings.id,
db_session=db_session,
)
# if secondary_search_settings:
# # Cancel any background indexing jobs
# expire_index_attempts(
# search_settings_id=secondary_search_settings.id, db_session=db_session
# )
db_session.commit()
return IdReturn(id=new_search_settings.id)
# # Mark previous model as a past model directly
# update_search_settings_status(
# search_settings=secondary_search_settings,
# new_status=IndexModelStatus.PAST,
# db_session=db_session,
# )
# new_search_settings = create_search_settings(
# search_settings=new_search_settings_request, db_session=db_session
# )
# # Ensure Vespa has the new index immediately
# get_multipass_config(search_settings)
# get_multipass_config(new_search_settings)
# document_index = get_default_document_index(
# search_settings, new_search_settings, db_session
# )
# document_index.ensure_indices_exist(
# primary_embedding_dim=search_settings.final_embedding_dim,
# primary_embedding_precision=search_settings.embedding_precision,
# secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
# secondary_index_embedding_precision=new_search_settings.embedding_precision,
# )
# # Pause index attempts for the currently in use index to preserve resources
# if DISABLE_INDEX_UPDATE_ON_SWAP:
# expire_index_attempts(
# search_settings_id=search_settings.id, db_session=db_session
# )
# for cc_pair in get_connector_credential_pairs(db_session):
# resync_cc_pair(
# cc_pair=cc_pair,
# search_settings_id=new_search_settings.id,
# db_session=db_session,
# )
# db_session.commit()
# return IdReturn(id=new_search_settings.id)
@router.post("/cancel-new-embedding", dependencies=[Depends(require_vector_db)])


@@ -1,5 +1,6 @@
import datetime
import json
import os
from collections.abc import Generator
from datetime import timedelta
from uuid import UUID
@@ -60,6 +61,7 @@ from onyx.db.persona import get_persona_by_id
from onyx.db.usage import increment_usage
from onyx.db.usage import UsageType
from onyx.db.user_file import get_file_id_by_user_file_id
from onyx.file_processing.extract_file_text import docx_to_txt_filename
from onyx.file_store.file_store import get_default_file_store
from onyx.llm.constants import LlmProviderNames
from onyx.llm.factory import get_default_llm
@@ -810,6 +812,18 @@ def fetch_chat_file(
if not file_record:
raise HTTPException(status_code=404, detail="File not found")
original_file_name = file_record.display_name
if file_record.file_type.startswith(
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
):
# Check if a converted text file exists for .docx files
txt_file_name = docx_to_txt_filename(original_file_name)
txt_file_id = os.path.join(os.path.dirname(file_id), txt_file_name)
txt_file_record = file_store.read_file_record(txt_file_id)
if txt_file_record:
file_record = txt_file_record
file_id = txt_file_id
media_type = file_record.file_type
file_io = file_store.read_file(file_id, mode="b")
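
Assuming docx_to_txt_filename simply swaps the extension (its actual behavior lives in extract_file_text.py), the fallback resolves a sibling text file roughly like this (all values hypothetical):

#   file_id            = "chat_files/1234/report.docx"
#   original_file_name = "report.docx"
#   txt_file_name      = docx_to_txt_filename(original_file_name)   # "report.txt"
#   txt_file_id        = os.path.join(os.path.dirname(file_id), txt_file_name)
#                      = "chat_files/1234/report.txt"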


@@ -41,6 +41,7 @@ class StreamingType(Enum):
REASONING_DONE = "reasoning_done"
CITATION_INFO = "citation_info"
TOOL_CALL_DEBUG = "tool_call_debug"
TOOL_CALL_ARGUMENT_DELTA = "tool_call_argument_delta"
MEMORY_TOOL_START = "memory_tool_start"
MEMORY_TOOL_DELTA = "memory_tool_delta"
@@ -259,6 +260,16 @@ class CustomToolDelta(BaseObj):
file_ids: list[str] | None = None
class ToolCallArgumentDelta(BaseObj):
type: Literal["tool_call_argument_delta"] = (
StreamingType.TOOL_CALL_ARGUMENT_DELTA.value
)
tool_type: str
tool_id: str
argument_deltas: dict[str, Any]
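# Example wire shape (all values invented) once serialized:
#   {"type": "tool_call_argument_delta", "tool_type": "run_search",
#    "tool_id": "call_abc123", "argument_deltas": {"query": "lo wor"}}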
################################################
# File Reader Packets
################################################
@@ -379,6 +390,7 @@ PacketObj = Union[
# Citation Packets
CitationInfo,
ToolCallDebug,
ToolCallArgumentDelta,
# Deep Research Packets
DeepResearchPlanStart,
DeepResearchPlanDelta,


@@ -56,3 +56,23 @@ def get_built_in_tool_ids() -> list[str]:
def get_built_in_tool_by_id(in_code_tool_id: str) -> Type[BUILT_IN_TOOL_TYPES]:
return BUILT_IN_TOOL_MAP[in_code_tool_id]
def _build_tool_name_to_class() -> dict[str, Type[BUILT_IN_TOOL_TYPES]]:
"""Build a mapping from LLM-facing tool name to tool class."""
result: dict[str, Type[BUILT_IN_TOOL_TYPES]] = {}
for cls in BUILT_IN_TOOL_MAP.values():
name_attr = cls.__dict__.get("name")
if isinstance(name_attr, property) and name_attr.fget is not None:
tool_name = name_attr.fget(cls)
elif isinstance(name_attr, str):
tool_name = name_attr
else:
raise ValueError(
f"Built-in tool {cls.__name__} must define a valid LLM-facing tool name"
)
result[tool_name] = cls
return result
TOOL_NAME_TO_CLASS: dict[str, Type[BUILT_IN_TOOL_TYPES]] = _build_tool_name_to_class()
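
A minimal illustration of the two name styles _build_tool_name_to_class handles, using toy classes rather than the real tools:

class PropertyNamed:
    @property
    def name(self) -> str:
        return "property_named_tool"

class AttrNamed:
    name = "attr_named_tool"

for cls in (PropertyNamed, AttrNamed):
    name_attr = cls.__dict__.get("name")
    if isinstance(name_attr, property) and name_attr.fget is not None:
        # The property getter is invoked against the class, mirroring the code above.
        print(name_attr.fget(cls))   # property_named_tool
    elif isinstance(name_attr, str):
        print(name_attr)             # attr_named_tool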


@@ -92,3 +92,7 @@ class Tool(abc.ABC, Generic[TOverride]):
**llm_kwargs: Any,
) -> ToolResponse:
raise NotImplementedError
@classmethod
def do_emit_argument_deltas(cls) -> bool:
return False


@@ -376,3 +376,8 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
rich_response=None,
llm_facing_response=llm_response,
)
@classmethod
@override
def do_emit_argument_deltas(cls) -> bool:
return True
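
Combined with the base-class default above, argument-delta streaming stays opt-in per tool:

# Tool.do_emit_argument_deltas()        -> False  (base default)
# PythonTool.do_emit_argument_deltas()  -> True   (opted in by the override)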


@@ -1,20 +1,10 @@
#!/bin/bash
set -e
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
COMPOSE_FILE="$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.yml"
COMPOSE_DEV_FILE="$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.dev.yml"
stop_and_remove_containers() {
docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled stop opensearch 2>/dev/null || true
docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled rm -f opensearch 2>/dev/null || true
}
cleanup() {
echo "Error occurred. Cleaning up..."
stop_and_remove_containers
docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
}
# Trap errors and output a message, then cleanup
@@ -22,26 +12,16 @@ trap 'echo "Error occurred on line $LINENO. Exiting script." >&2; cleanup' ERR
# Usage of the script with optional volume arguments
# ./restart_containers.sh [vespa_volume] [postgres_volume] [redis_volume]
# [minio_volume] [--keep-opensearch-data]
KEEP_OPENSEARCH_DATA=false
POSITIONAL_ARGS=()
for arg in "$@"; do
if [[ "$arg" == "--keep-opensearch-data" ]]; then
KEEP_OPENSEARCH_DATA=true
else
POSITIONAL_ARGS+=("$arg")
fi
done
VESPA_VOLUME=${POSITIONAL_ARGS[0]:-""}
POSTGRES_VOLUME=${POSITIONAL_ARGS[1]:-""}
REDIS_VOLUME=${POSITIONAL_ARGS[2]:-""}
MINIO_VOLUME=${POSITIONAL_ARGS[3]:-""}
VESPA_VOLUME=${1:-""} # Default is empty if not provided
POSTGRES_VOLUME=${2:-""} # Default is empty if not provided
REDIS_VOLUME=${3:-""} # Default is empty if not provided
MINIO_VOLUME=${4:-""} # Default is empty if not provided
# Stop and remove the existing containers
echo "Stopping and removing existing containers..."
stop_and_remove_containers
docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
# Start the PostgreSQL container with optional volume
echo "Starting PostgreSQL container..."
@@ -59,29 +39,6 @@ else
docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8
fi
# If OPENSEARCH_ADMIN_PASSWORD is not already set, try loading it from
# .vscode/.env so existing dev setups that stored it there aren't silently
# broken.
VSCODE_ENV="$SCRIPT_DIR/../../.vscode/.env"
if [[ -z "${OPENSEARCH_ADMIN_PASSWORD:-}" && -f "$VSCODE_ENV" ]]; then
set -a
# shellcheck source=/dev/null
source "$VSCODE_ENV"
set +a
fi
# Start the OpenSearch container using the same service from docker-compose that
# our users use, setting OPENSEARCH_INITIAL_ADMIN_PASSWORD from the env's
# OPENSEARCH_ADMIN_PASSWORD if it exists, else defaulting to StrongPassword123!.
# Pass --keep-opensearch-data to preserve the opensearch-data volume across
# restarts, else the volume is deleted so the container starts fresh.
if [[ "$KEEP_OPENSEARCH_DATA" == "false" ]]; then
echo "Deleting opensearch-data volume..."
docker volume rm onyx_opensearch-data 2>/dev/null || true
fi
echo "Starting OpenSearch container..."
docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled up --force-recreate -d opensearch
# Start the Redis container with optional volume
echo "Starting Redis container..."
if [[ -n "$REDIS_VOLUME" ]]; then
@@ -103,6 +60,7 @@ echo "Starting Code Interpreter container..."
docker run --detach --name onyx_code_interpreter --publish 8000:8000 --user root -v /var/run/docker.sock:/var/run/docker.sock onyxdotapp/code-interpreter:latest bash ./entrypoint.sh code-interpreter-api
# Ensure alembic runs in the correct directory (backend/)
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PARENT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PARENT_DIR"


@@ -0,0 +1,10 @@
#!/bin/bash
# We get OPENSEARCH_ADMIN_PASSWORD from the repo .env file.
source "$(dirname "$0")/../../.vscode/.env"
cd "$(dirname "$0")/../../deployment/docker_compose"
# Start OpenSearch.
echo "Forcefully starting fresh OpenSearch container..."
docker compose -f docker-compose.opensearch.yml up --force-recreate -d opensearch
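
This is the script referenced by the new "Clear and Restart OpenSearch Container" launch configuration added to .vscode/launch.json above (backend/scripts/restart_opensearch_container.sh).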


@@ -5,8 +5,6 @@ Verifies that:
1. extract_ids_from_runnable_connector correctly separates hierarchy nodes from doc IDs
2. Extracted hierarchy nodes are correctly upserted to Postgres via upsert_hierarchy_nodes_batch
3. Upserting is idempotent (running twice doesn't duplicate nodes)
4. Document-to-hierarchy-node linkage is updated during pruning
5. link_hierarchy_nodes_to_documents links nodes that are also documents
Uses a mock SlimConnectorWithPermSync that yields known hierarchy nodes and slim documents,
combined with a real PostgreSQL database for verifying persistence.
@@ -29,13 +27,9 @@ from onyx.db.enums import HierarchyNodeType
from onyx.db.hierarchy import ensure_source_node_exists
from onyx.db.hierarchy import get_all_hierarchy_nodes_for_source
from onyx.db.hierarchy import get_hierarchy_node_by_raw_id
from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
from onyx.db.models import Document as DbDocument
from onyx.db.models import HierarchyNode as DBHierarchyNode
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.kg.models import KGStage
# ---------------------------------------------------------------------------
# Constants
@@ -95,18 +89,8 @@ def _make_hierarchy_nodes() -> list[PydanticHierarchyNode]:
]
DOC_PARENT_MAP = {
"msg-001": CHANNEL_A_ID,
"msg-002": CHANNEL_A_ID,
"msg-003": CHANNEL_B_ID,
}
def _make_slim_docs() -> list[SlimDocument | PydanticHierarchyNode]:
return [
SlimDocument(id=doc_id, parent_hierarchy_raw_node_id=DOC_PARENT_MAP.get(doc_id))
for doc_id in SLIM_DOC_IDS
]
return [SlimDocument(id=doc_id) for doc_id in SLIM_DOC_IDS]
class MockSlimConnectorWithPermSync(SlimConnectorWithPermSync):
@@ -142,31 +126,14 @@ class MockSlimConnectorWithPermSync(SlimConnectorWithPermSync):
# ---------------------------------------------------------------------------
def _cleanup_test_data(db_session: Session) -> None:
"""Remove all test hierarchy nodes and documents to isolate tests."""
for doc_id in SLIM_DOC_IDS:
db_session.query(DbDocument).filter(DbDocument.id == doc_id).delete()
def _cleanup_test_hierarchy_nodes(db_session: Session) -> None:
"""Remove all hierarchy nodes for TEST_SOURCE to isolate tests."""
db_session.query(DBHierarchyNode).filter(
DBHierarchyNode.source == TEST_SOURCE
).delete()
db_session.commit()
def _create_test_documents(db_session: Session) -> list[DbDocument]:
"""Insert minimal Document rows for our test doc IDs."""
docs = []
for doc_id in SLIM_DOC_IDS:
doc = DbDocument(
id=doc_id,
semantic_id=doc_id,
kg_stage=KGStage.NOT_STARTED,
)
db_session.add(doc)
docs.append(doc)
db_session.commit()
return docs
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
@@ -180,14 +147,14 @@ def test_pruning_extracts_hierarchy_nodes(db_session: Session) -> None: # noqa:
result = extract_ids_from_runnable_connector(connector, callback=None)
# Doc IDs should include both slim doc IDs and hierarchy node raw_node_ids
# (hierarchy node IDs are added to raw_id_to_parent so they aren't pruned)
# (hierarchy node IDs are added to doc_ids so they aren't pruned)
expected_ids = {
CHANNEL_A_ID,
CHANNEL_B_ID,
CHANNEL_C_ID,
*SLIM_DOC_IDS,
}
assert result.raw_id_to_parent.keys() == expected_ids
assert result.doc_ids == expected_ids
# Hierarchy nodes should be the 3 channels
assert len(result.hierarchy_nodes) == 3
@@ -198,7 +165,7 @@ def test_pruning_extracts_hierarchy_nodes(db_session: Session) -> None: # noqa:
def test_pruning_upserts_hierarchy_nodes_to_db(db_session: Session) -> None:
"""Full flow: extract hierarchy nodes from mock connector, upsert to Postgres,
then verify the DB state (node count, parent relationships, permissions)."""
_cleanup_test_data(db_session)
_cleanup_test_hierarchy_nodes(db_session)
# Step 1: ensure the SOURCE node exists (mirrors what the pruning task does)
source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -263,7 +230,7 @@ def test_pruning_upserts_hierarchy_nodes_public_connector(
) -> None:
"""When the connector's access type is PUBLIC, all hierarchy nodes must be
marked is_public=True regardless of their external_access settings."""
_cleanup_test_data(db_session)
_cleanup_test_hierarchy_nodes(db_session)
ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -290,7 +257,7 @@ def test_pruning_upserts_hierarchy_nodes_public_connector(
def test_pruning_hierarchy_node_upsert_idempotency(db_session: Session) -> None:
"""Upserting the same hierarchy nodes twice must not create duplicates.
The second call should update existing rows in place."""
_cleanup_test_data(db_session)
_cleanup_test_hierarchy_nodes(db_session)
ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -328,7 +295,7 @@ def test_pruning_hierarchy_node_upsert_idempotency(db_session: Session) -> None:
def test_pruning_hierarchy_node_upsert_updates_fields(db_session: Session) -> None:
"""Upserting a hierarchy node with changed fields should update the existing row."""
_cleanup_test_data(db_session)
_cleanup_test_hierarchy_nodes(db_session)
ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -375,193 +342,3 @@ def test_pruning_hierarchy_node_upsert_updates_fields(db_session: Session) -> No
assert db_node.is_public is True
assert db_node.external_user_emails is not None
assert set(db_node.external_user_emails) == {"new_user@example.com"}
# ---------------------------------------------------------------------------
# Document-to-hierarchy-node linkage tests
# ---------------------------------------------------------------------------
def test_extraction_preserves_parent_hierarchy_raw_node_id(
db_session: Session, # noqa: ARG001
) -> None:
"""extract_ids_from_runnable_connector should carry the
parent_hierarchy_raw_node_id from SlimDocument into the raw_id_to_parent dict."""
connector = MockSlimConnectorWithPermSync()
result = extract_ids_from_runnable_connector(connector, callback=None)
for doc_id, expected_parent in DOC_PARENT_MAP.items():
assert (
result.raw_id_to_parent[doc_id] == expected_parent
), f"raw_id_to_parent[{doc_id}] should be {expected_parent}"
# Hierarchy node entries have None parent (they aren't documents)
for channel_id in [CHANNEL_A_ID, CHANNEL_B_ID, CHANNEL_C_ID]:
assert result.raw_id_to_parent[channel_id] is None
def test_update_document_parent_hierarchy_nodes(db_session: Session) -> None:
"""update_document_parent_hierarchy_nodes should set
Document.parent_hierarchy_node_id for each document in the mapping."""
_cleanup_test_data(db_session)
source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
upserted = upsert_hierarchy_nodes_batch(
db_session=db_session,
nodes=_make_hierarchy_nodes(),
source=TEST_SOURCE,
commit=True,
is_connector_public=False,
)
node_id_by_raw = {n.raw_node_id: n.id for n in upserted}
# Create documents with no parent set
docs = _create_test_documents(db_session)
for doc in docs:
assert doc.parent_hierarchy_node_id is None
# Build resolved map (same logic as _resolve_and_update_document_parents)
resolved: dict[str, int | None] = {}
for doc_id, raw_parent in DOC_PARENT_MAP.items():
resolved[doc_id] = node_id_by_raw.get(raw_parent, source_node.id)
updated = update_document_parent_hierarchy_nodes(
db_session=db_session,
doc_parent_map=resolved,
commit=True,
)
assert updated == len(SLIM_DOC_IDS)
# Verify each document now points to the correct hierarchy node
db_session.expire_all()
for doc_id, raw_parent in DOC_PARENT_MAP.items():
tmp_doc = db_session.get(DbDocument, doc_id)
assert tmp_doc is not None
doc = tmp_doc
expected_node_id = node_id_by_raw[raw_parent]
assert (
doc.parent_hierarchy_node_id == expected_node_id
), f"Document {doc_id} should point to node for {raw_parent}"
def test_update_document_parent_is_idempotent(db_session: Session) -> None:
"""Running update_document_parent_hierarchy_nodes a second time with the
same mapping should update zero rows."""
_cleanup_test_data(db_session)
ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
upserted = upsert_hierarchy_nodes_batch(
db_session=db_session,
nodes=_make_hierarchy_nodes(),
source=TEST_SOURCE,
commit=True,
is_connector_public=False,
)
node_id_by_raw = {n.raw_node_id: n.id for n in upserted}
_create_test_documents(db_session)
resolved: dict[str, int | None] = {
doc_id: node_id_by_raw[raw_parent]
for doc_id, raw_parent in DOC_PARENT_MAP.items()
}
first_updated = update_document_parent_hierarchy_nodes(
db_session=db_session,
doc_parent_map=resolved,
commit=True,
)
assert first_updated == len(SLIM_DOC_IDS)
second_updated = update_document_parent_hierarchy_nodes(
db_session=db_session,
doc_parent_map=resolved,
commit=True,
)
assert second_updated == 0
def test_link_hierarchy_nodes_to_documents_for_confluence(
db_session: Session,
) -> None:
"""For sources in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS (e.g. Confluence),
link_hierarchy_nodes_to_documents should set HierarchyNode.document_id
when a hierarchy node's raw_node_id matches a document ID."""
_cleanup_test_data(db_session)
confluence_source = DocumentSource.CONFLUENCE
# Clean up any existing Confluence hierarchy nodes
db_session.query(DBHierarchyNode).filter(
DBHierarchyNode.source == confluence_source
).delete()
db_session.commit()
ensure_source_node_exists(db_session, confluence_source, commit=True)
# Create a hierarchy node whose raw_node_id matches a document ID
page_node_id = "confluence-page-123"
nodes = [
PydanticHierarchyNode(
raw_node_id=page_node_id,
raw_parent_id=None,
display_name="Test Page",
link="https://wiki.example.com/page/123",
node_type=HierarchyNodeType.PAGE,
),
]
upsert_hierarchy_nodes_batch(
db_session=db_session,
nodes=nodes,
source=confluence_source,
commit=True,
is_connector_public=False,
)
# Verify the node exists but has no document_id yet
db_node = get_hierarchy_node_by_raw_id(db_session, page_node_id, confluence_source)
assert db_node is not None
assert db_node.document_id is None
# Create a document with the same ID as the hierarchy node
doc = DbDocument(
id=page_node_id,
semantic_id="Test Page",
kg_stage=KGStage.NOT_STARTED,
)
db_session.add(doc)
db_session.commit()
# Link nodes to documents
linked = link_hierarchy_nodes_to_documents(
db_session=db_session,
document_ids=[page_node_id],
source=confluence_source,
commit=True,
)
assert linked == 1
# Verify the hierarchy node now has document_id set
db_session.expire_all()
db_node = get_hierarchy_node_by_raw_id(db_session, page_node_id, confluence_source)
assert db_node is not None
assert db_node.document_id == page_node_id
# Cleanup
db_session.query(DbDocument).filter(DbDocument.id == page_node_id).delete()
db_session.query(DBHierarchyNode).filter(
DBHierarchyNode.source == confluence_source
).delete()
db_session.commit()
def test_link_hierarchy_nodes_skips_non_hierarchy_sources(
db_session: Session,
) -> None:
"""link_hierarchy_nodes_to_documents should return 0 for sources that
don't support hierarchy-node-as-document (e.g. Slack, Google Drive)."""
linked = link_hierarchy_nodes_to_documents(
db_session=db_session,
document_ids=SLIM_DOC_IDS,
source=TEST_SOURCE, # Slack — not in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS
commit=False,
)
assert linked == 0
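Condensed from the two removed tests above, the linkage helper's contract in one hedged snippet (session, upserted node, and matching document assumed to be set up exactly as those tests did):

linked = link_hierarchy_nodes_to_documents(
    db_session=db_session,
    document_ids=["confluence-page-123"],
    source=DocumentSource.CONFLUENCE,  # in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS
    commit=True,
)
assert linked == 1  # sets HierarchyNode.document_id; returns 0 for sources like Slack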

View File

@@ -11,7 +11,6 @@ from onyx.context.search.models import SavedSearchSettings
from onyx.context.search.models import SearchSettingsCreationRequest
from onyx.db.enums import EmbeddingPrecision
from onyx.db.llm import fetch_default_contextual_rag_model
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import update_default_contextual_model
from onyx.db.llm import upsert_llm_provider
from onyx.db.models import IndexModelStatus
@@ -38,8 +37,6 @@ def _create_llm_provider_and_model(
model_name: str,
) -> None:
"""Insert an LLM provider with a single visible model configuration."""
if fetch_existing_llm_provider(name=provider_name, db_session=db_session):
return
upsert_llm_provider(
LLMProviderUpsertRequest(
name=provider_name,
@@ -149,8 +146,8 @@ def baseline_search_settings(
)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.db.swap_index.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -158,7 +155,6 @@ def test_indexing_pipeline_uses_contextual_rag_settings_from_create(
mock_index_handler: MagicMock,
mock_get_llm: MagicMock,
mock_get_doc_index: MagicMock, # noqa: ARG001
mock_get_all_doc_indices_search_settings: MagicMock, # noqa: ARG001
mock_get_all_doc_indices: MagicMock,
baseline_search_settings: None, # noqa: ARG001
db_session: Session,
@@ -200,8 +196,8 @@ def test_indexing_pipeline_uses_contextual_rag_settings_from_create(
)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.db.swap_index.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -209,7 +205,6 @@ def test_indexing_pipeline_uses_updated_contextual_rag_settings(
mock_index_handler: MagicMock,
mock_get_llm: MagicMock,
mock_get_doc_index: MagicMock, # noqa: ARG001
mock_get_all_doc_indices_search_settings: MagicMock, # noqa: ARG001
mock_get_all_doc_indices: MagicMock,
baseline_search_settings: None, # noqa: ARG001
db_session: Session,
@@ -271,7 +266,7 @@ def test_indexing_pipeline_uses_updated_contextual_rag_settings(
)
@patch("onyx.server.manage.search_settings.get_all_document_indices")
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -279,7 +274,6 @@ def test_indexing_pipeline_skips_llm_when_contextual_rag_disabled(
mock_index_handler: MagicMock,
mock_get_llm: MagicMock,
mock_get_doc_index: MagicMock, # noqa: ARG001
mock_get_all_doc_indices_search_settings: MagicMock, # noqa: ARG001
baseline_search_settings: None, # noqa: ARG001
db_session: Session,
) -> None:

View File

@@ -950,6 +950,7 @@ from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PythonToolDelta
from onyx.server.query_and_chat.streaming_models import PythonToolStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from tests.external_dependency_unit.answer.stream_test_builder import StreamTestBuilder
from tests.external_dependency_unit.answer.stream_test_utils import create_chat_session
@@ -1294,9 +1295,19 @@ def test_code_interpreter_replay_packets_include_code_and_output(
).expect(
Packet(
placement=create_placement(0),
obj=PythonToolStart(code=code),
obj=ToolCallArgumentDelta(
tool_type="python",
tool_id="call_replay_test",
argument_deltas={"code": code},
),
),
forward=2,
).expect(
Packet(
placement=create_placement(0),
obj=PythonToolStart(code=code),
),
forward=False,
).expect(
Packet(
placement=create_placement(0),

View File

@@ -1,3 +1,4 @@
import pytest
import requests
from tests.integration.common_utils.constants import API_SERVER_URL
@@ -364,6 +365,7 @@ def test_update_contextual_rag_missing_model_name(
assert "Provider name and model name are required" in response.json()["detail"]
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
def test_set_new_search_settings_with_contextual_rag(
reset: None, # noqa: ARG001
admin_user: DATestUser,
@@ -392,6 +394,7 @@ def test_set_new_search_settings_with_contextual_rag(
_cancel_new_embedding(admin_user)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
def test_set_new_search_settings_without_contextual_rag(
reset: None, # noqa: ARG001
admin_user: DATestUser,
@@ -416,6 +419,7 @@ def test_set_new_search_settings_without_contextual_rag(
_cancel_new_embedding(admin_user)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
def test_set_new_then_update_inference_settings(
reset: None, # noqa: ARG001
admin_user: DATestUser,
@@ -453,6 +457,7 @@ def test_set_new_then_update_inference_settings(
_cancel_new_embedding(admin_user)
@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
def test_set_new_search_settings_replaces_previous_secondary(
reset: None, # noqa: ARG001
admin_user: DATestUser,

View File

@@ -0,0 +1,584 @@
from typing import Any
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.chat.tool_call_args_streaming import maybe_emit_argument_delta
from onyx.server.query_and_chat.placement import Placement
from onyx.server.query_and_chat.streaming_models import ToolCallArgumentDelta
def _make_tool_call_delta(
index: int = 0,
tool_id: str | None = None,
name: str | None = None,
arguments: str | None = None,
function_is_none: bool = False,
) -> MagicMock:
"""Create a mock tool_call_delta matching the LiteLLM streaming shape."""
delta = MagicMock()
delta.index = index
delta.id = tool_id
if function_is_none:
delta.function = None
else:
delta.function = MagicMock()
delta.function.name = name
delta.function.arguments = arguments
return delta
def _make_placement() -> Placement:
return Placement(turn_index=0, tab_index=0)
def _mock_tool_class(emit: bool = True) -> MagicMock:
cls = MagicMock()
cls.do_emit_argument_deltas.return_value = emit
return cls
def _collect(
tc_map: dict[int, dict[str, Any]],
delta: MagicMock,
placement: Placement | None = None,
scan_offsets: dict[int, int] | None = None,
) -> list[Any]:
"""Run maybe_emit_argument_delta and return the yielded packets."""
return list(
maybe_emit_argument_delta(
tc_map,
delta,
placement or _make_placement(),
scan_offsets if scan_offsets is not None else {},
)
)
def _stream_fragments(
fragments: list[str],
tc_map: dict[int, dict[str, Any]],
placement: Placement | None = None,
) -> list[str]:
"""Feed fragments into maybe_emit_argument_delta one by one, returning
all emitted content values concatenated per-key as a flat list."""
pl = placement or _make_placement()
scan_offsets: dict[int, int] = {}
emitted: list[str] = []
for frag in fragments:
tc_map[0]["arguments"] += frag
delta = _make_tool_call_delta(arguments=frag)
for packet in maybe_emit_argument_delta(
tc_map, delta, pl, scan_offsets=scan_offsets
):
obj = packet.obj
assert isinstance(obj, ToolCallArgumentDelta)
for value in obj.argument_deltas.values():
emitted.append(value)
return emitted
class TestMaybeEmitArgumentDeltaGuards:
"""Tests for conditions that cause no packet to be emitted."""
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_no_emission_when_tool_does_not_opt_in(
self, mock_get_tool: MagicMock
) -> None:
"""Tools that return False from do_emit_argument_deltas emit nothing."""
mock_get_tool.return_value = _mock_tool_class(emit=False)
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code": "x'}
}
assert _collect(tc_map, _make_tool_call_delta(arguments="x")) == []
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_no_emission_when_tool_class_unknown(
self, mock_get_tool: MagicMock
) -> None:
mock_get_tool.return_value = None
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "unknown", "arguments": '{"code": "x'}
}
assert _collect(tc_map, _make_tool_call_delta(arguments="x")) == []
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_no_emission_when_no_argument_fragment(
self, mock_get_tool: MagicMock
) -> None:
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code": "x'}
}
assert _collect(tc_map, _make_tool_call_delta(arguments=None)) == []
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_no_emission_when_key_value_not_started(
self, mock_get_tool: MagicMock
) -> None:
"""Key exists in JSON but its string value hasn't begun yet."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code":'}
}
assert _collect(tc_map, _make_tool_call_delta(arguments=":")) == []
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_no_emission_before_any_key(self, mock_get_tool: MagicMock) -> None:
"""Only the opening brace has arrived — no key to stream yet."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": "{"}
}
assert _collect(tc_map, _make_tool_call_delta(arguments="{")) == []
class TestMaybeEmitArgumentDeltaBasic:
"""Tests for correct packet content and incremental emission."""
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_emits_packet_with_correct_fields(self, mock_get_tool: MagicMock) -> None:
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "print(1)',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments="print(1)"))
assert len(packets) == 1
obj = packets[0].obj
assert isinstance(obj, ToolCallArgumentDelta)
assert obj.tool_type == "python"
assert obj.tool_id == "tc_1"
assert obj.argument_deltas == {"code": "print(1)"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_emits_only_new_content_on_subsequent_call(
self, mock_get_tool: MagicMock
) -> None:
"""After a first emission, subsequent calls emit only the diff."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code": "abc'}
}
packets_1 = _collect(tc_map, _make_tool_call_delta(arguments="abc"))
assert packets_1[0].obj.argument_deltas == {"code": "abc"}
tc_map[0]["arguments"] = '{"code": "abcdef'
packets_2 = _collect(tc_map, _make_tool_call_delta(arguments="def"))
assert packets_2[0].obj.argument_deltas == {"code": "def"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_handles_multiple_keys_sequentially(self, mock_get_tool: MagicMock) -> None:
"""When a second key starts, emissions switch to that key."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code": "x'}
}
packets_1 = _collect(tc_map, _make_tool_call_delta(arguments="x"))
assert packets_1[0].obj.argument_deltas == {"code": "x"}
tc_map[0]["arguments"] = '{"code": "x", "output": "hello'
packets_2 = _collect(tc_map, _make_tool_call_delta(arguments="hello"))
assert packets_2[0].obj.argument_deltas == {"output": "hello"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_delta_spans_key_boundary(self, mock_get_tool: MagicMock) -> None:
"""A single delta contains the end of one value and the start of the next key."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code": "x'}
}
packets_1 = _collect(tc_map, _make_tool_call_delta(arguments="x"))
assert packets_1[0].obj.argument_deltas == {"code": "x"}
# Delta carries closing of "code" value + opening of "lang" key + start of value
tc_map[0]["arguments"] = '{"code": "xy", "lang": "py'
packets_2 = _collect(tc_map, _make_tool_call_delta(arguments='y", "lang": "py'))
assert len(packets_2) == 1
assert packets_2[0].obj.argument_deltas == {"code": "y", "lang": "py"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_empty_value_emits_nothing(self, mock_get_tool: MagicMock) -> None:
"""An empty string value has nothing to emit."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code": "'}
}
# Opening quote just arrived, value is empty
assert _collect(tc_map, _make_tool_call_delta(arguments='"')) == []
class TestMaybeEmitArgumentDeltaDecoding:
"""Tests verifying that JSON escape sequences are properly decoded."""
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_decodes_newlines(self, mock_get_tool: MagicMock) -> None:
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "line1\\nline2',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments="line1\\nline2"))
assert packets[0].obj.argument_deltas == {"code": "line1\nline2"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_decodes_tabs(self, mock_get_tool: MagicMock) -> None:
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "\\tindented',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments="\\tindented"))
assert packets[0].obj.argument_deltas == {"code": "\tindented"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_decodes_escaped_quotes(self, mock_get_tool: MagicMock) -> None:
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "say \\"hi\\"',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments='say \\"hi\\"'))
assert packets[0].obj.argument_deltas == {"code": 'say "hi"'}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_decodes_escaped_backslashes(self, mock_get_tool: MagicMock) -> None:
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "path\\\\dir',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments="path\\\\dir"))
assert packets[0].obj.argument_deltas == {"code": "path\\dir"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_decodes_unicode_escape(self, mock_get_tool: MagicMock) -> None:
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "\\u0041',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments="\\u0041"))
assert packets[0].obj.argument_deltas == {"code": "A"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_incomplete_escape_at_end_trims_safely(
self, mock_get_tool: MagicMock
) -> None:
"""A trailing backslash (incomplete escape) is handled gracefully."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "hello\\',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments="hello\\"))
# "hello" can be decoded; the trailing backslash is trimmed
assert packets[0].obj.argument_deltas == {"code": "hello"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_incomplete_unicode_escape_trims_safely(
self, mock_get_tool: MagicMock
) -> None:
"""A partial \\uXX sequence is trimmed, emitting what can be decoded."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "hello\\u00',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments="hello\\u00"))
assert packets[0].obj.argument_deltas == {"code": "hello"}
class TestArgumentDeltaStreamingE2E:
"""Simulates realistic sequences of LLM argument deltas to verify
the full pipeline produces correct decoded output."""
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_realistic_python_code_streaming(self, mock_get_tool: MagicMock) -> None:
"""Streams: {"code": "print('hello')\\nprint('world')"}"""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": ""}
}
fragments = [
'{"',
"code",
'": "',
"print(",
"'hello')",
"\\n",
"print(",
"'world')",
'"}',
]
full = "".join(_stream_fragments(fragments, tc_map))
assert full == "print('hello')\nprint('world')"
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_streaming_with_tabs_and_newlines(self, mock_get_tool: MagicMock) -> None:
"""Streams code with tabs and newlines."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": ""}
}
fragments = [
'{"code": "',
"if True:",
"\\n",
"\\t",
"pass",
'"}',
]
full = "".join(_stream_fragments(fragments, tc_map))
assert full == "if True:\n\tpass"
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_split_escape_sequence(self, mock_get_tool: MagicMock) -> None:
"""An escape sequence split across two fragments (backslash in one,
'n' in the next) should still decode correctly."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": ""}
}
fragments = [
'{"code": "hello',
"\\",
"n",
'world"}',
]
full = "".join(_stream_fragments(fragments, tc_map))
assert full == "hello\nworld"
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_multiple_newlines_and_indentation(self, mock_get_tool: MagicMock) -> None:
"""Streams a multi-line function with multiple escape sequences."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": ""}
}
fragments = [
'{"code": "',
"def foo():",
"\\n",
"\\t",
"x = 1",
"\\n",
"\\t",
"return x",
'"}',
]
full = "".join(_stream_fragments(fragments, tc_map))
assert full == "def foo():\n\tx = 1\n\treturn x"
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_two_keys_streamed_sequentially(self, mock_get_tool: MagicMock) -> None:
"""Streams code first, then a second key (language) — both decoded."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": ""}
}
fragments = [
'{"code": "',
"x = 1",
'", "language": "',
"python",
'"}',
]
emitted = _stream_fragments(fragments, tc_map)
# Should have emissions for both keys
full = "".join(emitted)
assert "x = 1" in full
assert "python" in full
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_code_containing_dict_literal(self, mock_get_tool: MagicMock) -> None:
"""Python code like `x = {"key": "val"}` contains JSON-like patterns.
The escaped quotes inside the *outer* JSON value should prevent the
inner `"key":` from being mistaken for a top-level JSON key."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": ""}
}
# The LLM sends: {"code": "x = {\"key\": \"val\"}"}
# The inner quotes are escaped as \" in the JSON value.
fragments = [
'{"code": "',
"x = {",
'\\"key\\"',
": ",
'\\"val\\"',
"}",
'"}',
]
full = "".join(_stream_fragments(fragments, tc_map))
assert full == 'x = {"key": "val"}'
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_code_with_colon_in_value(self, mock_get_tool: MagicMock) -> None:
"""Colons inside the string value should not confuse key detection."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": ""}
}
fragments = [
'{"code": "',
"url = ",
'\\"https://example.com\\"',
'"}',
]
full = "".join(_stream_fragments(fragments, tc_map))
assert full == 'url = "https://example.com"'
class TestMaybeEmitArgumentDeltaEdgeCases:
"""Edge cases not covered by the standard test classes."""
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_no_emission_when_function_is_none(self, mock_get_tool: MagicMock) -> None:
"""Some delta chunks have function=None (e.g. role-only deltas)."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code": "x'}
}
delta = _make_tool_call_delta(arguments=None, function_is_none=True)
assert _collect(tc_map, delta) == []
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_multiple_concurrent_tool_calls(self, mock_get_tool: MagicMock) -> None:
"""Two tool calls streaming at different indices in parallel."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": '{"code": "aaa'},
1: {"id": "tc_2", "name": "python", "arguments": '{"code": "bbb'},
}
# Delta for index 0
packets_0 = _collect(tc_map, _make_tool_call_delta(index=0, arguments="aaa"))
assert len(packets_0) == 1
assert packets_0[0].obj.tool_id == "tc_1"
assert packets_0[0].obj.argument_deltas == {"code": "aaa"}
# Delta for index 1
packets_1 = _collect(tc_map, _make_tool_call_delta(index=1, arguments="bbb"))
assert len(packets_1) == 1
assert packets_1[0].obj.tool_id == "tc_2"
assert packets_1[0].obj.argument_deltas == {"code": "bbb"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_delta_with_four_arguments(self, mock_get_tool: MagicMock) -> None:
"""A single delta contains four complete key-value pairs."""
mock_get_tool.return_value = _mock_tool_class()
accumulated = '{"a": "one", "b": "two", "c": "three", "d": "four'
tc_map: dict[int, dict[str, Any]] = {
0: {"id": "tc_1", "name": "python", "arguments": accumulated}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments=accumulated))
assert len(packets) == 1
assert packets[0].obj.argument_deltas == {
"a": "one",
"b": "two",
"c": "three",
"d": "four",
}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_delta_on_second_arg_after_first_complete(
self, mock_get_tool: MagicMock
) -> None:
"""First argument is fully complete; delta only adds to the second."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"code": "print(1)", "lang": "py',
}
}
packets = _collect(tc_map, _make_tool_call_delta(arguments="py"))
assert len(packets) == 1
assert packets[0].obj.argument_deltas == {"lang": "py"}
@patch("onyx.chat.tool_call_args_streaming._get_tool_class")
def test_non_string_values_skipped(self, mock_get_tool: MagicMock) -> None:
"""Non-string values (numbers, booleans, null) are skipped — they are
available in the final tool-call kickoff packet. String arguments
following them are still emitted."""
mock_get_tool.return_value = _mock_tool_class()
tc_map: dict[int, dict[str, Any]] = {
0: {
"id": "tc_1",
"name": "python",
"arguments": '{"timeout": 30, "code": "hello',
}
}
packets = _collect(
tc_map, _make_tool_call_delta(arguments='30, "code": "hello')
)
assert len(packets) == 1
assert packets[0].obj.argument_deltas == {"code": "hello"}
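For orientation, a minimal self-contained sketch of the escape-safe incremental decode these tests pin down. It is not the production maybe_emit_argument_delta (which also tracks keys and scan offsets), and the helper names are ours: trim any incomplete trailing escape, decode the decodable prefix with json, and emit only the newly decoded suffix.

import json
import re

def _trim_incomplete_escape(raw: str) -> str:
    # Partial \uXXXX escape at the end (0-3 hex digits so far)?
    m = re.search(r"\\u[0-9a-fA-F]{0,3}$", raw)
    if m is not None:
        before = raw[: m.start()]
        if (len(before) - len(before.rstrip("\\"))) % 2 == 0:
            return before  # that backslash really starts an escape; drop it
    # Lone dangling backslash (odd-length run of trailing backslashes)?
    if (len(raw) - len(raw.rstrip("\\"))) % 2 == 1:
        return raw[:-1]
    return raw

def decode_partial_value(raw: str) -> str:
    # Re-wrap in quotes so json handles every escape form; surrogate pairs
    # split across fragments are out of scope for this sketch.
    return json.loads('"' + _trim_incomplete_escape(raw) + '"')

# Replaying the value fragments from test_split_escape_sequence:
raw, emitted = "", ""
for piece in ["hello", "\\", "n", "world"]:
    raw += piece
    decoded = decode_partial_value(raw)
    delta = decoded[len(emitted):]  # only the newly decoded suffix is emitted
    emitted = decoded
assert emitted == "hello\nworld"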

View File

@@ -1,8 +1,8 @@
#!/bin/bash
set -euo pipefail
set -e
# Expected resource requirements (overridden below if --lite)
# Expected resource requirements
EXPECTED_DOCKER_RAM_GB=10
EXPECTED_DISK_GB=32
@@ -10,10 +10,6 @@ EXPECTED_DISK_GB=32
SHUTDOWN_MODE=false
DELETE_DATA_MODE=false
INCLUDE_CRAFT=false # Disabled by default, use --include-craft to enable
LITE_MODE=false # Disabled by default, use --lite to enable
NO_PROMPT=false
DRY_RUN=false
VERBOSE=false
while [[ $# -gt 0 ]]; do
case $1 in
@@ -29,22 +25,6 @@ while [[ $# -gt 0 ]]; do
INCLUDE_CRAFT=true
shift
;;
--lite)
LITE_MODE=true
shift
;;
--no-prompt)
NO_PROMPT=true
shift
;;
--dry-run)
DRY_RUN=true
shift
;;
--verbose)
VERBOSE=true
shift
;;
--help|-h)
echo "Onyx Installation Script"
echo ""
@@ -52,21 +32,15 @@ while [[ $# -gt 0 ]]; do
echo ""
echo "Options:"
echo " --include-craft Enable Onyx Craft (AI-powered web app building)"
echo " --lite Deploy Onyx Lite (no Vespa, Redis, or model servers)"
echo " --shutdown Stop (pause) Onyx containers"
echo " --delete-data Remove all Onyx data (containers, volumes, and files)"
echo " --no-prompt Run non-interactively with defaults (for CI/automation)"
echo " --dry-run Show what would be done without making changes"
echo " --verbose Show detailed output for debugging"
echo " --help, -h Show this help message"
echo ""
echo "Examples:"
echo " $0 # Install Onyx"
echo " $0 --lite # Install Onyx Lite (minimal deployment)"
echo " $0 --include-craft # Install Onyx with Craft enabled"
echo " $0 --shutdown # Pause Onyx services"
echo " $0 --delete-data # Completely remove Onyx and all data"
echo " $0 --no-prompt # Non-interactive install with defaults"
exit 0
;;
*)
@@ -77,116 +51,8 @@ while [[ $# -gt 0 ]]; do
esac
done
if [[ "$VERBOSE" = true ]]; then
set -x
fi
if [[ "$LITE_MODE" = true ]] && [[ "$INCLUDE_CRAFT" = true ]]; then
echo "ERROR: --lite and --include-craft cannot be used together."
echo "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
exit 1
fi
# Lite mode needs far fewer resources (no Vespa, Redis, or model servers)
if [[ "$LITE_MODE" = true ]]; then
EXPECTED_DOCKER_RAM_GB=4
EXPECTED_DISK_GB=16
fi
INSTALL_ROOT="${INSTALL_PREFIX:-onyx_data}"
LITE_COMPOSE_FILE="docker-compose.onyx-lite.yml"
# Build the -f flags for docker compose. For shutdown/delete-data we auto-detect
# whether the lite overlay was previously downloaded; for install we use --lite.
compose_file_args() {
local args="-f docker-compose.yml"
if [[ "$LITE_MODE" = true ]] || [[ -f "${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}" ]]; then
args="$args -f ${LITE_COMPOSE_FILE}"
fi
echo "$args"
}
# --- Temp file cleanup ---
TMPFILES=()
cleanup_tmpfiles() {
local f
for f in "${TMPFILES[@]:-}"; do
rm -rf "$f" 2>/dev/null || true
done
}
trap cleanup_tmpfiles EXIT
mktempfile() {
local f
f="$(mktemp)"
TMPFILES+=("$f")
echo "$f"
}
# --- Downloader detection (curl with wget fallback) ---
DOWNLOADER=""
detect_downloader() {
if command -v curl &> /dev/null; then
DOWNLOADER="curl"
return 0
fi
if command -v wget &> /dev/null; then
DOWNLOADER="wget"
return 0
fi
echo "ERROR: Neither curl nor wget found. Please install one and retry."
exit 1
}
detect_downloader
download_file() {
local url="$1"
local output="$2"
if [[ "$DOWNLOADER" == "curl" ]]; then
curl -fsSL --retry 3 --retry-delay 2 --retry-connrefused -o "$output" "$url"
else
wget -q --tries=3 --timeout=20 -O "$output" "$url"
fi
}
# --- Interactive prompt helpers ---
is_interactive() {
[[ "$NO_PROMPT" = false ]] && [[ -t 0 ]]
}
prompt_or_default() {
local prompt_text="$1"
local default_value="$2"
if is_interactive; then
read -p "$prompt_text" -r REPLY
if [[ -z "$REPLY" ]]; then
REPLY="$default_value"
fi
else
REPLY="$default_value"
fi
}
prompt_yn_or_default() {
local prompt_text="$1"
local default_value="$2"
if is_interactive; then
read -p "$prompt_text" -n 1 -r
echo ""
else
REPLY="$default_value"
fi
}
prompt_enter_or_skip() {
local prompt_text="$1"
if is_interactive; then
echo -e "$prompt_text"
read -r
fi
}
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
@@ -245,7 +111,7 @@ if [ "$SHUTDOWN_MODE" = true ]; then
fi
# Stop containers (without removing them)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) stop)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml stop)
if [ $? -eq 0 ]; then
print_success "Onyx containers stopped (paused)"
else
@@ -274,17 +140,12 @@ if [ "$DELETE_DATA_MODE" = true ]; then
echo " • All downloaded files and configurations"
echo " • All user data and documents"
echo ""
if is_interactive; then
read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
echo ""
if [ "$REPLY" != "DELETE" ]; then
print_info "Operation cancelled."
exit 0
fi
else
print_error "Cannot confirm destructive operation in non-interactive mode."
print_info "Run interactively or remove the ${INSTALL_ROOT} directory manually."
exit 1
read -p "Are you sure you want to continue? Type 'DELETE' to confirm: " -r
echo ""
if [ "$REPLY" != "DELETE" ]; then
print_info "Operation cancelled."
exit 0
fi
print_info "Removing Onyx containers and volumes..."
@@ -303,7 +164,7 @@ if [ "$DELETE_DATA_MODE" = true ]; then
fi
# Stop and remove containers with volumes
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) down -v)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml down -v)
if [ $? -eq 0 ]; then
print_success "Onyx containers and volumes removed"
else
@@ -337,13 +198,8 @@ echo " \____/|_| |_|\__, /_/\_\ "
echo " __/ | "
echo " |___/ "
echo -e "${NC}"
if [[ "$LITE_MODE" = true ]]; then
echo "Welcome to Onyx Lite Installation Script"
echo "========================================="
else
echo "Welcome to Onyx Installation Script"
echo "===================================="
fi
echo "Welcome to Onyx Installation Script"
echo "===================================="
echo ""
# User acknowledgment section
@@ -351,14 +207,10 @@ echo -e "${YELLOW}${BOLD}This script will:${NC}"
echo "1. Download deployment files for Onyx into a new '${INSTALL_ROOT}' directory"
echo "2. Check your system resources (Docker, memory, disk space)"
echo "3. Guide you through deployment options (version, authentication)"
if [[ "$LITE_MODE" = true ]]; then
echo ""
echo -e "${YELLOW}${BOLD}Lite mode:${NC} Vespa, Redis, and model servers will NOT be started."
echo "This gives you the core chat experience with lower resource requirements."
fi
echo ""
if is_interactive; then
# Only prompt for acknowledgment if running interactively
if [ -t 0 ]; then
echo -e "${YELLOW}${BOLD}Please acknowledge and press Enter to continue...${NC}"
read -r
echo ""
@@ -367,26 +219,6 @@ else
echo ""
fi
# Detect OS (including WSL)
IS_WSL=false
if [[ -n "${WSL_DISTRO_NAME:-}" ]] || grep -qi microsoft /proc/version 2>/dev/null; then
IS_WSL=true
fi
# Dry-run: show plan and exit
if [[ "$DRY_RUN" = true ]]; then
print_info "Dry run mode — showing what would happen:"
echo " • Install root: ${INSTALL_ROOT}"
echo " • Lite mode: ${LITE_MODE}"
echo " • Include Craft: ${INCLUDE_CRAFT}"
echo " • OS type: ${OSTYPE:-unknown} (WSL: ${IS_WSL})"
echo " • Downloader: ${DOWNLOADER}"
echo " • Min RAM: ${EXPECTED_DOCKER_RAM_GB}GB, Min disk: ${EXPECTED_DISK_GB}GB"
echo ""
print_success "Dry run complete (no changes made)"
exit 0
fi
# GitHub repo base URL - using main branch
GITHUB_RAW_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose"
@@ -428,35 +260,41 @@ else
exit 1
fi
# Returns 0 if $1 <= $2, 1 if $1 > $2
# Handles missing or non-numeric parts gracefully (treats them as 0)
# Function to compare version numbers
version_compare() {
local version1="${1:-0.0.0}"
local version2="${2:-0.0.0}"
# Returns 0 if $1 <= $2, 1 if $1 > $2
local version1=$1
local version2=$2
local v1_major v1_minor v1_patch v2_major v2_minor v2_patch
v1_major=$(echo "$version1" | cut -d. -f1)
v1_minor=$(echo "$version1" | cut -d. -f2)
v1_patch=$(echo "$version1" | cut -d. -f3)
v2_major=$(echo "$version2" | cut -d. -f1)
v2_minor=$(echo "$version2" | cut -d. -f2)
v2_patch=$(echo "$version2" | cut -d. -f3)
# Split versions into components
local v1_major=$(echo $version1 | cut -d. -f1)
local v1_minor=$(echo $version1 | cut -d. -f2)
local v1_patch=$(echo $version1 | cut -d. -f3)
# Default non-numeric or empty parts to 0
[[ "$v1_major" =~ ^[0-9]+$ ]] || v1_major=0
[[ "$v1_minor" =~ ^[0-9]+$ ]] || v1_minor=0
[[ "$v1_patch" =~ ^[0-9]+$ ]] || v1_patch=0
[[ "$v2_major" =~ ^[0-9]+$ ]] || v2_major=0
[[ "$v2_minor" =~ ^[0-9]+$ ]] || v2_minor=0
[[ "$v2_patch" =~ ^[0-9]+$ ]] || v2_patch=0
local v2_major=$(echo $version2 | cut -d. -f1)
local v2_minor=$(echo $version2 | cut -d. -f2)
local v2_patch=$(echo $version2 | cut -d. -f3)
if [ "$v1_major" -lt "$v2_major" ]; then return 0
elif [ "$v1_major" -gt "$v2_major" ]; then return 1; fi
# Compare major version
if [ "$v1_major" -lt "$v2_major" ]; then
return 0
elif [ "$v1_major" -gt "$v2_major" ]; then
return 1
fi
if [ "$v1_minor" -lt "$v2_minor" ]; then return 0
elif [ "$v1_minor" -gt "$v2_minor" ]; then return 1; fi
# Compare minor version
if [ "$v1_minor" -lt "$v2_minor" ]; then
return 0
elif [ "$v1_minor" -gt "$v2_minor" ]; then
return 1
fi
[ "$v1_patch" -le "$v2_patch" ]
# Compare patch version
if [ "$v1_patch" -le "$v2_patch" ]; then
return 0
else
return 1
fi
}
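For reference, an illustrative Python equivalent of the hardened comparison being removed above (helper name ours, not the script's): missing or non-numeric parts default to 0, and the function answers "is v1 <= v2" just like the bash return codes.

def version_lte(v1: str, v2: str) -> bool:
    def parts(v: str) -> tuple[int, int, int]:
        p = v.split(".")
        nums = [int(p[i]) if i < len(p) and p[i].isdigit() else 0 for i in range(3)]
        return nums[0], nums[1], nums[2]
    return parts(v1) <= parts(v2)  # tuple comparison is lexicographic

assert version_lte("2.23.3", "2.24.0")       # older Compose hits the warning path
assert not version_lte("2.25.0", "2.24.0")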
# Check Docker daemon
@@ -533,7 +371,8 @@ if [ "$RESOURCE_WARNING" = true ]; then
echo ""
print_warning "Onyx recommends at least ${EXPECTED_DOCKER_RAM_GB}GB RAM and ${EXPECTED_DISK_GB}GB disk space for optimal performance."
echo ""
prompt_yn_or_default "Do you want to continue anyway? (y/N): " "y"
read -p "Do you want to continue anyway? (y/N): " -n 1 -r
echo ""
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
print_info "Installation cancelled. Please allocate more resources and try again."
exit 1
@@ -558,9 +397,6 @@ print_info "This step downloads all necessary configuration files from GitHub...
echo ""
print_info "Downloading the following files:"
echo " • docker-compose.yml - Main Docker Compose configuration"
if [[ "$LITE_MODE" = true ]]; then
echo "${LITE_COMPOSE_FILE} - Lite mode overlay"
fi
echo " • env.template - Environment variables template"
echo " • nginx/app.conf.template - Nginx web server configuration"
echo " • nginx/run-nginx.sh - Nginx startup script"
@@ -570,7 +406,7 @@ echo ""
# Download Docker Compose file
COMPOSE_FILE="${INSTALL_ROOT}/deployment/docker-compose.yml"
print_info "Downloading docker-compose.yml..."
if download_file "${GITHUB_RAW_URL}/docker-compose.yml" "$COMPOSE_FILE" 2>/dev/null; then
if curl -fsSL -o "$COMPOSE_FILE" "${GITHUB_RAW_URL}/docker-compose.yml" 2>/dev/null; then
print_success "Docker Compose file downloaded successfully"
# Check if Docker Compose version is older than 2.24.0 and show warning
@@ -595,7 +431,8 @@ if download_file "${GITHUB_RAW_URL}/docker-compose.yml" "$COMPOSE_FILE" 2>/dev/n
echo ""
print_warning "The installation will continue, but may fail if Docker Compose cannot parse the file."
echo ""
prompt_yn_or_default "Do you want to continue anyway? (y/N): " "y"
read -p "Do you want to continue anyway? (y/N): " -n 1 -r
echo ""
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
print_info "Installation cancelled. Please upgrade Docker Compose or manually edit the docker-compose.yml file."
exit 1
@@ -608,23 +445,10 @@ else
exit 1
fi
# Download lite overlay if --lite was requested
if [[ "$LITE_MODE" = true ]]; then
LITE_FILE="${INSTALL_ROOT}/deployment/${LITE_COMPOSE_FILE}"
print_info "Downloading ${LITE_COMPOSE_FILE} (lite overlay)..."
if download_file "${GITHUB_RAW_URL}/${LITE_COMPOSE_FILE}" "$LITE_FILE" 2>/dev/null; then
print_success "Lite overlay downloaded successfully"
else
print_error "Failed to download lite overlay"
print_info "Please ensure you have internet connection and try again"
exit 1
fi
fi
# Download env.template file
ENV_TEMPLATE="${INSTALL_ROOT}/deployment/env.template"
print_info "Downloading env.template..."
if download_file "${GITHUB_RAW_URL}/env.template" "$ENV_TEMPLATE" 2>/dev/null; then
if curl -fsSL -o "$ENV_TEMPLATE" "${GITHUB_RAW_URL}/env.template" 2>/dev/null; then
print_success "Environment template downloaded successfully"
else
print_error "Failed to download env.template"
@@ -638,7 +462,7 @@ NGINX_BASE_URL="https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deploym
# Download app.conf.template
NGINX_CONFIG="${INSTALL_ROOT}/data/nginx/app.conf.template"
print_info "Downloading nginx configuration template..."
if download_file "$NGINX_BASE_URL/app.conf.template" "$NGINX_CONFIG" 2>/dev/null; then
if curl -fsSL -o "$NGINX_CONFIG" "$NGINX_BASE_URL/app.conf.template" 2>/dev/null; then
print_success "Nginx configuration template downloaded"
else
print_error "Failed to download nginx configuration template"
@@ -649,7 +473,7 @@ fi
# Download run-nginx.sh script
NGINX_RUN_SCRIPT="${INSTALL_ROOT}/data/nginx/run-nginx.sh"
print_info "Downloading nginx startup script..."
if download_file "$NGINX_BASE_URL/run-nginx.sh" "$NGINX_RUN_SCRIPT" 2>/dev/null; then
if curl -fsSL -o "$NGINX_RUN_SCRIPT" "$NGINX_BASE_URL/run-nginx.sh" 2>/dev/null; then
chmod +x "$NGINX_RUN_SCRIPT"
print_success "Nginx startup script downloaded and made executable"
else
@@ -661,7 +485,7 @@ fi
# Download README file
README_FILE="${INSTALL_ROOT}/README.md"
print_info "Downloading README.md..."
if download_file "${GITHUB_RAW_URL}/README.md" "$README_FILE" 2>/dev/null; then
if curl -fsSL -o "$README_FILE" "${GITHUB_RAW_URL}/README.md" 2>/dev/null; then
print_success "README.md downloaded successfully"
else
print_error "Failed to download README.md"
@@ -689,7 +513,7 @@ if [ -d "${INSTALL_ROOT}/deployment" ] && [ -f "${INSTALL_ROOT}/deployment/docke
if [ -n "$COMPOSE_CMD" ]; then
# Check if any containers are running
RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null | wc -l)
RUNNING_CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null | wc -l)
if [ "$RUNNING_CONTAINERS" -gt 0 ]; then
print_error "Onyx services are currently running!"
echo ""
@@ -710,7 +534,7 @@ if [ -f "$ENV_FILE" ]; then
echo "• Press Enter to restart with current configuration"
echo "• Type 'update' to update to a newer version"
echo ""
prompt_or_default "Choose an option [default: restart]: " ""
read -p "Choose an option [default: restart]: " -r
echo ""
if [ "$REPLY" = "update" ]; then
@@ -719,19 +543,22 @@ if [ -f "$ENV_FILE" ]; then
echo "• Press Enter for latest (recommended)"
echo "• Type a specific tag (e.g., v0.1.0)"
echo ""
# If --include-craft was passed, default to craft-latest
if [ "$INCLUDE_CRAFT" = true ]; then
prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
VERSION="$REPLY"
read -p "Enter tag [default: craft-latest]: " -r VERSION
else
prompt_or_default "Enter tag [default: latest]: " "latest"
VERSION="$REPLY"
read -p "Enter tag [default: latest]: " -r VERSION
fi
echo ""
if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
print_info "Selected: craft-latest (Craft enabled)"
elif [ "$VERSION" = "latest" ]; then
print_info "Selected: Latest version"
if [ -z "$VERSION" ]; then
if [ "$INCLUDE_CRAFT" = true ]; then
VERSION="craft-latest"
print_info "Selected: craft-latest (Craft enabled)"
else
VERSION="latest"
print_info "Selected: Latest version"
fi
else
print_info "Selected: $VERSION"
fi
@@ -768,21 +595,23 @@ else
echo "• Press Enter for craft-latest (recommended for Craft)"
echo "• Type a specific tag (e.g., craft-v1.0.0)"
echo ""
prompt_or_default "Enter tag [default: craft-latest]: " "craft-latest"
VERSION="$REPLY"
read -p "Enter tag [default: craft-latest]: " -r VERSION
else
echo "• Press Enter for latest (recommended)"
echo "• Type a specific tag (e.g., v0.1.0)"
echo ""
prompt_or_default "Enter tag [default: latest]: " "latest"
VERSION="$REPLY"
read -p "Enter tag [default: latest]: " -r VERSION
fi
echo ""
if [ "$INCLUDE_CRAFT" = true ] && [ "$VERSION" = "craft-latest" ]; then
print_info "Selected: craft-latest (Craft enabled)"
elif [ "$VERSION" = "latest" ]; then
print_info "Selected: Latest tag"
if [ -z "$VERSION" ]; then
if [ "$INCLUDE_CRAFT" = true ]; then
VERSION="craft-latest"
print_info "Selected: craft-latest (Craft enabled)"
else
VERSION="latest"
print_info "Selected: Latest tag"
fi
else
print_info "Selected: $VERSION"
fi
@@ -857,13 +686,6 @@ else
echo ""
fi
# Reject craft image tags when running in lite mode
if [[ "$LITE_MODE" = true ]] && [[ "${VERSION:-}" == craft-* ]]; then
print_error "Cannot use a craft image tag (${VERSION}) with --lite."
print_info "Craft requires services (Vespa, Redis, background workers) that lite mode disables."
exit 1
fi
# Function to check if a port is available
is_port_available() {
local port=$1
@@ -949,7 +771,7 @@ print_step "Pulling Docker images"
print_info "This may take several minutes depending on your internet connection..."
echo ""
print_info "Downloading Docker images (this may take a while)..."
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) pull --quiet)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml pull --quiet)
if [ $? -eq 0 ]; then
print_success "Docker images downloaded successfully"
else
@@ -963,9 +785,9 @@ print_info "Launching containers..."
echo ""
if [ "$USE_LATEST" = true ]; then
print_info "Force pulling latest images and recreating containers..."
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d --pull always --force-recreate)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d --pull always --force-recreate)
else
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) up -d)
(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml up -d)
fi
if [ $? -ne 0 ]; then
print_error "Failed to start Onyx services"
@@ -987,7 +809,7 @@ echo ""
# Check for restart loops
print_info "Checking container health status..."
RESTART_ISSUES=false
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD $(compose_file_args) ps -q 2>/dev/null)
CONTAINERS=$(cd "${INSTALL_ROOT}/deployment" && $COMPOSE_CMD -f docker-compose.yml ps -q 2>/dev/null)
for CONTAINER in $CONTAINERS; do
PROJECT_NAME="$(basename "$INSTALL_ROOT")_deployment_"
@@ -1016,7 +838,7 @@ if [ "$RESTART_ISSUES" = true ]; then
print_error "Some containers are experiencing issues!"
echo ""
print_info "Please check the logs for more information:"
echo " (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD $(compose_file_args) logs)"
echo " (cd \"${INSTALL_ROOT}/deployment\" && $COMPOSE_CMD -f docker-compose.yml logs)"
echo ""
print_info "If the issue persists, please contact: founders@onyx.app"
@@ -1035,12 +857,8 @@ check_onyx_health() {
echo ""
while [ $attempt -le $max_attempts ]; do
local http_code=""
if [[ "$DOWNLOADER" == "curl" ]]; then
http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port" 2>/dev/null || echo "000")
else
http_code=$(wget -q --spider -S "http://localhost:$port" 2>&1 | grep "HTTP/" | tail -1 | awk '{print $2}' || echo "000")
fi
# Check for successful HTTP responses (200, 301, 302, etc.)
local http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$port")
if echo "$http_code" | grep -qE "^(200|301|302|303|307|308)$"; then
return 0
fi
@@ -1096,18 +914,6 @@ print_info "If authentication is enabled, you can create your admin account here
echo " • Visit http://localhost:${HOST_PORT}/auth/signup to create your admin account"
echo " • The first user created will automatically have admin privileges"
echo ""
if [[ "$LITE_MODE" = true ]]; then
echo ""
print_info "Running in Lite mode — the following services are NOT started:"
echo " • Vespa (vector database)"
echo " • Redis (cache)"
echo " • Model servers (embedding/inference)"
echo " • Background workers (Celery)"
echo ""
print_info "Connectors and RAG search are disabled. LLM chat, tools, user file"
print_info "uploads, Projects, Agent knowledge, and code interpreter still work."
fi
echo ""
print_info "Refer to the README in the ${INSTALL_ROOT} directory for more information."
echo ""
print_info "For help or issues, contact: founders@onyx.app"

View File

@@ -1,190 +0,0 @@
"use client";
import { useState } from "react";
import Text from "@/refresh-components/texts/Text";
import TableQualifier from "@/refresh-components/table/TableQualifier";
import { TableSizeProvider } from "@/refresh-components/table/TableSizeContext";
import type { TableSize } from "@/refresh-components/table/TableSizeContext";
import type { QualifierContentType } from "@/refresh-components/table/types";
import { SvgCheckCircle } from "@opal/icons";
// ---------------------------------------------------------------------------
// Content type configurations
// ---------------------------------------------------------------------------
interface ContentConfig {
label: string;
content: QualifierContentType;
extraProps: Record<string, unknown>;
}
const CONTENT_TYPES: ContentConfig[] = [
{
label: "Simple",
content: "simple",
extraProps: {},
},
{
label: "Icon",
content: "icon",
extraProps: { icon: SvgCheckCircle },
},
{
label: "Image",
content: "image",
extraProps: {
imageSrc: "https://picsum.photos/36",
imageAlt: "Placeholder",
},
},
{
label: "Avatar Icon",
content: "avatar-icon",
extraProps: {},
},
{
label: "Avatar User",
content: "avatar-user",
extraProps: { initials: "AJ" },
},
];
// ---------------------------------------------------------------------------
// Row of qualifier states for a single content type
// ---------------------------------------------------------------------------
interface QualifierRowProps {
config: ContentConfig;
}
function QualifierRow({ config }: QualifierRowProps) {
const [selectableSelected, setSelectableSelected] = useState(false);
const [permanentSelected, setPermanentSelected] = useState(true);
return (
<div className="space-y-2">
<Text mainUiAction text02>
{config.label}
</Text>
<div className="flex items-start gap-8">
{/* Default */}
<div className="flex w-20 flex-col items-center gap-2">
<TableQualifier
content={config.content}
selectable={false}
selected={false}
disabled={false}
{...config.extraProps}
/>
<Text secondaryBody text04>
Default
</Text>
</div>
{/* Selectable (hover to reveal checkbox) */}
<div className="flex w-20 flex-col items-center gap-2">
<TableQualifier
content={config.content}
selectable={true}
selected={selectableSelected}
disabled={false}
onSelectChange={setSelectableSelected}
{...config.extraProps}
/>
<Text secondaryBody text04>
Selectable
</Text>
</div>
{/* Selected */}
<div className="flex w-20 flex-col items-center gap-2">
<TableQualifier
content={config.content}
selectable={true}
selected={permanentSelected}
disabled={false}
onSelectChange={setPermanentSelected}
{...config.extraProps}
/>
<Text secondaryBody text04>
Selected
</Text>
</div>
{/* Disabled (unselected) */}
<div className="flex w-20 flex-col items-center gap-2">
<TableQualifier
content={config.content}
selectable={true}
selected={false}
disabled={true}
{...config.extraProps}
/>
<Text secondaryBody text04>
Disabled
</Text>
</div>
{/* Disabled (selected) */}
<div className="flex w-20 flex-col items-center gap-2">
<TableQualifier
content={config.content}
selectable={true}
selected={true}
disabled={true}
{...config.extraProps}
/>
<Text secondaryBody text04>
Disabled+Sel
</Text>
</div>
</div>
</div>
);
}
// ---------------------------------------------------------------------------
// Size section — all content types at a given size
// ---------------------------------------------------------------------------
interface SizeSectionProps {
size: TableSize;
title: string;
}
function SizeSection({ size, title }: SizeSectionProps) {
return (
<div className="space-y-6">
<Text headingH3>{title}</Text>
<TableSizeProvider size={size}>
<div className="flex flex-col gap-8">
{CONTENT_TYPES.map((config) => (
<QualifierRow key={`${size}-${config.content}`} config={config} />
))}
</div>
</TableSizeProvider>
</div>
);
}
// ---------------------------------------------------------------------------
// Page
// ---------------------------------------------------------------------------
export default function TableQualifierDemoPage() {
return (
<div className="p-6 space-y-10">
<div className="space-y-4">
<Text headingH2>TableQualifier Demo</Text>
<Text mainContentMuted text03>
All content types, sizes, and interactive states. Hover selectable
variants to reveal the checkbox; click to toggle.
</Text>
</div>
<SizeSection size="regular" title="Regular (36px)" />
<SizeSection size="small" title="Small (28px)" />
</div>
);
}

View File

@@ -1,106 +0,0 @@
"use client";
import { useState } from "react";
import {
type Table,
type ColumnDef,
type RowData,
type VisibilityState,
} from "@tanstack/react-table";
import { Button } from "@opal/components";
import { SvgColumn, SvgCheck } from "@opal/icons";
import Popover from "@/refresh-components/Popover";
import LineItem from "@/refresh-components/buttons/LineItem";
import Divider from "@/refresh-components/Divider";
// ---------------------------------------------------------------------------
// Popover UI
// ---------------------------------------------------------------------------
interface ColumnVisibilityPopoverProps<TData extends RowData = RowData> {
table: Table<TData>;
columnVisibility: VisibilityState;
size?: "regular" | "small";
}
function ColumnVisibilityPopover<TData extends RowData>({
table,
columnVisibility,
size = "regular",
}: ColumnVisibilityPopoverProps<TData>) {
const [open, setOpen] = useState(false);
const hideableColumns = table
.getAllLeafColumns()
.filter((col) => col.getCanHide());
return (
<Popover open={open} onOpenChange={setOpen}>
<Popover.Trigger asChild>
<Button
icon={SvgColumn}
transient={open}
size={size === "small" ? "sm" : "md"}
prominence="internal"
tooltip="Columns"
/>
</Popover.Trigger>
<Popover.Content width="lg" align="end" side="bottom">
<Divider showTitle text="Shown Columns" />
<Popover.Menu>
{hideableColumns.map((column) => {
const isVisible = columnVisibility[column.id] !== false;
const label =
typeof column.columnDef.header === "string"
? column.columnDef.header
: column.id;
return (
<LineItem
key={column.id}
selected={isVisible}
emphasized
rightChildren={isVisible ? <SvgCheck size={16} /> : undefined}
onClick={() => {
column.toggleVisibility();
}}
>
{label}
</LineItem>
);
})}
</Popover.Menu>
</Popover.Content>
</Popover>
);
}
// ---------------------------------------------------------------------------
// Column definition factory
// ---------------------------------------------------------------------------
interface CreateColumnVisibilityColumnOptions {
size?: "regular" | "small";
}
function createColumnVisibilityColumn<TData>(
options?: CreateColumnVisibilityColumnOptions
): ColumnDef<TData, unknown> {
return {
id: "__columnVisibility",
size: 44,
enableHiding: false,
enableSorting: false,
enableResizing: false,
header: ({ table }) => (
<ColumnVisibilityPopover
table={table}
columnVisibility={table.getState().columnVisibility}
size={options?.size}
/>
),
cell: () => null,
};
}
export { ColumnVisibilityPopover, createColumnVisibilityColumn };
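// Usage sketch — appending the visibility column to a table's column
// definitions. `Person` and its data columns are hypothetical, not part of
// this module:
import { createColumnHelper } from "@tanstack/react-table";

interface Person {
  name: string;
  email: string;
}

const personHelper = createColumnHelper<Person>();

const personColumns = [
  personHelper.accessor("name", { header: "Name" }),
  personHelper.accessor("email", { header: "Email" }),
  // Renders the popover trigger in the header row; body cells stay empty.
  createColumnVisibilityColumn<Person>({ size: "small" }),
];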

View File

@@ -1,260 +0,0 @@
"use client";
import { cn } from "@/lib/utils";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import Pagination from "@/refresh-components/table/Pagination";
import { useTableSize } from "@/refresh-components/table/TableSizeContext";
import type { TableSize } from "@/refresh-components/table/TableSizeContext";
import { SvgEye, SvgXCircle } from "@opal/icons";
type SelectionState = "none" | "partial" | "all";
/**
* Footer mode for tables with selectable rows.
* Displays a selection message on the left (with optional view/clear actions)
* and a `count`-type pagination on the right.
*/
interface FooterSelectionModeProps {
mode: "selection";
/** Whether the table supports selecting multiple rows. */
multiSelect: boolean;
/** Current selection state: `"none"`, `"partial"`, or `"all"`. */
selectionState: SelectionState;
/** Number of currently selected items. */
selectedCount: number;
/** If provided, renders a "View" icon button when items are selected. */
onView?: () => void;
/** If provided, renders a "Clear" icon button when items are selected. */
onClear?: () => void;
/** Number of items displayed per page. */
pageSize: number;
/** Total number of items across all pages. */
totalItems: number;
/** The 1-based current page number. */
currentPage: number;
/** Total number of pages. */
totalPages: number;
/** Called when the user navigates to a different page. */
onPageChange: (page: number) => void;
/** Controls overall footer sizing. `"regular"` (default) or `"small"`. */
size?: TableSize;
className?: string;
}
/**
* Footer mode for read-only tables (no row selection).
* Displays "Showing X~Y of Z" on the left and a `list`-type pagination
* on the right.
*/
interface FooterSummaryModeProps {
mode: "summary";
/** First item number in the current page (e.g. `1`). */
rangeStart: number;
/** Last item number in the current page (e.g. `25`). */
rangeEnd: number;
/** Total number of items across all pages. */
totalItems: number;
/** The 1-based current page number. */
currentPage: number;
/** Total number of pages. */
totalPages: number;
/** Called when the user navigates to a different page. */
onPageChange: (page: number) => void;
/** Controls overall footer sizing. `"regular"` (default) or `"small"`. */
size?: TableSize;
className?: string;
}
/**
* Discriminated union of footer modes.
* Use `mode: "selection"` for tables with selectable rows, or
* `mode: "summary"` for read-only tables.
*/
export type FooterProps = FooterSelectionModeProps | FooterSummaryModeProps;
function getSelectionMessage(
state: SelectionState,
multi: boolean,
count: number
): string {
if (state === "none") {
return multi ? "Select items to continue" : "Select an item to continue";
}
if (!multi) return "Item selected";
return `${count} item${count !== 1 ? "s" : ""} selected`;
}
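// Examples:
//   getSelectionMessage("none", true, 0)     → "Select items to continue"
//   getSelectionMessage("none", false, 0)    → "Select an item to continue"
//   getSelectionMessage("partial", false, 1) → "Item selected"
//   getSelectionMessage("all", true, 12)     → "12 items selected"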
/**
* Table footer combining status information on the left with pagination on the
* right. Use `mode: "selection"` for tables with selectable rows, or
* `mode: "summary"` for read-only tables.
*/
export default function Footer(props: FooterProps) {
const contextSize = useTableSize();
const resolvedSize = props.size ?? contextSize;
const isSmall = resolvedSize === "small";
return (
<div
className={cn(
"table-footer",
"flex w-full items-center justify-between border-t border-border-01",
props.className
)}
data-size={resolvedSize}
>
{/* Left side */}
<div className="flex items-center gap-1 px-1">
{props.mode === "selection" ? (
<SelectionLeft
selectionState={props.selectionState}
multiSelect={props.multiSelect}
selectedCount={props.selectedCount}
onView={props.onView}
onClear={props.onClear}
isSmall={isSmall}
/>
) : (
<SummaryLeft
rangeStart={props.rangeStart}
rangeEnd={props.rangeEnd}
totalItems={props.totalItems}
isSmall={isSmall}
/>
)}
</div>
{/* Right side */}
<div className="flex items-center gap-2 px-1 py-2">
{props.mode === "selection" ? (
<Pagination
type="count"
pageSize={props.pageSize}
totalItems={props.totalItems}
currentPage={props.currentPage}
totalPages={props.totalPages}
onPageChange={props.onPageChange}
showUnits
size={isSmall ? "sm" : "md"}
/>
) : (
<Pagination
type="list"
currentPage={props.currentPage}
totalPages={props.totalPages}
onPageChange={props.onPageChange}
size={isSmall ? "md" : "lg"}
/>
)}
</div>
</div>
);
}
interface SelectionLeftProps {
selectionState: SelectionState;
multiSelect: boolean;
selectedCount: number;
onView?: () => void;
onClear?: () => void;
isSmall: boolean;
}
function SelectionLeft({
selectionState,
multiSelect,
selectedCount,
onView,
onClear,
isSmall,
}: SelectionLeftProps) {
const message = getSelectionMessage(
selectionState,
multiSelect,
selectedCount
);
const hasSelection = selectionState !== "none";
return (
<div className="flex flex-row gap-1 items-center justify-center w-fit flex-shrink-0 h-fit px-1">
{isSmall ? (
<Text
secondaryAction={hasSelection}
secondaryBody={!hasSelection}
text03
>
{message}
</Text>
) : (
<Text mainUiBody={hasSelection} mainUiMuted={!hasSelection} text03>
{message}
</Text>
)}
{hasSelection && (
<div className="flex flex-row items-center w-fit flex-shrink-0 h-fit">
{onView && (
<Button
icon={SvgEye}
onClick={onView}
tooltip="View"
size={isSmall ? "sm" : "md"}
prominence="tertiary"
/>
)}
{onClear && (
<Button
icon={SvgXCircle}
onClick={onClear}
tooltip="Clear selection"
size={isSmall ? "sm" : "md"}
prominence="tertiary"
/>
)}
</div>
)}
</div>
);
}
interface SummaryLeftProps {
rangeStart: number;
rangeEnd: number;
totalItems: number;
isSmall: boolean;
}
function SummaryLeft({
rangeStart,
rangeEnd,
totalItems,
isSmall,
}: SummaryLeftProps) {
return (
<div className="flex flex-row gap-1 items-center w-fit h-fit px-1">
{isSmall ? (
<Text secondaryBody text03>
Showing{" "}
<Text as="span" secondaryMono text03>
{rangeStart}~{rangeEnd}
</Text>{" "}
of{" "}
<Text as="span" secondaryMono text03>
{totalItems}
</Text>
</Text>
) : (
<Text mainUiMuted text03>
Showing{" "}
<Text as="span" mainUiMono text03>
{rangeStart}~{rangeEnd}
</Text>{" "}
of{" "}
<Text as="span" mainUiMono text03>
{totalItems}
</Text>
</Text>
)}
</div>
);
}
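// Usage sketch — one Footer per mode. The counts and callbacks below are
// hypothetical host-component state, not part of this module:
const selectionFooter = (
  <Footer
    mode="selection"
    multiSelect
    selectionState="partial"
    selectedCount={3}
    onClear={() => console.log("clear selection")}
    pageSize={25}
    totalItems={110}
    currentPage={1}
    totalPages={5}
    onPageChange={(page) => console.log("page", page)}
  />
);
const summaryFooter = (
  <Footer
    mode="summary"
    rangeStart={1}
    rangeEnd={25}
    totalItems={110}
    currentPage={1}
    totalPages={5}
    onPageChange={(page) => console.log("page", page)}
  />
);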

View File

@@ -1,393 +0,0 @@
"use client";
import { Button } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import { cn } from "@/lib/utils";
import { SvgChevronLeft, SvgChevronRight } from "@opal/icons";
type PaginationSize = "lg" | "md" | "sm";
/**
* Minimal page navigation showing `currentPage / totalPages` with prev/next arrows.
* Use when you only need simple forward/backward navigation.
*/
interface SimplePaginationProps {
type: "simple";
/** The 1-based current page number. */
currentPage: number;
/** Total number of pages. */
totalPages: number;
/** Called when the user navigates to a different page. */
onPageChange: (page: number) => void;
/** When `true`, displays the word "pages" after the page indicator. */
showUnits?: boolean;
/** When `false`, hides the page indicator between the prev/next arrows. Defaults to `true`. */
showPageIndicator?: boolean;
/** Controls button and text sizing. Defaults to `"lg"`. */
size?: PaginationSize;
className?: string;
}
/**
* Item-count pagination showing `currentItems of totalItems` with optional page
* controls and a "Go to" button. Use inside table footers that need to communicate
* how many items the user is viewing.
*/
interface CountPaginationProps {
type: "count";
/** Number of items displayed per page. Used to compute the visible range. */
pageSize: number;
/** Total number of items across all pages. */
totalItems: number;
/** The 1-based current page number. */
currentPage: number;
/** Total number of pages. */
totalPages: number;
/** Called when the user navigates to a different page. */
onPageChange: (page: number) => void;
/** When `false`, hides the page number between the prev/next arrows (arrows still visible). Defaults to `true`. */
showPageIndicator?: boolean;
/** When `true`, renders a "Go to" button. Requires `onGoTo`. */
showGoTo?: boolean;
/** Callback invoked when the "Go to" button is clicked. */
onGoTo?: () => void;
/** When `true`, displays the word "items" after the total count. */
showUnits?: boolean;
/** Controls button and text sizing. Defaults to `"lg"`. */
size?: PaginationSize;
className?: string;
}
/**
* Numbered page-list pagination with clickable page buttons and ellipsis
* truncation for large page counts. Does not support `"sm"` size.
*/
interface ListPaginationProps {
type: "list";
/** The 1-based current page number. */
currentPage: number;
/** Total number of pages. */
totalPages: number;
/** Called when the user navigates to a different page. */
onPageChange: (page: number) => void;
/** When `false`, hides the page buttons between the prev/next arrows. Defaults to `true`. */
showPageIndicator?: boolean;
/** Controls button and text sizing. Defaults to `"lg"`. Only `"lg"` and `"md"` are supported. */
size?: Exclude<PaginationSize, "sm">;
className?: string;
}
/**
* Discriminated union of all pagination variants.
* Use the `type` prop to select between `"simple"`, `"count"`, and `"list"`.
*/
export type PaginationProps =
| SimplePaginationProps
| CountPaginationProps
| ListPaginationProps;
function getPageNumbers(currentPage: number, totalPages: number) {
const pages: (number | string)[] = [];
const maxPagesToShow = 7;
if (totalPages <= maxPagesToShow) {
for (let i = 1; i <= totalPages; i++) {
pages.push(i);
}
} else {
pages.push(1);
let startPage = Math.max(2, currentPage - 1);
let endPage = Math.min(totalPages - 1, currentPage + 1);
if (currentPage <= 3) {
endPage = 5;
} else if (currentPage >= totalPages - 2) {
startPage = totalPages - 4;
}
if (startPage > 2) {
if (startPage === 3) {
pages.push(2);
} else {
pages.push("start-ellipsis");
}
}
for (let i = startPage; i <= endPage; i++) {
pages.push(i);
}
if (endPage < totalPages - 1) {
if (endPage === totalPages - 2) {
pages.push(totalPages - 1);
} else {
pages.push("end-ellipsis");
}
}
pages.push(totalPages);
}
return pages;
}
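// Examples (window of up to maxPagesToShow = 7 entries):
//   getPageNumbers(1, 5)   → [1, 2, 3, 4, 5]
//   getPageNumbers(1, 10)  → [1, 2, 3, 4, 5, "end-ellipsis", 10]
//   getPageNumbers(5, 10)  → [1, "start-ellipsis", 4, 5, 6, "end-ellipsis", 10]
//   getPageNumbers(10, 10) → [1, "start-ellipsis", 6, 7, 8, 9, 10]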
function sizedTextProps(isSmall: boolean, variant: "mono" | "muted") {
if (variant === "mono") {
return isSmall ? { secondaryMono: true } : { mainUiMono: true };
}
return isSmall ? { secondaryBody: true } : { mainUiMuted: true };
}
interface NavButtonsProps {
currentPage: number;
totalPages: number;
onPageChange: (page: number) => void;
size: PaginationSize;
children?: React.ReactNode;
}
function NavButtons({
currentPage,
totalPages,
onPageChange,
size,
children,
}: NavButtonsProps) {
return (
<>
<Button
icon={SvgChevronLeft}
onClick={() => onPageChange(currentPage - 1)}
disabled={currentPage <= 1}
size={size}
prominence="tertiary"
tooltip="Previous page"
/>
{children}
<Button
icon={SvgChevronRight}
onClick={() => onPageChange(currentPage + 1)}
disabled={currentPage >= totalPages}
size={size}
prominence="tertiary"
tooltip="Next page"
/>
</>
);
}
/**
* Table pagination component with three variants: `simple`, `count`, and `list`.
* Pass the `type` prop to select the variant, and the component will render the
* appropriate UI.
*/
export default function Pagination(props: PaginationProps) {
const normalized = { ...props, totalPages: Math.max(1, props.totalPages) };
switch (normalized.type) {
case "simple":
return <SimplePaginationInner {...normalized} />;
case "count":
return <CountPaginationInner {...normalized} />;
case "list":
return <ListPaginationInner {...normalized} />;
}
}
function SimplePaginationInner({
currentPage,
totalPages,
onPageChange,
showUnits,
showPageIndicator = true,
size = "lg",
className,
}: SimplePaginationProps) {
const isSmall = size === "sm";
return (
<div className={cn("flex items-center gap-1", className)}>
<NavButtons
currentPage={currentPage}
totalPages={totalPages}
onPageChange={onPageChange}
size={size}
>
{showPageIndicator && (
<>
<Text {...sizedTextProps(isSmall, "mono")} text03>
{currentPage}
<Text as="span" {...sizedTextProps(isSmall, "muted")} text03>
/
</Text>
{totalPages}
</Text>
{showUnits && (
<Text {...sizedTextProps(isSmall, "muted")} text03>
pages
</Text>
)}
</>
)}
</NavButtons>
</div>
);
}
function CountPaginationInner({
pageSize,
totalItems,
currentPage,
totalPages,
onPageChange,
showPageIndicator = true,
showGoTo,
onGoTo,
showUnits,
size = "lg",
className,
}: CountPaginationProps) {
const isSmall = size === "sm";
const rangeStart = totalItems === 0 ? 0 : (currentPage - 1) * pageSize + 1;
const rangeEnd = Math.min(currentPage * pageSize, totalItems);
const currentItems = `${rangeStart}~${rangeEnd}`;
return (
<div className={cn("flex items-center gap-1", className)}>
<Text {...sizedTextProps(isSmall, "mono")} text03>
{currentItems}
</Text>
<Text {...sizedTextProps(isSmall, "muted")} text03>
of
</Text>
<Text {...sizedTextProps(isSmall, "mono")} text03>
{totalItems}
</Text>
{showUnits && (
<Text {...sizedTextProps(isSmall, "muted")} text03>
items
</Text>
)}
<NavButtons
currentPage={currentPage}
totalPages={totalPages}
onPageChange={onPageChange}
size={size}
>
{showPageIndicator && (
<Text {...sizedTextProps(isSmall, "mono")} text03>
{currentPage}
</Text>
)}
</NavButtons>
{showGoTo && onGoTo && (
<Button onClick={onGoTo} size={size} prominence="tertiary">
Go to
</Button>
)}
</div>
);
}
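// Example range math: with pageSize = 25, totalItems = 110, currentPage = 3,
// rangeStart = (3 - 1) * 25 + 1 = 51 and rangeEnd = min(3 * 25, 110) = 75, so
// the count reads "51~75 of 110 items" (with showUnits). On the last page
// (currentPage = 5), rangeEnd clamps to 110: "101~110 of 110".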
interface PageNumberIconProps {
className?: string;
pageNum: number;
isActive: boolean;
isLarge: boolean;
}
function PageNumberIcon({
className: iconClassName,
pageNum,
isActive,
isLarge,
}: PageNumberIconProps) {
return (
<div className={cn(iconClassName, "flex flex-col justify-center")}>
{isLarge ? (
<Text
mainUiBody={isActive}
mainUiMuted={!isActive}
text04={isActive}
text02={!isActive}
>
{pageNum}
</Text>
) : (
<Text
secondaryAction={isActive}
secondaryBody={!isActive}
text04={isActive}
text02={!isActive}
>
{pageNum}
</Text>
)}
</div>
);
}
function ListPaginationInner({
currentPage,
totalPages,
onPageChange,
showPageIndicator = true,
size = "lg",
className,
}: ListPaginationProps) {
const pageNumbers = getPageNumbers(currentPage, totalPages);
const isLarge = size === "lg";
return (
<div className={cn("flex items-center gap-1", className)}>
<NavButtons
currentPage={currentPage}
totalPages={totalPages}
onPageChange={onPageChange}
size={size}
>
{showPageIndicator && (
<div className="flex items-center">
{pageNumbers.map((page) => {
if (typeof page === "string") {
return (
<Text
key={page}
mainUiMuted={isLarge}
secondaryBody={!isLarge}
text03
>
...
</Text>
);
}
const pageNum = page; // already narrowed to number by the typeof check above
const isActive = pageNum === currentPage;
return (
<Button
key={pageNum}
onClick={() => onPageChange(pageNum)}
size={size}
prominence="tertiary"
transient={isActive}
icon={({ className: iconClassName }) => (
<PageNumberIcon
className={iconClassName}
pageNum={pageNum}
isActive={isActive}
isLarge={isLarge}
/>
)}
/>
);
})}
</div>
)}
</NavButtons>
</div>
);
}
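// Usage sketch — one instance per variant; the page state shown is
// hypothetical host-component state:
const simple = (
  <Pagination
    type="simple"
    currentPage={2}
    totalPages={9}
    onPageChange={(page) => console.log("page", page)}
    showUnits
  />
);
const count = (
  <Pagination
    type="count"
    pageSize={25}
    totalItems={110}
    currentPage={3}
    totalPages={5}
    onPageChange={(page) => console.log("page", page)}
    showUnits
  />
);
const list = (
  <Pagination
    type="list"
    currentPage={5}
    totalPages={10}
    onPageChange={(page) => console.log("page", page)}
    size="md"
  />
);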

View File

@@ -1,181 +0,0 @@
"use client";
import { useState } from "react";
import {
type Table,
type ColumnDef,
type RowData,
type SortingState,
} from "@tanstack/react-table";
import { Button } from "@opal/components";
import { SvgArrowUpDown, SvgSortOrder, SvgCheck } from "@opal/icons";
import Popover from "@/refresh-components/Popover";
import Divider from "@/refresh-components/Divider";
import LineItem from "@/refresh-components/buttons/LineItem";
import Text from "@/refresh-components/texts/Text";
// ---------------------------------------------------------------------------
// Popover UI
// ---------------------------------------------------------------------------
interface SortingPopoverProps<TData extends RowData = RowData> {
table: Table<TData>;
sorting: SortingState;
size?: "regular" | "small";
footerText?: string;
ascendingLabel?: string;
descendingLabel?: string;
}
function SortingPopover<TData extends RowData>({
table,
sorting,
size = "regular",
footerText,
ascendingLabel = "Ascending",
descendingLabel = "Descending",
}: SortingPopoverProps<TData>) {
const [open, setOpen] = useState(false);
const sortableColumns = table
.getAllLeafColumns()
.filter((col) => col.getCanSort());
const currentSort = sorting[0] ?? null;
return (
<Popover open={open} onOpenChange={setOpen}>
<Popover.Trigger asChild>
<Button
icon={currentSort === null ? SvgArrowUpDown : SvgSortOrder}
transient={open}
size={size === "small" ? "sm" : "md"}
prominence="internal"
tooltip="Sort"
/>
</Popover.Trigger>
<Popover.Content width="lg" align="end" side="bottom">
<Popover.Menu
footer={
footerText ? (
<div className="px-2 py-1">
<Text secondaryBody text03>
{footerText}
</Text>
</div>
) : undefined
}
>
<Divider showTitle text="Sort by" />
<LineItem
selected={currentSort === null}
emphasized
rightChildren={
currentSort === null ? <SvgCheck size={16} /> : undefined
}
onClick={() => {
table.resetSorting();
}}
>
Manual Ordering
</LineItem>
{sortableColumns.map((column) => {
const isSorted = currentSort?.id === column.id;
const label =
typeof column.columnDef.header === "string"
? column.columnDef.header
: column.id;
return (
<LineItem
key={column.id}
selected={isSorted}
emphasized
rightChildren={isSorted ? <SvgCheck size={16} /> : undefined}
onClick={() => {
if (isSorted) {
table.resetSorting();
return;
}
column.toggleSorting(false);
}}
>
{label}
</LineItem>
);
})}
{currentSort !== null && (
<>
<Divider showTitle text="Sorting Order" />
<LineItem
selected={!currentSort.desc}
emphasized
rightChildren={
!currentSort.desc ? <SvgCheck size={16} /> : undefined
}
onClick={() => {
table.setSorting([{ id: currentSort.id, desc: false }]);
}}
>
{ascendingLabel}
</LineItem>
<LineItem
selected={currentSort.desc}
emphasized
rightChildren={
currentSort.desc ? <SvgCheck size={16} /> : undefined
}
onClick={() => {
table.setSorting([{ id: currentSort.id, desc: true }]);
}}
>
{descendingLabel}
</LineItem>
</>
)}
</Popover.Menu>
</Popover.Content>
</Popover>
);
}
// ---------------------------------------------------------------------------
// Column definition factory
// ---------------------------------------------------------------------------
interface CreateSortingColumnOptions {
size?: "regular" | "small";
footerText?: string;
ascendingLabel?: string;
descendingLabel?: string;
}
function createSortingColumn<TData>(
options?: CreateSortingColumnOptions
): ColumnDef<TData, unknown> {
return {
id: "__sorting",
size: 44,
enableHiding: false,
enableSorting: false,
enableResizing: false,
header: ({ table }) => (
<SortingPopover
table={table}
sorting={table.getState().sorting}
size={options?.size}
footerText={options?.footerText}
ascendingLabel={options?.ascendingLabel}
descendingLabel={options?.descendingLabel}
/>
),
cell: () => null,
};
}
export { SortingPopover, createSortingColumn };
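// Usage sketch — appending the sorting column to a hypothetical column set
// (`Doc` and its fields are assumptions, not part of this module):
import { createColumnHelper } from "@tanstack/react-table";

interface Doc {
  title: string;
  updatedAt: string;
}

const docHelper = createColumnHelper<Doc>();

const docColumns = [
  docHelper.accessor("title", { header: "Title" }),
  docHelper.accessor("updatedAt", { header: "Updated" }),
  // Header renders the sort popover; picking a column applies an ascending
  // sort, and order controls appear once a sort is active.
  createSortingColumn<Doc>({
    size: "small",
    footerText: "Sorts one column at a time",
  }),
];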