mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-04-12 18:32:56 +00:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5b7779bc78 | ||
|
|
bb1c44daff | ||
|
|
f26ecafb51 | ||
|
|
9fdb425c0d | ||
|
|
47e20e89c5 | ||
|
|
8b28c127f2 | ||
|
|
9a861a71ad | ||
|
|
b4bc12f6dc |
@@ -24,13 +24,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
&& apt-get install -y nodejs \
|
||||
&& install -m 0755 -d /etc/apt/keyrings \
|
||||
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" > /etc/apt/sources.list.d/docker.list \
|
||||
&& curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg -o /etc/apt/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends docker-ce-cli docker-compose-plugin gh \
|
||||
&& apt-get install -y --no-install-recommends gh \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# fd-find installs as fdfind on Debian/Ubuntu — symlink to fd
|
||||
|
||||
@@ -6,7 +6,7 @@ A containerized development environment for working on Onyx.
|
||||
|
||||
- Ubuntu 26.04 base image
|
||||
- Node.js 20, uv, Claude Code
|
||||
- Docker CLI, GitHub CLI (`gh`)
|
||||
- GitHub CLI (`gh`)
|
||||
- Neovim, ripgrep, fd, fzf, jq, make, wget, unzip
|
||||
- Zsh as default shell (sources host `~/.zshrc` if available)
|
||||
- Python venv auto-activation
|
||||
@@ -73,19 +73,6 @@ user has read/write access to the bind-mounted workspace:
|
||||
To override the auto-detection, set `DEVCONTAINER_REMOTE_USER` before running
|
||||
`ods dev up`.
|
||||
|
||||
## Docker socket
|
||||
|
||||
The container mounts the host's Docker socket so you can run `docker` commands
|
||||
from inside. `ods dev` auto-detects the socket path and sets `DOCKER_SOCK`:
|
||||
|
||||
| Environment | Socket path |
|
||||
| ----------------------- | ------------------------------ |
|
||||
| Linux (rootless Docker) | `$XDG_RUNTIME_DIR/docker.sock` |
|
||||
| macOS (Docker Desktop) | `~/.docker/run/docker.sock` |
|
||||
| Linux (standard Docker) | `/var/run/docker.sock` |
|
||||
|
||||
To override, set `DOCKER_SOCK` before running `ods dev up`.
|
||||
|
||||
## Firewall
|
||||
|
||||
The container starts with a default-deny firewall (`init-firewall.sh`) that only allows outbound traffic to:
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
"image": "onyxdotapp/onyx-devcontainer@sha256:12184169c5bcc9cca0388286d5ffe504b569bc9c37bfa631b76ee8eee2064055",
|
||||
"runArgs": ["--cap-add=NET_ADMIN", "--cap-add=NET_RAW"],
|
||||
"mounts": [
|
||||
"source=${localEnv:DOCKER_SOCK},target=/var/run/docker.sock,type=bind",
|
||||
"source=${localEnv:HOME}/.claude,target=/home/dev/.claude,type=bind",
|
||||
"source=${localEnv:HOME}/.claude.json,target=/home/dev/.claude.json,type=bind",
|
||||
"source=${localEnv:HOME}/.zshrc,target=/home/dev/.zshrc.host,type=bind,readonly",
|
||||
|
||||
@@ -56,9 +56,10 @@ for domain in "${ALLOWED_DOMAINS[@]}"; do
|
||||
done
|
||||
done
|
||||
|
||||
# Detect host network
|
||||
if [[ "${DOCKER_HOST:-}" == "unix://"* ]]; then
|
||||
DOCKER_GATEWAY=$(ip -4 route show | grep "^default" | awk '{print $3}')
|
||||
# Allow traffic to the Docker gateway so the container can reach host services
|
||||
# (e.g. the Onyx stack at localhost:3000, localhost:8080, etc.)
|
||||
DOCKER_GATEWAY=$(ip -4 route show default | awk '{print $3}')
|
||||
if [ -n "$DOCKER_GATEWAY" ]; then
|
||||
if ! ipset add allowed-domains "$DOCKER_GATEWAY/32" -exist 2>&1; then
|
||||
echo "warning: failed to add Docker gateway $DOCKER_GATEWAY to allowlist" >&2
|
||||
fi
|
||||
|
||||
@@ -818,10 +818,7 @@ def translate_history_to_llm_format(
|
||||
)
|
||||
]
|
||||
|
||||
# Add image parts. Each image is preceded by a text tag
|
||||
# carrying its file_id so the LLM can reference the image by
|
||||
# ID when calling tools like generate_image (which expects
|
||||
# reference_image_file_ids to edit a specific image).
|
||||
# Add image parts
|
||||
for img_file in msg.image_files:
|
||||
if img_file.file_type == ChatFileType.IMAGE:
|
||||
try:
|
||||
@@ -829,12 +826,6 @@ def translate_history_to_llm_format(
|
||||
base64_data = img_file.to_base64()
|
||||
image_url = f"data:{image_type};base64,{base64_data}"
|
||||
|
||||
content_parts.append(
|
||||
TextContentPart(
|
||||
type="text",
|
||||
text=f"[attached image — file_id: {img_file.file_id}]",
|
||||
)
|
||||
)
|
||||
image_part = ImageContentPart(
|
||||
type="image_url",
|
||||
image_url=ImageUrlDetail(
|
||||
|
||||
@@ -64,20 +64,9 @@ IMPORTANT: each call to this tool is independent. Variables from previous calls
|
||||
|
||||
GENERATE_IMAGE_GUIDANCE = """
|
||||
## generate_image
|
||||
NEVER use generate_image unless the user specifically requests an image or asks to
|
||||
edit/modify an existing image in the conversation.
|
||||
To edit, modify, restyle, or create a variation of an image already in the
|
||||
conversation, put that image's file_id in `reference_image_file_ids`. File IDs come
|
||||
from two places, and both can be passed the same way:
|
||||
- Images the user attached to a message carry a `[attached image — file_id: <id>]`
|
||||
tag immediately before the image content. Copy the id out of that tag.
|
||||
- Images produced by previous `generate_image` calls have their file_id in that
|
||||
call's tool response JSON.
|
||||
Only pass file_ids that actually appear in the conversation — never invent or guess
|
||||
one. Leave `reference_image_file_ids` unset for a brand-new generation that doesn't
|
||||
edit any existing image (for example when the user attached an image for context but
|
||||
asked for a completely unrelated new picture). The first file_id in the list is the
|
||||
primary edit source; any later file_ids are additional reference context.
|
||||
NEVER use generate_image unless the user specifically requests an image.
|
||||
For edits/variations of a previously generated image, pass `reference_image_file_ids` with
|
||||
the `file_id` values returned by earlier `generate_image` tool results.
|
||||
""".lstrip()
|
||||
|
||||
MEMORY_GUIDANCE = """
|
||||
|
||||
@@ -208,6 +208,12 @@ class PythonToolOverrideKwargs(BaseModel):
|
||||
chat_files: list[ChatFile] = []
|
||||
|
||||
|
||||
class ImageGenerationToolOverrideKwargs(BaseModel):
|
||||
"""Override kwargs for image generation tool calls."""
|
||||
|
||||
recent_generated_image_file_ids: list[str] = []
|
||||
|
||||
|
||||
class SearchToolRunContext(BaseModel):
|
||||
emitter: Emitter
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ from onyx.server.query_and_chat.streaming_models import ImageGenerationToolHeart
|
||||
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
|
||||
from onyx.server.query_and_chat.streaming_models import Packet
|
||||
from onyx.tools.interface import Tool
|
||||
from onyx.tools.models import ImageGenerationToolOverrideKwargs
|
||||
from onyx.tools.models import ToolCallException
|
||||
from onyx.tools.models import ToolExecutionException
|
||||
from onyx.tools.models import ToolResponse
|
||||
@@ -47,16 +48,9 @@ PROMPT_FIELD = "prompt"
|
||||
REFERENCE_IMAGE_FILE_IDS_FIELD = "reference_image_file_ids"
|
||||
|
||||
|
||||
class ImageGenerationTool(Tool[None]):
|
||||
class ImageGenerationTool(Tool[ImageGenerationToolOverrideKwargs | None]):
|
||||
NAME = "generate_image"
|
||||
DESCRIPTION = (
|
||||
"Generate a new image from a prompt, or edit/modify existing images"
|
||||
" from this conversation. To edit existing images — whether the user"
|
||||
" attached them or they were produced by a previous generate_image"
|
||||
" call — pass their file_id values in `reference_image_file_ids`."
|
||||
" Do not use unless the user specifically requests an image or asks"
|
||||
" to edit an image."
|
||||
)
|
||||
DESCRIPTION = "Generate an image based on a prompt. Do not use unless the user specifically requests an image."
|
||||
DISPLAY_NAME = "Image Generation"
|
||||
|
||||
def __init__(
|
||||
@@ -148,14 +142,8 @@ class ImageGenerationTool(Tool[None]):
|
||||
REFERENCE_IMAGE_FILE_IDS_FIELD: {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"Optional list of image file_id values to edit/modify/use as reference."
|
||||
" Accepts file_ids from two sources, with the same mechanics for both:"
|
||||
" (1) images the user attached to a user message — their file_id appears"
|
||||
" in the tag `[attached image — file_id: <id>]` right before the image"
|
||||
" in that message; (2) images returned by previous generate_image tool"
|
||||
" calls — their file_id appears in that call's response JSON. Leave"
|
||||
" unset/empty for a brand-new generation unrelated to any existing image."
|
||||
" The first file_id in the list is treated as the primary edit source."
|
||||
"Optional image file IDs to use as reference context for edits/variations. "
|
||||
"Use the file_id values returned by previous generate_image calls."
|
||||
),
|
||||
"items": {
|
||||
"type": "string",
|
||||
@@ -266,31 +254,41 @@ class ImageGenerationTool(Tool[None]):
|
||||
def _resolve_reference_image_file_ids(
|
||||
self,
|
||||
llm_kwargs: dict[str, Any],
|
||||
override_kwargs: ImageGenerationToolOverrideKwargs | None,
|
||||
) -> list[str]:
|
||||
raw_reference_ids = llm_kwargs.get(REFERENCE_IMAGE_FILE_IDS_FIELD)
|
||||
if raw_reference_ids is None:
|
||||
# No references requested — plain generation.
|
||||
return []
|
||||
|
||||
if not isinstance(raw_reference_ids, list) or not all(
|
||||
isinstance(file_id, str) for file_id in raw_reference_ids
|
||||
if raw_reference_ids is not None:
|
||||
if not isinstance(raw_reference_ids, list) or not all(
|
||||
isinstance(file_id, str) for file_id in raw_reference_ids
|
||||
):
|
||||
raise ToolCallException(
|
||||
message=(
|
||||
f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
|
||||
),
|
||||
llm_facing_message=(
|
||||
f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
|
||||
),
|
||||
)
|
||||
reference_image_file_ids = [
|
||||
file_id.strip() for file_id in raw_reference_ids if file_id.strip()
|
||||
]
|
||||
elif (
|
||||
override_kwargs
|
||||
and override_kwargs.recent_generated_image_file_ids
|
||||
and self.img_provider.supports_reference_images
|
||||
):
|
||||
raise ToolCallException(
|
||||
message=(
|
||||
f"Invalid {REFERENCE_IMAGE_FILE_IDS_FIELD}: expected array of strings, got {type(raw_reference_ids)}"
|
||||
),
|
||||
llm_facing_message=(
|
||||
f"The '{REFERENCE_IMAGE_FILE_IDS_FIELD}' field must be an array of file_id strings."
|
||||
),
|
||||
)
|
||||
# If no explicit reference was provided, default to the most recently generated image.
|
||||
reference_image_file_ids = [
|
||||
override_kwargs.recent_generated_image_file_ids[-1]
|
||||
]
|
||||
else:
|
||||
reference_image_file_ids = []
|
||||
|
||||
# Deduplicate while preserving order (first occurrence wins, so the
|
||||
# LLM's intended "primary edit source" stays at index 0).
|
||||
# Deduplicate while preserving order.
|
||||
deduped_reference_image_ids: list[str] = []
|
||||
seen_ids: set[str] = set()
|
||||
for file_id in raw_reference_ids:
|
||||
file_id = file_id.strip()
|
||||
if not file_id or file_id in seen_ids:
|
||||
for file_id in reference_image_file_ids:
|
||||
if file_id in seen_ids:
|
||||
continue
|
||||
seen_ids.add(file_id)
|
||||
deduped_reference_image_ids.append(file_id)
|
||||
@@ -304,14 +302,14 @@ class ImageGenerationTool(Tool[None]):
|
||||
f"Reference images requested but provider '{self.provider}' does not support image-editing context."
|
||||
),
|
||||
llm_facing_message=(
|
||||
"This image provider does not support editing from existing images. "
|
||||
"This image provider does not support editing from previous image context. "
|
||||
"Try text-only generation, or switch to a provider/model that supports image edits."
|
||||
),
|
||||
)
|
||||
|
||||
max_reference_images = self.img_provider.max_reference_images
|
||||
if max_reference_images > 0:
|
||||
return deduped_reference_image_ids[:max_reference_images]
|
||||
return deduped_reference_image_ids[-max_reference_images:]
|
||||
return deduped_reference_image_ids
|
||||
|
||||
def _load_reference_images(
|
||||
@@ -360,7 +358,7 @@ class ImageGenerationTool(Tool[None]):
|
||||
def run(
|
||||
self,
|
||||
placement: Placement,
|
||||
override_kwargs: None = None, # noqa: ARG002
|
||||
override_kwargs: ImageGenerationToolOverrideKwargs | None = None,
|
||||
**llm_kwargs: Any,
|
||||
) -> ToolResponse:
|
||||
if PROMPT_FIELD not in llm_kwargs:
|
||||
@@ -375,6 +373,7 @@ class ImageGenerationTool(Tool[None]):
|
||||
shape = ImageShape(llm_kwargs.get("shape", ImageShape.SQUARE.value))
|
||||
reference_image_file_ids = self._resolve_reference_image_file_ids(
|
||||
llm_kwargs=llm_kwargs,
|
||||
override_kwargs=override_kwargs,
|
||||
)
|
||||
reference_images = self._load_reference_images(reference_image_file_ids)
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import traceback
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
@@ -13,6 +14,7 @@ from onyx.server.query_and_chat.streaming_models import SectionEnd
|
||||
from onyx.tools.interface import Tool
|
||||
from onyx.tools.models import ChatFile
|
||||
from onyx.tools.models import ChatMinimalTextMessage
|
||||
from onyx.tools.models import ImageGenerationToolOverrideKwargs
|
||||
from onyx.tools.models import OpenURLToolOverrideKwargs
|
||||
from onyx.tools.models import ParallelToolCallResponse
|
||||
from onyx.tools.models import PythonToolOverrideKwargs
|
||||
@@ -22,6 +24,9 @@ from onyx.tools.models import ToolCallKickoff
|
||||
from onyx.tools.models import ToolExecutionException
|
||||
from onyx.tools.models import ToolResponse
|
||||
from onyx.tools.models import WebSearchToolOverrideKwargs
|
||||
from onyx.tools.tool_implementations.images.image_generation_tool import (
|
||||
ImageGenerationTool,
|
||||
)
|
||||
from onyx.tools.tool_implementations.memory.memory_tool import MemoryTool
|
||||
from onyx.tools.tool_implementations.memory.memory_tool import MemoryToolOverrideKwargs
|
||||
from onyx.tools.tool_implementations.open_url.open_url_tool import OpenURLTool
|
||||
@@ -105,6 +110,63 @@ def _merge_tool_calls(tool_calls: list[ToolCallKickoff]) -> list[ToolCallKickoff
|
||||
return merged_calls
|
||||
|
||||
|
||||
def _extract_image_file_ids_from_tool_response_message(
|
||||
message: str,
|
||||
) -> list[str]:
|
||||
try:
|
||||
parsed_message = json.loads(message)
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
|
||||
parsed_items: list[Any] = (
|
||||
parsed_message if isinstance(parsed_message, list) else [parsed_message]
|
||||
)
|
||||
file_ids: list[str] = []
|
||||
for item in parsed_items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
|
||||
file_id = item.get("file_id")
|
||||
if isinstance(file_id, str):
|
||||
file_ids.append(file_id)
|
||||
|
||||
return file_ids
|
||||
|
||||
|
||||
def _extract_recent_generated_image_file_ids(
|
||||
message_history: list[ChatMessageSimple],
|
||||
) -> list[str]:
|
||||
tool_name_by_tool_call_id: dict[str, str] = {}
|
||||
recent_image_file_ids: list[str] = []
|
||||
seen_file_ids: set[str] = set()
|
||||
|
||||
for message in message_history:
|
||||
if message.message_type == MessageType.ASSISTANT and message.tool_calls:
|
||||
for tool_call in message.tool_calls:
|
||||
tool_name_by_tool_call_id[tool_call.tool_call_id] = tool_call.tool_name
|
||||
continue
|
||||
|
||||
if (
|
||||
message.message_type != MessageType.TOOL_CALL_RESPONSE
|
||||
or not message.tool_call_id
|
||||
):
|
||||
continue
|
||||
|
||||
tool_name = tool_name_by_tool_call_id.get(message.tool_call_id)
|
||||
if tool_name != ImageGenerationTool.NAME:
|
||||
continue
|
||||
|
||||
for file_id in _extract_image_file_ids_from_tool_response_message(
|
||||
message.message
|
||||
):
|
||||
if file_id in seen_file_ids:
|
||||
continue
|
||||
seen_file_ids.add(file_id)
|
||||
recent_image_file_ids.append(file_id)
|
||||
|
||||
return recent_image_file_ids
|
||||
|
||||
|
||||
def _safe_run_single_tool(
|
||||
tool: Tool,
|
||||
tool_call: ToolCallKickoff,
|
||||
@@ -324,6 +386,9 @@ def run_tool_calls(
|
||||
url_to_citation: dict[str, int] = {
|
||||
url: citation_num for citation_num, url in citation_mapping.items()
|
||||
}
|
||||
recent_generated_image_file_ids = _extract_recent_generated_image_file_ids(
|
||||
message_history
|
||||
)
|
||||
|
||||
# Prepare all tool calls with their override_kwargs
|
||||
# Each tool gets a unique starting citation number to avoid conflicts when running in parallel
|
||||
@@ -340,6 +405,7 @@ def run_tool_calls(
|
||||
| WebSearchToolOverrideKwargs
|
||||
| OpenURLToolOverrideKwargs
|
||||
| PythonToolOverrideKwargs
|
||||
| ImageGenerationToolOverrideKwargs
|
||||
| MemoryToolOverrideKwargs
|
||||
| None
|
||||
) = None
|
||||
@@ -388,6 +454,10 @@ def run_tool_calls(
|
||||
override_kwargs = PythonToolOverrideKwargs(
|
||||
chat_files=chat_files or [],
|
||||
)
|
||||
elif isinstance(tool, ImageGenerationTool):
|
||||
override_kwargs = ImageGenerationToolOverrideKwargs(
|
||||
recent_generated_image_file_ids=recent_generated_image_file_ids
|
||||
)
|
||||
elif isinstance(tool, MemoryTool):
|
||||
override_kwargs = MemoryToolOverrideKwargs(
|
||||
user_name=(
|
||||
|
||||
769
backend/tests/unit/onyx/indexing/test_document_chunker.py
Normal file
769
backend/tests/unit/onyx/indexing/test_document_chunker.py
Normal file
@@ -0,0 +1,769 @@
|
||||
"""Unit tests for Chunker._chunk_document_with_sections.
|
||||
|
||||
These tests use a fake character-level tokenizer so every char counts as
|
||||
exactly one token. This makes token-limit arithmetic deterministic and lets
|
||||
us exercise every branch of the method without pulling real embedding
|
||||
models into the test.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import SECTION_SEPARATOR
|
||||
from onyx.connectors.models import IndexingDocument
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.indexing import chunker as chunker_module
|
||||
from onyx.indexing.chunker import Chunker
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
|
||||
|
||||
class CharTokenizer(BaseTokenizer):
|
||||
"""1 character == 1 token. Deterministic & trivial to reason about."""
|
||||
|
||||
def encode(self, string: str) -> list[int]:
|
||||
return [ord(c) for c in string]
|
||||
|
||||
def tokenize(self, string: str) -> list[str]:
|
||||
return list(string)
|
||||
|
||||
def decode(self, tokens: list[int]) -> str:
|
||||
return "".join(chr(t) for t in tokens)
|
||||
|
||||
|
||||
# With a char-level tokenizer, each char is a token. 200 is comfortably
|
||||
# above BLURB_SIZE (128) so the blurb splitter won't get weird on small text.
|
||||
CHUNK_LIMIT = 200
|
||||
|
||||
|
||||
def _make_chunker(
|
||||
chunk_token_limit: int = CHUNK_LIMIT,
|
||||
enable_multipass: bool = False,
|
||||
) -> Chunker:
|
||||
return Chunker(
|
||||
tokenizer=CharTokenizer(),
|
||||
enable_multipass=enable_multipass,
|
||||
enable_large_chunks=False,
|
||||
enable_contextual_rag=False,
|
||||
chunk_token_limit=chunk_token_limit,
|
||||
)
|
||||
|
||||
|
||||
def _make_doc(
|
||||
sections: list[Section],
|
||||
title: str | None = "Test Doc",
|
||||
doc_id: str = "doc1",
|
||||
) -> IndexingDocument:
|
||||
return IndexingDocument(
|
||||
id=doc_id,
|
||||
source=DocumentSource.WEB,
|
||||
semantic_identifier=doc_id,
|
||||
title=title,
|
||||
metadata={},
|
||||
sections=[], # real sections unused — method reads processed_sections
|
||||
processed_sections=sections,
|
||||
)
|
||||
|
||||
|
||||
# --- Empty / degenerate input -------------------------------------------------
|
||||
|
||||
|
||||
def test_empty_processed_sections_returns_single_empty_safety_chunk() -> None:
|
||||
"""No sections at all should still yield one empty chunk (the
|
||||
`or not chunks` safety branch at the end)."""
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(sections=[])
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=[],
|
||||
title_prefix="TITLE\n",
|
||||
metadata_suffix_semantic="meta_sem",
|
||||
metadata_suffix_keyword="meta_kw",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == ""
|
||||
assert chunks[0].chunk_id == 0
|
||||
assert chunks[0].title_prefix == "TITLE\n"
|
||||
assert chunks[0].metadata_suffix_semantic == "meta_sem"
|
||||
assert chunks[0].metadata_suffix_keyword == "meta_kw"
|
||||
# safe default link offsets
|
||||
assert chunks[0].source_links == {0: ""}
|
||||
|
||||
|
||||
def test_empty_section_on_first_position_without_title_is_skipped() -> None:
|
||||
"""Doc has no title, first section has empty text — the guard
|
||||
`(not document.title or section_idx > 0)` means it IS skipped."""
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[Section(text="", link="l0")],
|
||||
title=None,
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# skipped → no real content, but safety branch still yields 1 empty chunk
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == ""
|
||||
|
||||
|
||||
def test_empty_section_on_later_position_is_skipped_even_with_title() -> None:
|
||||
"""Index > 0 empty sections are skipped regardless of title."""
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="Alpha.", link="l0"),
|
||||
Section(text="", link="l1"), # should be skipped
|
||||
Section(text="Beta.", link="l2"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert "Alpha." in chunks[0].content
|
||||
assert "Beta." in chunks[0].content
|
||||
# link offsets should only contain l0 and l2 (no l1)
|
||||
assert "l1" not in (chunks[0].source_links or {}).values()
|
||||
|
||||
|
||||
# --- Single text section ------------------------------------------------------
|
||||
|
||||
|
||||
def test_single_small_text_section_becomes_one_chunk() -> None:
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(sections=[Section(text="Hello world.", link="https://a")])
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="TITLE\n",
|
||||
metadata_suffix_semantic="ms",
|
||||
metadata_suffix_keyword="mk",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
chunk = chunks[0]
|
||||
assert chunk.content == "Hello world."
|
||||
assert chunk.source_links == {0: "https://a"}
|
||||
assert chunk.title_prefix == "TITLE\n"
|
||||
assert chunk.metadata_suffix_semantic == "ms"
|
||||
assert chunk.metadata_suffix_keyword == "mk"
|
||||
assert chunk.section_continuation is False
|
||||
assert chunk.image_file_id is None
|
||||
|
||||
|
||||
# --- Multiple text sections combined -----------------------------------------
|
||||
|
||||
|
||||
def test_multiple_small_sections_combine_into_one_chunk() -> None:
|
||||
chunker = _make_chunker()
|
||||
sections = [
|
||||
Section(text="Part one.", link="l1"),
|
||||
Section(text="Part two.", link="l2"),
|
||||
Section(text="Part three.", link="l3"),
|
||||
]
|
||||
doc = _make_doc(sections=sections)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
expected = SECTION_SEPARATOR.join(["Part one.", "Part two.", "Part three."])
|
||||
assert chunks[0].content == expected
|
||||
|
||||
# link_offsets: indexed by shared_precompare_cleanup length of the
|
||||
# chunk_text *before* each section was appended.
|
||||
# "" -> "", len 0
|
||||
# "Part one." -> "partone", len 7
|
||||
# "Part one.\n\nPart two." -> "partoneparttwo", len 14
|
||||
assert chunks[0].source_links == {0: "l1", 7: "l2", 14: "l3"}
|
||||
|
||||
|
||||
def test_sections_overflow_into_second_chunk() -> None:
|
||||
"""Two sections that together exceed content_token_limit should
|
||||
finalize the first as one chunk and start a new one."""
|
||||
chunker = _make_chunker()
|
||||
# char-level: 120 char section → 120 tokens. 2 of these plus separator
|
||||
# exceed a 200-token limit, forcing a flush.
|
||||
a = "A" * 120
|
||||
b = "B" * 120
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text=a, link="la"),
|
||||
Section(text=b, link="lb"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 2
|
||||
assert chunks[0].content == a
|
||||
assert chunks[1].content == b
|
||||
# first chunk is not a continuation; second starts a new section → not either
|
||||
assert chunks[0].section_continuation is False
|
||||
assert chunks[1].section_continuation is False
|
||||
# chunk_ids should be sequential starting at 0
|
||||
assert chunks[0].chunk_id == 0
|
||||
assert chunks[1].chunk_id == 1
|
||||
# links routed appropriately
|
||||
assert chunks[0].source_links == {0: "la"}
|
||||
assert chunks[1].source_links == {0: "lb"}
|
||||
|
||||
|
||||
# --- Image section handling --------------------------------------------------
|
||||
|
||||
|
||||
def test_image_only_section_produces_single_chunk_with_image_id() -> None:
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(
|
||||
text="summary of image",
|
||||
link="https://img",
|
||||
image_file_id="img-abc",
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].image_file_id == "img-abc"
|
||||
assert chunks[0].content == "summary of image"
|
||||
assert chunks[0].source_links == {0: "https://img"}
|
||||
|
||||
|
||||
def test_image_section_flushes_pending_text_and_creates_its_own_chunk() -> None:
|
||||
"""A buffered text section followed by an image section:
|
||||
the pending text should be flushed first, then the image chunk."""
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="Pending text.", link="ltext"),
|
||||
Section(
|
||||
text="image summary",
|
||||
link="limage",
|
||||
image_file_id="img-1",
|
||||
),
|
||||
Section(text="Trailing text.", link="ltail"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 3
|
||||
|
||||
# 0: flushed pending text
|
||||
assert chunks[0].content == "Pending text."
|
||||
assert chunks[0].image_file_id is None
|
||||
assert chunks[0].source_links == {0: "ltext"}
|
||||
|
||||
# 1: image chunk
|
||||
assert chunks[1].content == "image summary"
|
||||
assert chunks[1].image_file_id == "img-1"
|
||||
assert chunks[1].source_links == {0: "limage"}
|
||||
|
||||
# 2: trailing text, started fresh after image
|
||||
assert chunks[2].content == "Trailing text."
|
||||
assert chunks[2].image_file_id is None
|
||||
assert chunks[2].source_links == {0: "ltail"}
|
||||
|
||||
|
||||
def test_image_section_without_link_gets_empty_links_dict() -> None:
|
||||
"""If an image section has no link, links param is {} (not {0: ""})."""
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="img", link=None, image_file_id="img-xyz"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].image_file_id == "img-xyz"
|
||||
# _create_chunk falls back to {0: ""} when given an empty dict
|
||||
assert chunks[0].source_links == {0: ""}
|
||||
|
||||
|
||||
# --- Oversized section splitting ---------------------------------------------
|
||||
|
||||
|
||||
def test_oversized_section_is_split_across_multiple_chunks() -> None:
|
||||
"""A section whose text exceeds content_token_limit should be passed
|
||||
through chunk_splitter and yield >1 chunks; only the first is not a
|
||||
continuation."""
|
||||
chunker = _make_chunker()
|
||||
# Build a section whose char-count is well over CHUNK_LIMIT (200), made
|
||||
# of many short sentences so chonkie's SentenceChunker can split cleanly.
|
||||
section_text = (
|
||||
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
|
||||
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
|
||||
"Chi psi omega. One two three. Four five six. Seven eight nine. "
|
||||
"Ten eleven twelve. Thirteen fourteen fifteen. "
|
||||
"Sixteen seventeen eighteen. Nineteen twenty."
|
||||
)
|
||||
assert len(section_text) > CHUNK_LIMIT
|
||||
|
||||
doc = _make_doc(
|
||||
sections=[Section(text=section_text, link="big-link")],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) >= 2
|
||||
# First chunk is fresh, rest are continuations
|
||||
assert chunks[0].section_continuation is False
|
||||
for c in chunks[1:]:
|
||||
assert c.section_continuation is True
|
||||
# Every produced chunk should carry the section's link
|
||||
for c in chunks:
|
||||
assert c.source_links == {0: "big-link"}
|
||||
# Concatenated content should roughly cover the original (allowing
|
||||
# for chunker boundary whitespace differences).
|
||||
joined = "".join(c.content for c in chunks)
|
||||
for word in ("Alpha", "omega", "twenty"):
|
||||
assert word in joined
|
||||
|
||||
|
||||
def test_oversized_section_flushes_pending_text_first() -> None:
|
||||
"""A buffered text section followed by an oversized section should
|
||||
flush the pending chunk first, then emit the split chunks."""
|
||||
chunker = _make_chunker()
|
||||
pending = "Pending buffered text."
|
||||
big = (
|
||||
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
|
||||
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
|
||||
"Chi psi omega. One two three. Four five six. Seven eight nine. "
|
||||
"Ten eleven twelve. Thirteen fourteen fifteen. Sixteen seventeen."
|
||||
)
|
||||
assert len(big) > CHUNK_LIMIT
|
||||
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text=pending, link="l-pending"),
|
||||
Section(text=big, link="l-big"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# First chunk is the flushed pending text
|
||||
assert chunks[0].content == pending
|
||||
assert chunks[0].source_links == {0: "l-pending"}
|
||||
assert chunks[0].section_continuation is False
|
||||
|
||||
# Remaining chunks correspond to the oversized section
|
||||
assert len(chunks) >= 2
|
||||
for c in chunks[1:]:
|
||||
assert c.source_links == {0: "l-big"}
|
||||
# Within the oversized section, the first is fresh and the rest are
|
||||
# continuations.
|
||||
assert chunks[1].section_continuation is False
|
||||
for c in chunks[2:]:
|
||||
assert c.section_continuation is True
|
||||
|
||||
|
||||
# --- Title prefix / metadata propagation -------------------------------------
|
||||
|
||||
|
||||
def test_title_prefix_and_metadata_propagate_to_all_chunks() -> None:
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="A" * 120, link="la"),
|
||||
Section(text="B" * 120, link="lb"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="MY_TITLE\n",
|
||||
metadata_suffix_semantic="MS",
|
||||
metadata_suffix_keyword="MK",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 2
|
||||
for chunk in chunks:
|
||||
assert chunk.title_prefix == "MY_TITLE\n"
|
||||
assert chunk.metadata_suffix_semantic == "MS"
|
||||
assert chunk.metadata_suffix_keyword == "MK"
|
||||
|
||||
|
||||
# --- chunk_id monotonicity ---------------------------------------------------
|
||||
|
||||
|
||||
def test_chunk_ids_are_sequential_starting_at_zero() -> None:
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="A" * 120, link="la"),
|
||||
Section(text="B" * 120, link="lb"),
|
||||
Section(text="C" * 120, link="lc"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert [c.chunk_id for c in chunks] == list(range(len(chunks)))
|
||||
|
||||
|
||||
# --- Overflow accumulation behavior ------------------------------------------
|
||||
|
||||
|
||||
def test_overflow_flush_then_subsequent_section_joins_new_chunk() -> None:
|
||||
"""After an overflow flush starts a new chunk, the next fitting section
|
||||
should combine into that same new chunk (not spawn a third)."""
|
||||
chunker = _make_chunker()
|
||||
# 120 + 120 > 200 → first two sections produce two chunks.
|
||||
# Third section is small (20 chars) → should fit with second.
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="A" * 120, link="la"),
|
||||
Section(text="B" * 120, link="lb"),
|
||||
Section(text="C" * 20, link="lc"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 2
|
||||
assert chunks[0].content == "A" * 120
|
||||
assert chunks[1].content == ("B" * 120) + SECTION_SEPARATOR + ("C" * 20)
|
||||
# link_offsets on second chunk: lb at 0, lc at precompare-len("BBBB...")=120
|
||||
assert chunks[1].source_links == {0: "lb", 120: "lc"}
|
||||
|
||||
|
||||
def test_small_section_after_oversized_starts_a_fresh_chunk() -> None:
|
||||
"""After an oversized section is emitted as its own chunks, the internal
|
||||
accumulator should be empty so a following small section starts a new
|
||||
chunk instead of being swallowed."""
|
||||
chunker = _make_chunker()
|
||||
big = (
|
||||
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
|
||||
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
|
||||
"Chi psi omega. One two three. Four five six. Seven eight nine. "
|
||||
"Ten eleven twelve. Thirteen fourteen fifteen. Sixteen seventeen."
|
||||
)
|
||||
assert len(big) > CHUNK_LIMIT
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text=big, link="l-big"),
|
||||
Section(text="Tail text.", link="l-tail"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# All-but-last chunks belong to the oversized section; the very last is
|
||||
# the tail text starting fresh (not a continuation).
|
||||
assert len(chunks) >= 2
|
||||
assert chunks[-1].content == "Tail text."
|
||||
assert chunks[-1].source_links == {0: "l-tail"}
|
||||
assert chunks[-1].section_continuation is False
|
||||
# And earlier oversized chunks never leaked the tail link
|
||||
for c in chunks[:-1]:
|
||||
assert c.source_links == {0: "l-big"}
|
||||
|
||||
|
||||
# --- STRICT_CHUNK_TOKEN_LIMIT fallback path ----------------------------------
|
||||
|
||||
|
||||
def test_strict_chunk_token_limit_subdivides_oversized_split(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""When STRICT_CHUNK_TOKEN_LIMIT is enabled and chonkie's chunk_splitter
|
||||
still produces a piece larger than content_token_limit (e.g. a single
|
||||
no-period run), the code must fall back to _split_oversized_chunk."""
|
||||
monkeypatch.setattr(chunker_module, "STRICT_CHUNK_TOKEN_LIMIT", True)
|
||||
chunker = _make_chunker()
|
||||
# 500 non-whitespace chars with no sentence boundaries — chonkie will
|
||||
# return it as one oversized piece (>200) which triggers the fallback.
|
||||
run = "a" * 500
|
||||
doc = _make_doc(sections=[Section(text=run, link="l-run")])
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# With CHUNK_LIMIT=200 and a 500-char run we expect ceil(500/200)=3 sub-chunks.
|
||||
assert len(chunks) == 3
|
||||
# First is fresh, rest are continuations (is_continuation=(j != 0))
|
||||
assert chunks[0].section_continuation is False
|
||||
assert chunks[1].section_continuation is True
|
||||
assert chunks[2].section_continuation is True
|
||||
# All carry the section link
|
||||
for c in chunks:
|
||||
assert c.source_links == {0: "l-run"}
|
||||
# NOTE: we do NOT assert the chunks are at or below content_token_limit.
|
||||
# _split_oversized_chunk joins tokens with " ", which means the resulting
|
||||
# chunk contents can exceed the limit when tokens are short. That's a
|
||||
# quirk of the current implementation and this test pins the window
|
||||
# slicing, not the post-join length.
|
||||
|
||||
|
||||
def test_strict_chunk_token_limit_disabled_allows_oversized_split(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Same pathological input, but with STRICT disabled: the oversized
|
||||
split is emitted verbatim as a single chunk (current behavior)."""
|
||||
monkeypatch.setattr(chunker_module, "STRICT_CHUNK_TOKEN_LIMIT", False)
|
||||
chunker = _make_chunker()
|
||||
run = "a" * 500
|
||||
doc = _make_doc(sections=[Section(text=run, link="l-run")])
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == run
|
||||
assert chunks[0].section_continuation is False
|
||||
|
||||
|
||||
# --- First-section-with-empty-text-but-document-has-title edge case ----------
|
||||
|
||||
|
||||
def test_first_empty_section_with_title_is_processed_not_skipped() -> None:
|
||||
"""The guard `(not document.title or section_idx > 0)` means: when
|
||||
the doc has a title AND it's the first section, an empty text section
|
||||
is NOT skipped. This pins current behavior so a refactor can't silently
|
||||
change it."""
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="", link="l0"), # empty first section, kept
|
||||
Section(text="Real content.", link="l1"),
|
||||
],
|
||||
title="Has A Title",
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == "Real content."
|
||||
# First (empty) section did register a link_offset at 0 before being
|
||||
# overwritten; that offset is then reused when "Real content." is added,
|
||||
# because shared_precompare_cleanup("") is still "". End state: {0: "l1"}
|
||||
assert chunks[0].source_links == {0: "l1"}
|
||||
|
||||
|
||||
# --- clean_text is applied to section text -----------------------------------
|
||||
|
||||
|
||||
def test_clean_text_strips_control_chars_from_section_content() -> None:
|
||||
"""clean_text() should remove control chars before the text enters the
|
||||
accumulator — verifies the call isn't dropped by a refactor."""
|
||||
chunker = _make_chunker()
|
||||
# NUL + BEL are control chars below 0x20 and not \n or \t → should be
|
||||
# stripped by clean_text.
|
||||
dirty = "Hello\x00 World\x07!"
|
||||
doc = _make_doc(sections=[Section(text=dirty, link="l1")])
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == "Hello World!"
|
||||
|
||||
|
||||
# --- None-valued fields ------------------------------------------------------
|
||||
|
||||
|
||||
def test_section_with_none_text_behaves_like_empty_string() -> None:
|
||||
"""`section.text` may be None — the method coerces via
|
||||
`str(section.text or "")`, so a None-text section behaves identically
|
||||
to an empty one (skipped unless it's the first section of a titled doc)."""
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="Alpha.", link="la"),
|
||||
Section(text=None, link="lnone"), # idx 1 → skipped
|
||||
Section(text="Beta.", link="lb"),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert "Alpha." in chunks[0].content
|
||||
assert "Beta." in chunks[0].content
|
||||
assert "lnone" not in (chunks[0].source_links or {}).values()
|
||||
|
||||
|
||||
# --- Trailing empty chunk suppression ----------------------------------------
|
||||
|
||||
|
||||
def test_no_trailing_empty_chunk_when_last_section_was_image() -> None:
|
||||
"""If the final section was an image (which emits its own chunk and
|
||||
resets chunk_text), the safety `or not chunks` branch should NOT fire
|
||||
because chunks is non-empty. Pin this explicitly."""
|
||||
chunker = _make_chunker()
|
||||
doc = _make_doc(
|
||||
sections=[
|
||||
Section(text="Leading text.", link="ltext"),
|
||||
Section(
|
||||
text="img summary", link="limg", image_file_id="img-final"
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
assert len(chunks) == 2
|
||||
assert chunks[0].content == "Leading text."
|
||||
assert chunks[0].image_file_id is None
|
||||
assert chunks[1].content == "img summary"
|
||||
assert chunks[1].image_file_id == "img-final"
|
||||
# Crucially: no third empty chunk got appended at the end.
|
||||
|
||||
|
||||
def test_no_trailing_empty_chunk_when_last_section_was_oversized() -> None:
|
||||
"""Same guarantee for oversized sections: their splits fully clear the
|
||||
accumulator, and the trailing safety branch should be a no-op."""
|
||||
chunker = _make_chunker()
|
||||
big = (
|
||||
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
|
||||
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
|
||||
"Chi psi omega. One two three. Four five six. Seven eight nine. "
|
||||
"Ten eleven twelve. Thirteen fourteen fifteen. Sixteen seventeen."
|
||||
)
|
||||
assert len(big) > CHUNK_LIMIT
|
||||
doc = _make_doc(sections=[Section(text=big, link="l-big")])
|
||||
|
||||
chunks = chunker._chunk_document_with_sections(
|
||||
document=doc,
|
||||
sections=doc.processed_sections,
|
||||
title_prefix="",
|
||||
metadata_suffix_semantic="",
|
||||
metadata_suffix_keyword="",
|
||||
content_token_limit=CHUNK_LIMIT,
|
||||
)
|
||||
|
||||
# Every chunk should be non-empty — no dangling "" chunk at the tail.
|
||||
assert all(c.content.strip() for c in chunks)
|
||||
@@ -1,115 +0,0 @@
|
||||
"""Tests for ``ImageGenerationTool._resolve_reference_image_file_ids``.
|
||||
|
||||
The resolver turns the LLM's ``reference_image_file_ids`` argument into a
|
||||
cleaned list of file IDs to hand to ``_load_reference_images``. It trusts
|
||||
the LLM's picks — the LLM can only see file IDs that actually appear in
|
||||
the conversation (via ``[attached image — file_id: <id>]`` tags on user
|
||||
messages and the JSON returned by prior generate_image calls), so we
|
||||
don't re-validate against an allow-list in the tool itself.
|
||||
"""
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.tools.models import ToolCallException
|
||||
from onyx.tools.tool_implementations.images.image_generation_tool import (
|
||||
ImageGenerationTool,
|
||||
)
|
||||
from onyx.tools.tool_implementations.images.image_generation_tool import (
|
||||
REFERENCE_IMAGE_FILE_IDS_FIELD,
|
||||
)
|
||||
|
||||
|
||||
def _make_tool(
|
||||
supports_reference_images: bool = True,
|
||||
max_reference_images: int = 16,
|
||||
) -> ImageGenerationTool:
|
||||
"""Construct a tool with a mock provider so no credentials/network are needed."""
|
||||
with patch(
|
||||
"onyx.tools.tool_implementations.images.image_generation_tool.get_image_generation_provider"
|
||||
) as mock_get_provider:
|
||||
mock_provider = MagicMock()
|
||||
mock_provider.supports_reference_images = supports_reference_images
|
||||
mock_provider.max_reference_images = max_reference_images
|
||||
mock_get_provider.return_value = mock_provider
|
||||
|
||||
return ImageGenerationTool(
|
||||
image_generation_credentials=MagicMock(),
|
||||
tool_id=1,
|
||||
emitter=MagicMock(),
|
||||
model="gpt-image-1",
|
||||
provider="openai",
|
||||
)
|
||||
|
||||
|
||||
class TestResolveReferenceImageFileIds:
|
||||
def test_unset_returns_empty_plain_generation(self) -> None:
|
||||
tool = _make_tool()
|
||||
assert tool._resolve_reference_image_file_ids(llm_kwargs={}) == []
|
||||
|
||||
def test_empty_list_is_treated_like_unset(self) -> None:
|
||||
tool = _make_tool()
|
||||
result = tool._resolve_reference_image_file_ids(
|
||||
llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: []},
|
||||
)
|
||||
assert result == []
|
||||
|
||||
def test_passes_llm_supplied_ids_through(self) -> None:
|
||||
tool = _make_tool()
|
||||
result = tool._resolve_reference_image_file_ids(
|
||||
llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["upload-1", "gen-1"]},
|
||||
)
|
||||
# Order preserved — first entry is the primary edit source.
|
||||
assert result == ["upload-1", "gen-1"]
|
||||
|
||||
def test_invalid_shape_raises(self) -> None:
|
||||
tool = _make_tool()
|
||||
with pytest.raises(ToolCallException):
|
||||
tool._resolve_reference_image_file_ids(
|
||||
llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: "not-a-list"},
|
||||
)
|
||||
|
||||
def test_non_string_element_raises(self) -> None:
|
||||
tool = _make_tool()
|
||||
with pytest.raises(ToolCallException):
|
||||
tool._resolve_reference_image_file_ids(
|
||||
llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["ok", 123]},
|
||||
)
|
||||
|
||||
def test_deduplicates_preserving_first_occurrence(self) -> None:
|
||||
tool = _make_tool()
|
||||
result = tool._resolve_reference_image_file_ids(
|
||||
llm_kwargs={
|
||||
REFERENCE_IMAGE_FILE_IDS_FIELD: ["gen-1", "gen-2", "gen-1"]
|
||||
},
|
||||
)
|
||||
assert result == ["gen-1", "gen-2"]
|
||||
|
||||
def test_strips_whitespace_and_skips_empty_strings(self) -> None:
|
||||
tool = _make_tool()
|
||||
result = tool._resolve_reference_image_file_ids(
|
||||
llm_kwargs={
|
||||
REFERENCE_IMAGE_FILE_IDS_FIELD: [" gen-1 ", "", " "]
|
||||
},
|
||||
)
|
||||
assert result == ["gen-1"]
|
||||
|
||||
def test_provider_without_reference_support_raises(self) -> None:
|
||||
tool = _make_tool(supports_reference_images=False)
|
||||
with pytest.raises(ToolCallException):
|
||||
tool._resolve_reference_image_file_ids(
|
||||
llm_kwargs={REFERENCE_IMAGE_FILE_IDS_FIELD: ["gen-1"]},
|
||||
)
|
||||
|
||||
def test_truncates_to_provider_max_preserving_head(self) -> None:
|
||||
"""When the LLM lists more images than the provider allows, keep the
|
||||
HEAD of the list (the primary edit source + earliest extras) rather
|
||||
than the tail, since the LLM put the most important one first."""
|
||||
tool = _make_tool(max_reference_images=2)
|
||||
result = tool._resolve_reference_image_file_ids(
|
||||
llm_kwargs={
|
||||
REFERENCE_IMAGE_FILE_IDS_FIELD: ["a", "b", "c", "d"]
|
||||
},
|
||||
)
|
||||
assert result == ["a", "b"]
|
||||
@@ -1,5 +1,10 @@
|
||||
from onyx.chat.models import ChatMessageSimple
|
||||
from onyx.chat.models import ToolCallSimple
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.server.query_and_chat.placement import Placement
|
||||
from onyx.tools.models import ToolCallKickoff
|
||||
from onyx.tools.tool_runner import _extract_image_file_ids_from_tool_response_message
|
||||
from onyx.tools.tool_runner import _extract_recent_generated_image_file_ids
|
||||
from onyx.tools.tool_runner import _merge_tool_calls
|
||||
|
||||
|
||||
@@ -308,3 +313,61 @@ class TestMergeToolCalls:
|
||||
# String should be converted to list item
|
||||
assert result[0].tool_args["queries"] == ["single_query", "q2"]
|
||||
|
||||
|
||||
class TestImageHistoryExtraction:
|
||||
def test_extracts_image_file_ids_from_json_response(self) -> None:
|
||||
msg = '[{"file_id":"img-1","revised_prompt":"v1"},{"file_id":"img-2","revised_prompt":"v2"}]'
|
||||
assert _extract_image_file_ids_from_tool_response_message(msg) == [
|
||||
"img-1",
|
||||
"img-2",
|
||||
]
|
||||
|
||||
def test_extracts_recent_generated_image_ids_from_history(self) -> None:
|
||||
history = [
|
||||
ChatMessageSimple(
|
||||
message="",
|
||||
token_count=1,
|
||||
message_type=MessageType.ASSISTANT,
|
||||
tool_calls=[
|
||||
ToolCallSimple(
|
||||
tool_call_id="call_1",
|
||||
tool_name="generate_image",
|
||||
tool_arguments={"prompt": "test"},
|
||||
token_count=1,
|
||||
)
|
||||
],
|
||||
),
|
||||
ChatMessageSimple(
|
||||
message='[{"file_id":"img-1","revised_prompt":"r1"}]',
|
||||
token_count=1,
|
||||
message_type=MessageType.TOOL_CALL_RESPONSE,
|
||||
tool_call_id="call_1",
|
||||
),
|
||||
]
|
||||
|
||||
assert _extract_recent_generated_image_file_ids(history) == ["img-1"]
|
||||
|
||||
def test_ignores_non_image_tool_responses(self) -> None:
|
||||
history = [
|
||||
ChatMessageSimple(
|
||||
message="",
|
||||
token_count=1,
|
||||
message_type=MessageType.ASSISTANT,
|
||||
tool_calls=[
|
||||
ToolCallSimple(
|
||||
tool_call_id="call_1",
|
||||
tool_name="web_search",
|
||||
tool_arguments={"queries": ["q"]},
|
||||
token_count=1,
|
||||
)
|
||||
],
|
||||
),
|
||||
ChatMessageSimple(
|
||||
message='[{"file_id":"img-1","revised_prompt":"r1"}]',
|
||||
token_count=1,
|
||||
message_type=MessageType.TOOL_CALL_RESPONSE,
|
||||
tool_call_id="call_1",
|
||||
),
|
||||
]
|
||||
|
||||
assert _extract_recent_generated_image_file_ids(history) == []
|
||||
|
||||
@@ -5,7 +5,7 @@ home: https://www.onyx.app/
|
||||
sources:
|
||||
- "https://github.com/onyx-dot-app/onyx"
|
||||
type: application
|
||||
version: 0.4.43
|
||||
version: 0.4.44
|
||||
appVersion: latest
|
||||
annotations:
|
||||
category: Productivity
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
{{- if and .Values.ingress.enabled .Values.mcpServer.enabled -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "onyx.fullname" . }}-ingress-mcp-oauth-callback
|
||||
annotations:
|
||||
{{- if not .Values.ingress.className }}
|
||||
kubernetes.io/ingress.class: nginx
|
||||
{{- end }}
|
||||
cert-manager.io/cluster-issuer: {{ include "onyx.fullname" . }}-letsencrypt
|
||||
spec:
|
||||
{{- if .Values.ingress.className }}
|
||||
ingressClassName: {{ .Values.ingress.className }}
|
||||
{{- end }}
|
||||
rules:
|
||||
- host: {{ .Values.ingress.api.host }}
|
||||
http:
|
||||
paths:
|
||||
- path: /mcp/oauth/callback
|
||||
pathType: Exact
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "onyx.fullname" . }}-webserver
|
||||
port:
|
||||
number: {{ .Values.webserver.service.servicePort }}
|
||||
tls:
|
||||
- hosts:
|
||||
- {{ .Values.ingress.api.host }}
|
||||
secretName: {{ include "onyx.fullname" . }}-ingress-mcp-oauth-callback-tls
|
||||
{{- end }}
|
||||
@@ -63,7 +63,7 @@ func checkDevcontainerCLI() {
|
||||
}
|
||||
|
||||
// ensureDockerSock sets the DOCKER_SOCK environment variable if not already set.
|
||||
// devcontainer.json references ${localEnv:DOCKER_SOCK} for the socket mount.
|
||||
// Used by ensureRemoteUser to detect rootless Docker.
|
||||
func ensureDockerSock() {
|
||||
if os.Getenv("DOCKER_SOCK") != "" {
|
||||
return
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import "@opal/components/cards/card/styles.css";
|
||||
import type { PaddingVariants, RoundingVariants } from "@opal/types";
|
||||
import { cardPaddingVariants, cardRoundingVariants } from "@opal/shared";
|
||||
import { paddingVariants, cardRoundingVariants } from "@opal/shared";
|
||||
import { cn } from "@opal/utils";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -79,7 +79,7 @@ function Card({
|
||||
ref,
|
||||
children,
|
||||
}: CardProps) {
|
||||
const padding = cardPaddingVariants[paddingProp];
|
||||
const padding = paddingVariants[paddingProp];
|
||||
const rounding = cardRoundingVariants[roundingProp];
|
||||
|
||||
return (
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import "@opal/components/cards/select-card/styles.css";
|
||||
import type { PaddingVariants, RoundingVariants } from "@opal/types";
|
||||
import { cardPaddingVariants, cardRoundingVariants } from "@opal/shared";
|
||||
import { paddingVariants, cardRoundingVariants } from "@opal/shared";
|
||||
import { cn } from "@opal/utils";
|
||||
import { Interactive, type InteractiveStatefulProps } from "@opal/core";
|
||||
|
||||
@@ -78,7 +78,7 @@ function SelectCard({
|
||||
children,
|
||||
...statefulProps
|
||||
}: SelectCardProps) {
|
||||
const padding = cardPaddingVariants[paddingProp];
|
||||
const padding = paddingVariants[paddingProp];
|
||||
const rounding = cardRoundingVariants[roundingProp];
|
||||
|
||||
return (
|
||||
|
||||
@@ -15,6 +15,42 @@ export const Plain: Story = {
|
||||
render: () => <Divider />,
|
||||
};
|
||||
|
||||
export const Vertical: Story = {
|
||||
render: () => (
|
||||
<div
|
||||
style={{ display: "flex", alignItems: "stretch", height: 64, gap: 16 }}
|
||||
>
|
||||
<span>Left</span>
|
||||
<Divider orientation="vertical" />
|
||||
<span>Right</span>
|
||||
</div>
|
||||
),
|
||||
};
|
||||
|
||||
export const NoPadding: Story = {
|
||||
render: () => <Divider paddingParallel="fit" paddingPerpendicular="fit" />,
|
||||
};
|
||||
|
||||
export const CustomPadding: Story = {
|
||||
render: () => <Divider paddingParallel="lg" paddingPerpendicular="sm" />,
|
||||
};
|
||||
|
||||
export const VerticalNoPadding: Story = {
|
||||
render: () => (
|
||||
<div
|
||||
style={{ display: "flex", alignItems: "stretch", height: 64, gap: 16 }}
|
||||
>
|
||||
<span>Left</span>
|
||||
<Divider
|
||||
orientation="vertical"
|
||||
paddingParallel="fit"
|
||||
paddingPerpendicular="fit"
|
||||
/>
|
||||
<span>Right</span>
|
||||
</div>
|
||||
),
|
||||
};
|
||||
|
||||
export const WithTitle: Story = {
|
||||
render: () => <Divider title="Section" />,
|
||||
};
|
||||
|
||||
@@ -10,7 +10,13 @@ The component uses a discriminated union with four variants. `title` and `descri
|
||||
|
||||
### Bare divider
|
||||
|
||||
No props — renders a plain horizontal line.
|
||||
A plain line with no title or description.
|
||||
|
||||
| Prop | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `orientation` | `"horizontal" \| "vertical"` | `"horizontal"` | Direction of the line |
|
||||
| `paddingParallel` | `PaddingVariants` | `"sm"` | Padding along the line direction (0.5rem) |
|
||||
| `paddingPerpendicular` | `PaddingVariants` | `"xs"` | Padding perpendicular to the line (0.25rem) |
|
||||
|
||||
### Titled divider
|
||||
|
||||
@@ -40,9 +46,18 @@ No props — renders a plain horizontal line.
|
||||
```tsx
|
||||
import { Divider } from "@opal/components";
|
||||
|
||||
// Plain line
|
||||
// Plain horizontal line
|
||||
<Divider />
|
||||
|
||||
// Vertical line
|
||||
<Divider orientation="vertical" />
|
||||
|
||||
// No padding
|
||||
<Divider paddingParallel="fit" paddingPerpendicular="fit" />
|
||||
|
||||
// Custom padding
|
||||
<Divider paddingParallel="lg" paddingPerpendicular="sm" />
|
||||
|
||||
// With title
|
||||
<Divider title="Advanced" />
|
||||
|
||||
|
||||
@@ -2,16 +2,25 @@
|
||||
|
||||
import "@opal/components/divider/styles.css";
|
||||
import { useState, useCallback } from "react";
|
||||
import type { RichStr } from "@opal/types";
|
||||
import type { PaddingVariants, RichStr } from "@opal/types";
|
||||
import { Button, Text } from "@opal/components";
|
||||
import { SvgChevronRight } from "@opal/icons";
|
||||
import { Interactive } from "@opal/core";
|
||||
import { cn } from "@opal/utils";
|
||||
import { paddingXVariants, paddingYVariants } from "@opal/shared";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface DividerNeverFields {
|
||||
interface DividerSharedProps {
|
||||
ref?: React.Ref<HTMLDivElement>;
|
||||
title?: never;
|
||||
description?: never;
|
||||
foldable?: false;
|
||||
orientation?: never;
|
||||
paddingParallel?: never;
|
||||
paddingPerpendicular?: never;
|
||||
open?: never;
|
||||
defaultOpen?: never;
|
||||
onOpenChange?: never;
|
||||
@@ -19,36 +28,37 @@ interface DividerNeverFields {
|
||||
}
|
||||
|
||||
/** Plain line — no title, no description. */
|
||||
interface DividerBareProps extends DividerNeverFields {
|
||||
title?: never;
|
||||
description?: never;
|
||||
foldable?: false;
|
||||
ref?: React.Ref<HTMLDivElement>;
|
||||
}
|
||||
type DividerBareProps = Omit<
|
||||
DividerSharedProps,
|
||||
"orientation" | "paddingParallel" | "paddingPerpendicular"
|
||||
> & {
|
||||
/** Orientation of the line. Default: `"horizontal"`. */
|
||||
orientation?: "horizontal" | "vertical";
|
||||
/** Padding along the line direction. Default: `"sm"` (0.5rem). */
|
||||
paddingParallel?: PaddingVariants;
|
||||
/** Padding perpendicular to the line. Default: `"xs"` (0.25rem). */
|
||||
paddingPerpendicular?: PaddingVariants;
|
||||
};
|
||||
|
||||
/** Line with a title to the left. */
|
||||
interface DividerTitledProps extends DividerNeverFields {
|
||||
type DividerTitledProps = Omit<DividerSharedProps, "title"> & {
|
||||
title: string | RichStr;
|
||||
description?: never;
|
||||
foldable?: false;
|
||||
ref?: React.Ref<HTMLDivElement>;
|
||||
}
|
||||
};
|
||||
|
||||
/** Line with a description below. */
|
||||
interface DividerDescribedProps extends DividerNeverFields {
|
||||
title?: never;
|
||||
type DividerDescribedProps = Omit<DividerSharedProps, "description"> & {
|
||||
/** Description rendered below the divider line. */
|
||||
description: string | RichStr;
|
||||
foldable?: false;
|
||||
ref?: React.Ref<HTMLDivElement>;
|
||||
}
|
||||
};
|
||||
|
||||
/** Foldable — requires title, reveals children. */
|
||||
interface DividerFoldableProps {
|
||||
type DividerFoldableProps = Omit<
|
||||
DividerSharedProps,
|
||||
"title" | "foldable" | "open" | "defaultOpen" | "onOpenChange" | "children"
|
||||
> & {
|
||||
/** Title is required when foldable. */
|
||||
title: string | RichStr;
|
||||
foldable: true;
|
||||
description?: never;
|
||||
/** Controlled open state. */
|
||||
open?: boolean;
|
||||
/** Uncontrolled default open state. */
|
||||
@@ -57,8 +67,7 @@ interface DividerFoldableProps {
|
||||
onOpenChange?: (open: boolean) => void;
|
||||
/** Content revealed when open. */
|
||||
children?: React.ReactNode;
|
||||
ref?: React.Ref<HTMLDivElement>;
|
||||
}
|
||||
};
|
||||
|
||||
type DividerProps =
|
||||
| DividerBareProps
|
||||
@@ -75,12 +84,39 @@ function Divider(props: DividerProps) {
|
||||
return <FoldableDivider {...props} />;
|
||||
}
|
||||
|
||||
const { ref } = props;
|
||||
const title = "title" in props ? props.title : undefined;
|
||||
const description = "description" in props ? props.description : undefined;
|
||||
const {
|
||||
ref,
|
||||
title,
|
||||
description,
|
||||
orientation = "horizontal",
|
||||
paddingParallel = "sm",
|
||||
paddingPerpendicular = "xs",
|
||||
} = props;
|
||||
|
||||
if (orientation === "vertical") {
|
||||
return (
|
||||
<div
|
||||
ref={ref}
|
||||
className={cn(
|
||||
"opal-divider-vertical",
|
||||
paddingXVariants[paddingPerpendicular],
|
||||
paddingYVariants[paddingParallel]
|
||||
)}
|
||||
>
|
||||
<div className="opal-divider-line-vertical" />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div ref={ref} className="opal-divider">
|
||||
<div
|
||||
ref={ref}
|
||||
className={cn(
|
||||
"opal-divider",
|
||||
paddingXVariants[paddingParallel],
|
||||
paddingYVariants[paddingPerpendicular]
|
||||
)}
|
||||
>
|
||||
<div className="opal-divider-row">
|
||||
{title && (
|
||||
<div className="opal-divider-title">
|
||||
|
||||
@@ -2,11 +2,13 @@
|
||||
Divider
|
||||
|
||||
A horizontal rule with optional title, foldable chevron, or description.
|
||||
Padding is controlled via Tailwind classes applied by the component.
|
||||
--------------------------------------------------------------------------- */
|
||||
|
||||
/* ── Horizontal ─────────────────────────────────────────────────────────────── */
|
||||
|
||||
.opal-divider {
|
||||
@apply flex flex-col w-full;
|
||||
padding: 0.25rem 0.5rem;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
@@ -29,6 +31,18 @@
|
||||
padding: 0px 2px;
|
||||
}
|
||||
|
||||
/* ── Vertical orientation ───────────────────────────────────────────────────── */
|
||||
|
||||
.opal-divider-vertical {
|
||||
@apply flex flex-row h-full;
|
||||
}
|
||||
|
||||
.opal-divider-line-vertical {
|
||||
@apply flex-1 w-px bg-border-01;
|
||||
}
|
||||
|
||||
/* ── Foldable chevron ───────────────────────────────────────────────────────── */
|
||||
|
||||
.opal-divider-chevron {
|
||||
@apply transition-transform duration-200 ease-in-out;
|
||||
}
|
||||
|
||||
@@ -100,7 +100,7 @@ const heightVariants: Record<ExtremaSizeVariants, string> = {
|
||||
// - SelectCard (paddingVariant, roundingVariant)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const cardPaddingVariants: Record<PaddingVariants, string> = {
|
||||
const paddingVariants: Record<PaddingVariants, string> = {
|
||||
lg: "p-6",
|
||||
md: "p-4",
|
||||
sm: "p-2",
|
||||
@@ -109,6 +109,24 @@ const cardPaddingVariants: Record<PaddingVariants, string> = {
|
||||
fit: "p-0",
|
||||
};
|
||||
|
||||
const paddingXVariants: Record<PaddingVariants, string> = {
|
||||
lg: "px-6",
|
||||
md: "px-4",
|
||||
sm: "px-2",
|
||||
xs: "px-1",
|
||||
"2xs": "px-0.5",
|
||||
fit: "px-0",
|
||||
};
|
||||
|
||||
const paddingYVariants: Record<PaddingVariants, string> = {
|
||||
lg: "py-6",
|
||||
md: "py-4",
|
||||
sm: "py-2",
|
||||
xs: "py-1",
|
||||
"2xs": "py-0.5",
|
||||
fit: "py-0",
|
||||
};
|
||||
|
||||
const cardRoundingVariants: Record<RoundingVariants, string> = {
|
||||
lg: "rounded-16",
|
||||
md: "rounded-12",
|
||||
@@ -122,7 +140,9 @@ export {
|
||||
type OverridableExtremaSizeVariants,
|
||||
type SizeVariants,
|
||||
containerSizeVariants,
|
||||
cardPaddingVariants,
|
||||
paddingVariants,
|
||||
paddingXVariants,
|
||||
paddingYVariants,
|
||||
cardRoundingVariants,
|
||||
widthVariants,
|
||||
heightVariants,
|
||||
|
||||
16
web/package-lock.json
generated
16
web/package-lock.json
generated
@@ -47,6 +47,7 @@
|
||||
"clsx": "^2.1.1",
|
||||
"cmdk": "^1.0.0",
|
||||
"cookies-next": "^5.1.0",
|
||||
"copy-to-clipboard": "^3.3.3",
|
||||
"date-fns": "^3.6.0",
|
||||
"docx-preview": "^0.3.7",
|
||||
"favicon-fetch": "^1.0.0",
|
||||
@@ -8843,6 +8844,15 @@
|
||||
"react": ">= 16.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/copy-to-clipboard": {
|
||||
"version": "3.3.3",
|
||||
"resolved": "https://registry.npmjs.org/copy-to-clipboard/-/copy-to-clipboard-3.3.3.tgz",
|
||||
"integrity": "sha512-2KV8NhB5JqC3ky0r9PMCAZKbUHSwtEo4CwCs0KXgruG43gX5PMqDEBbVU4OUzw2MuAWUfsuFmWvEKG5QRfSnJA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"toggle-selection": "^1.0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/core-js": {
|
||||
"version": "3.46.0",
|
||||
"hasInstallScript": true,
|
||||
@@ -17426,6 +17436,12 @@
|
||||
"node": ">=8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/toggle-selection": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://registry.npmjs.org/toggle-selection/-/toggle-selection-1.0.6.tgz",
|
||||
"integrity": "sha512-BiZS+C1OS8g/q2RRbJmy59xpyghNBqrr6k5L/uKBGRsTfxmu3ffiRnd8mlGPUVayg8pvfi5urfnu8TU7DVOkLQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/toposort": {
|
||||
"version": "2.0.2",
|
||||
"license": "MIT"
|
||||
|
||||
@@ -65,6 +65,7 @@
|
||||
"clsx": "^2.1.1",
|
||||
"cmdk": "^1.0.0",
|
||||
"cookies-next": "^5.1.0",
|
||||
"copy-to-clipboard": "^3.3.3",
|
||||
"date-fns": "^3.6.0",
|
||||
"docx-preview": "^0.3.7",
|
||||
"favicon-fetch": "^1.0.0",
|
||||
|
||||
@@ -210,8 +210,10 @@ export default function MultiModelResponseView({
|
||||
const response = responses.find((r) => r.modelIndex === modelIndex);
|
||||
if (!response) return;
|
||||
|
||||
// Persist preferred response to backend + update local tree so the
|
||||
// input bar unblocks (awaitingPreferredSelection clears).
|
||||
// Persist preferred response + sync `latestChildNodeId`. Backend's
|
||||
// `set_preferred_response` updates `latest_child_message_id`; if the
|
||||
// frontend chain walk disagrees, the next follow-up fails with
|
||||
// "not on the latest mainline".
|
||||
if (parentMessage?.messageId && response.messageId && currentSessionId) {
|
||||
setPreferredResponse(parentMessage.messageId, response.messageId).catch(
|
||||
(err) => console.error("Failed to persist preferred response:", err)
|
||||
@@ -227,6 +229,7 @@ export default function MultiModelResponseView({
|
||||
updated.set(parentMessage.nodeId, {
|
||||
...userMsg,
|
||||
preferredResponseId: response.messageId,
|
||||
latestChildNodeId: response.nodeId,
|
||||
});
|
||||
updateSessionMessageTree(currentSessionId, updated);
|
||||
}
|
||||
|
||||
@@ -694,6 +694,25 @@ export function useLlmManager(
|
||||
prevAgentIdRef.current = liveAgent?.id;
|
||||
}, [liveAgent?.id]);
|
||||
|
||||
// Clear manual override when arriving at a *different* existing session
|
||||
// from any previously-seen defined session. Tracks only the last
|
||||
// *defined* session id so a round-trip through new-chat (A → undefined
|
||||
// → B) still resets, while A → undefined (new-chat) preserves it.
|
||||
const prevDefinedSessionIdRef = useRef<string | undefined>(undefined);
|
||||
useEffect(() => {
|
||||
const nextId = currentChatSession?.id;
|
||||
if (
|
||||
nextId !== undefined &&
|
||||
prevDefinedSessionIdRef.current !== undefined &&
|
||||
nextId !== prevDefinedSessionIdRef.current
|
||||
) {
|
||||
setUserHasManuallyOverriddenLLM(false);
|
||||
}
|
||||
if (nextId !== undefined) {
|
||||
prevDefinedSessionIdRef.current = nextId;
|
||||
}
|
||||
}, [currentChatSession?.id]);
|
||||
|
||||
function getValidLlmDescriptor(
|
||||
modelName: string | null | undefined
|
||||
): LlmDescriptor {
|
||||
@@ -715,8 +734,9 @@ export function useLlmManager(
|
||||
|
||||
if (llmProviders === undefined || llmProviders === null) {
|
||||
resolved = manualLlm;
|
||||
} else if (userHasManuallyOverriddenLLM && !currentChatSession) {
|
||||
// User has overridden in this session and switched to a new session
|
||||
} else if (userHasManuallyOverriddenLLM) {
|
||||
// Manual override wins over session's `current_alternate_model`.
|
||||
// Cleared on cross-session navigation by the effect above.
|
||||
resolved = manualLlm;
|
||||
} else if (currentChatSession?.current_alternate_model) {
|
||||
resolved = getValidLlmDescriptorForProviders(
|
||||
@@ -728,8 +748,6 @@ export function useLlmManager(
|
||||
liveAgent.llm_model_version_override,
|
||||
llmProviders
|
||||
);
|
||||
} else if (userHasManuallyOverriddenLLM) {
|
||||
resolved = manualLlm;
|
||||
} else if (user?.preferences?.default_model) {
|
||||
resolved = getValidLlmDescriptorForProviders(
|
||||
user.preferences.default_model,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import copy from "copy-to-clipboard";
|
||||
import { Button, ButtonProps } from "@opal/components";
|
||||
import { SvgAlertTriangle, SvgCheck, SvgCopy } from "@opal/icons";
|
||||
|
||||
@@ -40,26 +41,19 @@ export default function CopyIconButton({
|
||||
}
|
||||
|
||||
try {
|
||||
// Check if Clipboard API is available
|
||||
if (!navigator.clipboard) {
|
||||
throw new Error("Clipboard API not available");
|
||||
}
|
||||
|
||||
// If HTML content getter is provided, copy both HTML and plain text
|
||||
if (getHtmlContent) {
|
||||
if (navigator.clipboard && getHtmlContent) {
|
||||
const htmlContent = getHtmlContent();
|
||||
const clipboardItem = new ClipboardItem({
|
||||
"text/html": new Blob([htmlContent], { type: "text/html" }),
|
||||
"text/plain": new Blob([text], { type: "text/plain" }),
|
||||
});
|
||||
await navigator.clipboard.write([clipboardItem]);
|
||||
}
|
||||
// Default: plain text only
|
||||
else {
|
||||
} else if (navigator.clipboard) {
|
||||
await navigator.clipboard.writeText(text);
|
||||
} else if (!copy(text)) {
|
||||
throw new Error("copy-to-clipboard returned false");
|
||||
}
|
||||
|
||||
// Show "copied" state
|
||||
setCopyState("copied");
|
||||
} catch (err) {
|
||||
console.error("Failed to copy:", err);
|
||||
|
||||
@@ -159,9 +159,12 @@ export default function ModelSelector({
|
||||
);
|
||||
|
||||
if (!isMultiModel) {
|
||||
// Stable key — keying on model would unmount the pill
|
||||
// on change and leave Radix's anchorRef detached,
|
||||
// flashing the closing popover at (0,0).
|
||||
return (
|
||||
<OpenButton
|
||||
key={modelKey(model.provider, model.modelName)}
|
||||
key="single-model-pill"
|
||||
icon={ProviderIcon}
|
||||
onClick={(e: React.MouseEvent) =>
|
||||
handlePillClick(index, e.currentTarget as HTMLElement)
|
||||
|
||||
@@ -425,16 +425,27 @@ export default function AppPage({ firstMessage }: ChatPageProps) {
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [multiModel.isMultiModelActive]);
|
||||
|
||||
// Sync single-model selection to llmManager so the submission path
|
||||
// uses the correct provider/version (replaces the old LLMPopover sync).
|
||||
// Sync single-model selection to llmManager so the submission path uses
|
||||
// the correct provider/version. Guard against echoing derived state back
|
||||
// — only call updateCurrentLlm when the selection actually differs from
|
||||
// currentLlm, otherwise the initial [] → [currentLlmModel] sync would
|
||||
// pin `userHasManuallyOverriddenLLM=true` with whatever was resolved
|
||||
// first (often the default model before the session's alt_model loads).
|
||||
useEffect(() => {
|
||||
if (multiModel.selectedModels.length === 1) {
|
||||
const model = multiModel.selectedModels[0]!;
|
||||
llmManager.updateCurrentLlm({
|
||||
name: model.name,
|
||||
provider: model.provider,
|
||||
modelName: model.modelName,
|
||||
});
|
||||
const current = llmManager.currentLlm;
|
||||
if (
|
||||
model.provider !== current.provider ||
|
||||
model.modelName !== current.modelName ||
|
||||
model.name !== current.name
|
||||
) {
|
||||
llmManager.updateCurrentLlm({
|
||||
name: model.name,
|
||||
provider: model.provider,
|
||||
modelName: model.modelName,
|
||||
});
|
||||
}
|
||||
}
|
||||
}, [multiModel.selectedModels]);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user