Compare commits

..

11 Commits

Author SHA1 Message Date
Raunak Bhagat
fbebeb8173 fix: add max-width to opal-tooltip for text wrapping 2026-04-13 12:30:12 -07:00
Raunak Bhagat
3a448fc7d2 fix: make children optional on Disabled 2026-04-13 12:22:54 -07:00
Raunak Bhagat
65f772bdd5 fix: add WithoutStyles, self-stretch wrapper to Disabled 2026-04-13 12:20:20 -07:00
Raunak Bhagat
2e3b869fa5 fix: address PR review comments
- Update styles.css comment to reference div wrapper instead of Radix Slot
- Replace raw <button> with Opal Button in story
- Update AGENTS.md Disabled section to reflect new div-based approach
2026-04-13 11:57:09 -07:00
Raunak Bhagat
fe0088b398 style: remove stray blank line in Text component imports 2026-04-13 11:52:37 -07:00
Raunak Bhagat
5820fb063c chore: add TODO for opalified Tooltip replacement 2026-04-13 11:51:01 -07:00
Raunak Bhagat
dd19348a2f refactor: render Disabled tooltip with Text component 2026-04-13 11:48:28 -07:00
Raunak Bhagat
9eeba7a44d feat: support RichStr (inline markdown) in Disabled tooltip 2026-04-13 11:47:26 -07:00
Raunak Bhagat
ce3df44533 fix: remove explicit tooltipSide, default to right 2026-04-13 11:46:07 -07:00
Raunak Bhagat
4f6e66c913 refactor: replace SimpleTooltip+allowClick with Disabled tooltip in ChatPreferencesPage
The Search Mode row used SimpleTooltip wrapping Disabled with allowClick
and an extra div. Now that Disabled supports a tooltip prop natively,
collapse all three into a single Disabled with tooltip/tooltipSide.
2026-04-13 11:45:17 -07:00
Raunak Bhagat
b24235a785 refactor: replace Radix Slot with div wrapper in Disabled, add tooltip support
The Disabled component used Radix Slot which only works with single DOM
element children. This broke when wrapping React components (e.g.
InputLayouts.Horizontal) because Slot cannot merge data-attributes onto
them. Switching to a plain <div> wrapper ensures the data-opal-disabled
attribute always lands on a real DOM node and cascades into descendants.

Also adds a tooltip prop that shows a Radix tooltip on hover when
disabled (implies allowClick internally), and includes a README and
Storybook stories.
2026-04-13 11:35:12 -07:00
40 changed files with 671 additions and 1050 deletions

View File

@@ -710,7 +710,7 @@ jobs:
pull-requests: write
steps:
- name: Download visual diff summaries
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c
uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3
with:
pattern: screenshot-diff-summary-*
path: summaries/

View File

@@ -38,7 +38,7 @@ jobs:
- name: Install node dependencies
working-directory: ./web
run: npm ci
- uses: j178/prek-action@cbc2f23eb5539cf20d82d1aabd0d0ecbcc56f4e3
- uses: j178/prek-action@0bb87d7f00b0c99306c8bcb8b8beba1eb581c037 # ratchet:j178/prek-action@v1
with:
prek-version: '0.3.4'
extra-args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || github.event_name == 'merge_group' && format('--from-ref {0} --to-ref {1}', github.event.merge_group.base_sha, github.event.merge_group.head_sha) || github.ref_name == 'main' && '--all-files' || '' }}

View File

@@ -49,12 +49,12 @@ Onyx uses Celery for asynchronous task processing with multiple specialized work
4. **Light Worker** (`light`)
- Handles lightweight, fast operations
- Tasks: vespa metadata sync, connector deletion, doc permissions upsert, checkpoint cleanup, index attempt cleanup
- Tasks: vespa operations, document permissions sync, external group sync
- Higher concurrency for quick tasks
5. **Heavy Worker** (`heavy`)
- Handles resource-intensive operations
- Tasks: connector pruning, document permissions sync, external group sync, CSV generation
- Primary task: document pruning operations
- Runs with 4 threads concurrency
6. **KG Processing Worker** (`kg_processing`)

View File

@@ -102,7 +102,7 @@ def revoke_tasks_blocking_deletion(
f"Revoked permissions sync task {permissions_sync_payload.celery_task_id}."
)
except Exception:
task_logger.exception("Exception while revoking permissions sync task")
task_logger.exception("Exception while revoking pruning task")
try:
prune_payload = redis_connector.prune.payload
@@ -110,7 +110,7 @@ def revoke_tasks_blocking_deletion(
app.control.revoke(prune_payload.celery_task_id)
task_logger.info(f"Revoked pruning task {prune_payload.celery_task_id}.")
except Exception:
task_logger.exception("Exception while revoking pruning task")
task_logger.exception("Exception while revoking permissions sync task")
try:
external_group_sync_payload = redis_connector.external_group_sync.payload
@@ -508,11 +508,7 @@ def monitor_connector_deletion_taskset(
db_session=db_session,
connector_id=connector_id_to_delete,
)
if not connector:
task_logger.info(
"Connector deletion - Connector already deleted, skipping connector cleanup"
)
elif not len(connector.credentials):
if not connector or not len(connector.credentials):
task_logger.info(
"Connector deletion - Found no credentials left for connector, deleting connector"
)

View File

@@ -171,10 +171,7 @@ class ClickupConnector(LoadConnector, PollConnector):
document.metadata[extra_field] = task[extra_field]
if self.retrieve_task_comments:
document.sections = [
*document.sections,
*self._get_task_comments(task["id"]),
]
document.sections.extend(self._get_task_comments(task["id"]))
doc_batch.append(document)

View File

@@ -1,65 +0,0 @@
import csv
import io
from typing import IO
from onyx.connectors.models import TabularSection
from onyx.file_processing.extract_file_text import file_io_to_text
from onyx.file_processing.extract_file_text import xlsx_sheet_extraction
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.utils.logger import setup_logger
logger = setup_logger()
def is_tabular_file(file_name: str) -> bool:
    """Return True when `file_name` ends with a known tabular extension.

    The match is case-insensitive: the name is lower-cased before it is
    compared against `OnyxFileExtensions.TABULAR_EXTENSIONS`.
    """
    normalized_name = file_name.lower()
    for extension in OnyxFileExtensions.TABULAR_EXTENSIONS:
        if normalized_name.endswith(extension):
            return True
    return False
def _tsv_to_csv(tsv_text: str) -> str:
    """Re-serialize tab-separated text as comma-separated text.

    Rows are parsed with the `excel-tab` dialect and written back with the
    csv writer's default dialect, so downstream parsers that assume the
    default Excel dialect read the columns correctly. Trailing newlines are
    removed from the result.
    """
    parsed_rows = csv.reader(io.StringIO(tsv_text), dialect="excel-tab")
    buffer = io.StringIO()
    csv_writer = csv.writer(buffer, lineterminator="\n")
    for row in parsed_rows:
        csv_writer.writerow(row)
    return buffer.getvalue().rstrip("\n")
def tabular_file_to_sections(
    file: IO[bytes],
    file_name: str,
    link: str = "",
) -> list[TabularSection]:
    """Convert a tabular file into one or more TabularSections.

    - .xlsx -> one TabularSection per (csv_text, sheet_title) pair returned
      by `xlsx_sheet_extraction`, linked as ``sheet:<title>``.
    - .csv / .tsv -> a single TabularSection holding the full decoded file
      (TSV content is re-serialized as CSV first). Its link is `link`, or
      `file_name` when `link` is empty.

    Returns an empty list when a csv/tsv file decodes to no text.

    Raises:
        ValueError: if `file_name` has none of the supported extensions.
        Exception: any decoding failure is logged and re-raised.
    """
    normalized_name = file_name.lower()

    if normalized_name.endswith(".xlsx"):
        sheet_sections: list[TabularSection] = []
        for sheet_csv, sheet_title in xlsx_sheet_extraction(file, file_name=file_name):
            sheet_sections.append(
                TabularSection(link=f"sheet:{sheet_title}", text=sheet_csv)
            )
        return sheet_sections

    is_tsv = normalized_name.endswith(".tsv")
    if not (normalized_name.endswith(".csv") or is_tsv):
        raise ValueError(f"{file_name!r} is not a tabular file")

    try:
        text = file_io_to_text(file).strip()
    except Exception as e:
        logger.error(f"Failure decoding {file_name}: {e}")
        raise e

    if not text:
        return []

    content = _tsv_to_csv(text) if is_tsv else text
    return [TabularSection(link=link or file_name, text=content)]

View File

@@ -1,10 +1,8 @@
import sys
from collections.abc import Sequence
from datetime import datetime
from enum import Enum
from typing import Any
from typing import cast
from typing import Literal
from pydantic import BaseModel
from pydantic import Field
@@ -35,18 +33,9 @@ class ConnectorMissingCredentialError(PermissionError):
)
class SectionType(str, Enum):
"""Discriminator for Section subclasses."""
TEXT = "text"
IMAGE = "image"
TABULAR = "tabular"
class Section(BaseModel):
"""Base section class with common attributes"""
type: SectionType
link: str | None = None
text: str | None = None
image_file_id: str | None = None
@@ -55,7 +44,6 @@ class Section(BaseModel):
class TextSection(Section):
"""Section containing text content"""
type: Literal[SectionType.TEXT] = SectionType.TEXT
text: str
def __sizeof__(self) -> int:
@@ -65,25 +53,12 @@ class TextSection(Section):
class ImageSection(Section):
"""Section containing an image reference"""
type: Literal[SectionType.IMAGE] = SectionType.IMAGE
image_file_id: str
def __sizeof__(self) -> int:
return sys.getsizeof(self.image_file_id) + sys.getsizeof(self.link)
class TabularSection(Section):
"""Section containing tabular data (csv/tsv content, or one sheet of
an xlsx workbook rendered as CSV)."""
type: Literal[SectionType.TABULAR] = SectionType.TABULAR
text: str # CSV representation in a string
link: str
def __sizeof__(self) -> int:
return sys.getsizeof(self.text) + sys.getsizeof(self.link)
class BasicExpertInfo(BaseModel):
"""Basic Information for the owner of a document, any of the fields can be left as None
Display fallback goes as follows:
@@ -186,7 +161,7 @@ class DocumentBase(BaseModel):
"""Used for Onyx ingestion api, the ID is inferred before use if not provided"""
id: str | None = None
sections: Sequence[TextSection | ImageSection | TabularSection]
sections: list[TextSection | ImageSection]
source: DocumentSource | None = None
semantic_identifier: str # displayed in the UI as the main identifier for the doc
# TODO(andrei): Ideally we could improve this to where each value is just a
@@ -396,9 +371,12 @@ class IndexingDocument(Document):
)
else:
section_len = sum(
len(section.text) if section.text is not None else 0
(
len(section.text)
if isinstance(section, TextSection) and section.text is not None
else 0
)
for section in self.sections
if isinstance(section, (TextSection, TabularSection))
)
return title_len + section_len

View File

@@ -463,13 +463,29 @@ def _remove_empty_runs(
return result
def xlsx_sheet_extraction(file: IO[Any], file_name: str = "") -> list[tuple[str, str]]:
"""
Converts each sheet in the excel file to a csv condensed string.
Returns a string and the worksheet title for each worksheet
def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
# TODO: switch back to this approach in a few months when markitdown
# fixes their handling of excel files
Returns a list of (csv_text, sheet)
"""
# md = get_markitdown_converter()
# stream_info = StreamInfo(
# mimetype=SPREADSHEET_MIME_TYPE, filename=file_name or None, extension=".xlsx"
# )
# try:
# workbook = md.convert(to_bytesio(file), stream_info=stream_info)
# except (
# BadZipFile,
# ValueError,
# FileConversionException,
# UnsupportedFormatException,
# ) as e:
# error_str = f"Failed to extract text from {file_name or 'xlsx file'}: {e}"
# if file_name.startswith("~"):
# logger.debug(error_str + " (this is expected for files with ~)")
# else:
# logger.warning(error_str)
# return ""
# return workbook.markdown
try:
workbook = openpyxl.load_workbook(file, read_only=True)
except BadZipFile as e:
@@ -478,30 +494,23 @@ def xlsx_sheet_extraction(file: IO[Any], file_name: str = "") -> list[tuple[str,
logger.debug(error_str + " (this is expected for files with ~)")
else:
logger.warning(error_str)
return []
return ""
except Exception as e:
if any(s in str(e) for s in KNOWN_OPENPYXL_BUGS):
logger.error(
f"Failed to extract text from {file_name or 'xlsx file'}. This happens due to a bug in openpyxl. {e}"
)
return []
return ""
raise
sheets: list[tuple[str, str]] = []
text_content = []
for sheet in workbook.worksheets:
sheet_matrix = _clean_worksheet_matrix(_worksheet_to_matrix(sheet))
buf = io.StringIO()
writer = csv.writer(buf, lineterminator="\n")
writer.writerows(sheet_matrix)
csv_text = buf.getvalue().rstrip("\n")
if csv_text.strip():
sheets.append((csv_text, sheet.title))
return sheets
def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
sheets = xlsx_sheet_extraction(file, file_name)
return TEXT_SECTION_SEPARATOR.join(csv_text for csv_text, _title in sheets)
text_content.append(buf.getvalue().rstrip("\n"))
return TEXT_SECTION_SEPARATOR.join(text_content)
def eml_to_text(file: IO[Any]) -> str:

View File

@@ -1,3 +1,5 @@
from typing import cast
from chonkie import SentenceChunker
from onyx.configs.app_configs import AVERAGE_SUMMARY_EMBEDDINGS
@@ -14,14 +16,16 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
get_metadata_keys_to_ignore,
)
from onyx.connectors.models import IndexingDocument
from onyx.indexing.chunking import DocumentChunker
from onyx.indexing.chunking import extract_blurb
from onyx.connectors.models import Section
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.indexing.models import DocAwareChunk
from onyx.llm.utils import MAX_CONTEXT_TOKENS
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import clean_text
from onyx.utils.text_processing import shared_precompare_cleanup
from shared_configs.configs import DOC_EMBEDDING_CONTEXT_SIZE
from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
# Not supporting overlaps, we need a clean combination of chunks and it is unclear if overlaps
# actually help quality at all
@@ -150,6 +154,9 @@ class Chunker:
self.tokenizer = tokenizer
self.callback = callback
self.max_context = 0
self.prompt_tokens = 0
# Create a token counter function that returns the count instead of the tokens
def token_counter(text: str) -> int:
return len(tokenizer.encode(text))
@@ -179,12 +186,234 @@ class Chunker:
else None
)
self._document_chunker = DocumentChunker(
tokenizer=tokenizer,
blurb_splitter=self.blurb_splitter,
chunk_splitter=self.chunk_splitter,
mini_chunk_splitter=self.mini_chunk_splitter,
def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:
    """
    Cut `text` into consecutive pieces of at most `content_token_limit` tokens.

    The text is tokenized with `self.tokenizer.tokenize`; each window of
    tokens is re-joined with single spaces to form one piece.
    """
    tokens = self.tokenizer.tokenize(text)
    token_count = len(tokens)

    pieces = []
    cursor = 0
    while cursor < token_count:
        stop = min(cursor + content_token_limit, token_count)
        pieces.append(" ".join(tokens[cursor:stop]))
        cursor = stop

    return pieces
def _extract_blurb(self, text: str) -> str:
    """
    Return the first piece produced by `self.blurb_splitter` for `text`,
    or an empty string when the splitter yields nothing.
    """
    # chunker is in `text` mode, so its output is treated as plain strings
    pieces = cast(list[str], self.blurb_splitter.chunk(text))
    return pieces[0] if pieces else ""
def _get_mini_chunk_texts(self, chunk_text: str) -> list[str] | None:
"""
For "multipass" mode: additional sub-chunks (mini-chunks) for use in certain embeddings.
"""
if self.mini_chunk_splitter and chunk_text.strip():
# chunker is in `text` mode
return cast(list[str], self.mini_chunk_splitter.chunk(chunk_text))
return None
# ADDED: extra param image_url to store in the chunk
def _create_chunk(
    self,
    document: IndexingDocument,
    chunks_list: list[DocAwareChunk],
    text: str,
    links: dict[int, str],
    is_continuation: bool = False,
    title_prefix: str = "",
    metadata_suffix_semantic: str = "",
    metadata_suffix_keyword: str = "",
    image_file_id: str | None = None,
) -> None:
    """
    Build a DocAwareChunk for `text` and append it to `chunks_list`.

    The chunk id is the current length of `chunks_list`. An empty `links`
    mapping is replaced by `{0: ""}`. The blurb and mini-chunk texts are
    derived from `text`; summary and context fields start out empty.
    """
    next_chunk_id = len(chunks_list)
    blurb = self._extract_blurb(text)
    source_links = links or {0: ""}
    mini_chunk_texts = self._get_mini_chunk_texts(text)

    chunks_list.append(
        DocAwareChunk(
            source_document=document,
            chunk_id=next_chunk_id,
            blurb=blurb,
            content=text,
            source_links=source_links,
            image_file_id=image_file_id,
            section_continuation=is_continuation,
            title_prefix=title_prefix,
            metadata_suffix_semantic=metadata_suffix_semantic,
            metadata_suffix_keyword=metadata_suffix_keyword,
            mini_chunk_texts=mini_chunk_texts,
            large_chunk_id=None,
            doc_summary="",
            chunk_context="",
            contextual_rag_reserved_tokens=0,  # set per-document in _handle_single_document
        )
    )
def _chunk_document_with_sections(
    self,
    document: IndexingDocument,
    sections: list[Section],
    title_prefix: str,
    metadata_suffix_semantic: str,
    metadata_suffix_keyword: str,
    content_token_limit: int,
) -> list[DocAwareChunk]:
    """
    Loops through sections of the document, converting them into one or more chunks.
    Works with processed sections that are base Section objects.

    Behavior per section:
    - Sections with no text are skipped, except the first section of a
      document that has a title.
    - A section carrying `image_file_id` always becomes its own chunk; any
      buffered text is finalized first.
    - A text section larger than `content_token_limit` is split with
      `self.chunk_splitter` (and, when STRICT_CHUNK_TOKEN_LIMIT is set,
      further split by token windows).
    - Other text sections are accumulated, joined by SECTION_SEPARATOR,
      until the next one would exceed `content_token_limit`.

    Args:
        document: Document the chunks belong to.
        sections: Processed sections to chunk, in order.
        title_prefix: Passed through to every created chunk.
        metadata_suffix_semantic: Passed through to every created chunk.
        metadata_suffix_keyword: Passed through to every created chunk.
        content_token_limit: Maximum number of content tokens per chunk.

    Returns:
        The created chunks; always at least one, even if it is empty.
    """
    chunks: list[DocAwareChunk] = []
    link_offsets: dict[int, str] = {}
    chunk_text = ""

    # Loop-invariant: token cost of joining two sections inside one chunk.
    separator_token_count = len(self.tokenizer.encode(SECTION_SEPARATOR))

    def emit(
        text: str,
        links: dict[int, str],
        is_continuation: bool = False,
        image_file_id: str | None = None,
    ) -> None:
        # Single place that forwards the per-document prefixes/suffixes.
        self._create_chunk(
            document,
            chunks,
            text,
            links,
            is_continuation=is_continuation,
            title_prefix=title_prefix,
            metadata_suffix_semantic=metadata_suffix_semantic,
            metadata_suffix_keyword=metadata_suffix_keyword,
            image_file_id=image_file_id,
        )

    for section_idx, section in enumerate(sections):
        # Get section text and other attributes
        section_text = clean_text(str(section.text or ""))
        section_link_text = section.link or ""
        image_url = section.image_file_id

        # If there is no useful content, skip
        if not section_text and (not document.title or section_idx > 0):
            logger.warning(
                f"Skipping empty or irrelevant section in doc {document.semantic_identifier}, link={section_link_text}"
            )
            continue

        # CASE 1: If this section has an image, force a separate chunk
        if image_url:
            # First, if we have any partially built text chunk, finalize it
            if chunk_text.strip():
                emit(chunk_text, link_offsets)
                chunk_text = ""
                link_offsets = {}

            # Create a chunk specifically for this image section
            # (Using the text summary that was generated during processing)
            emit(
                section_text,
                {0: section_link_text} if section_link_text else {},
                image_file_id=image_url,
            )
            continue

        # CASE 2: Normal text section
        section_token_count = len(self.tokenizer.encode(section_text))

        # If the section is large on its own, split it separately
        if section_token_count > content_token_limit:
            if chunk_text.strip():
                emit(chunk_text, link_offsets)
                chunk_text = ""
                link_offsets = {}

            # chunker is in `text` mode
            split_texts = cast(list[str], self.chunk_splitter.chunk(section_text))
            for i, split_text in enumerate(split_texts):
                # If even the split_text is bigger than strict limit, further split
                if (
                    STRICT_CHUNK_TOKEN_LIMIT
                    and len(self.tokenizer.encode(split_text)) > content_token_limit
                ):
                    smaller_chunks = self._split_oversized_chunk(
                        split_text, content_token_limit
                    )
                    # NOTE(review): the continuation flag depends only on j, so
                    # the first piece of a later split (i > 0) is not marked as
                    # a continuation -- confirm this is intended.
                    for j, small_chunk in enumerate(smaller_chunks):
                        emit(
                            small_chunk,
                            {0: section_link_text},
                            is_continuation=(j != 0),
                        )
                else:
                    emit(
                        split_text,
                        {0: section_link_text},
                        is_continuation=(i != 0),
                    )
            continue

        # If we can still fit this section into the current chunk, do so
        current_token_count = len(self.tokenizer.encode(chunk_text))
        current_offset = len(shared_precompare_cleanup(chunk_text))
        next_section_tokens = separator_token_count + section_token_count

        if next_section_tokens + current_token_count <= content_token_limit:
            if chunk_text:
                chunk_text += SECTION_SEPARATOR
            chunk_text += section_text
            link_offsets[current_offset] = section_link_text
        else:
            # Finalize the existing chunk -- but only if there is one. A section
            # whose size is within a separator's worth of the limit lands here
            # even with an empty buffer, and must not produce an empty chunk.
            if chunk_text.strip():
                emit(chunk_text, link_offsets)

            # start a new chunk
            link_offsets = {0: section_link_text}
            chunk_text = section_text

    # finalize any leftover text chunk; guarantee at least one chunk overall
    if chunk_text.strip() or not chunks:
        emit(chunk_text, link_offsets or {0: ""})  # safe default

    return chunks
def _handle_single_document(
self, document: IndexingDocument
@@ -194,10 +423,7 @@ class Chunker:
logger.debug(f"Chunking {document.semantic_identifier}")
# Title prep
title = extract_blurb(
document.get_title_for_document_index() or "",
self.blurb_splitter,
)
title = self._extract_blurb(document.get_title_for_document_index() or "")
title_prefix = title + RETURN_SEPARATOR if title else ""
title_tokens = len(self.tokenizer.encode(title_prefix))
@@ -265,7 +491,7 @@ class Chunker:
# Use processed_sections if available (IndexingDocument), otherwise use original sections
sections_to_chunk = document.processed_sections
normal_chunks = self._document_chunker.chunk(
normal_chunks = self._chunk_document_with_sections(
document,
sections_to_chunk,
title_prefix,

View File

@@ -1,7 +0,0 @@
from onyx.indexing.chunking.document_chunker import DocumentChunker
from onyx.indexing.chunking.section_chunker import extract_blurb
__all__ = [
"DocumentChunker",
"extract_blurb",
]

View File

@@ -1,111 +0,0 @@
from chonkie import SentenceChunker
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import Section
from onyx.connectors.models import SectionType
from onyx.indexing.chunking.image_section_chunker import ImageChunker
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.section_chunker import ChunkPayload
from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.text_section_chunker import TextChunker
from onyx.indexing.models import DocAwareChunk
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.utils.logger import setup_logger
from onyx.utils.text_processing import clean_text
logger = setup_logger()
class DocumentChunker:
    """Converts a document's processed sections into DocAwareChunks.

    Each section is routed to the SectionChunker registered for its
    `SectionType`; the resulting payloads are then upgraded to
    DocAwareChunks with sequential chunk ids.

    Drop-in replacement for `Chunker._chunk_document_with_sections`.
    """

    def __init__(
        self,
        tokenizer: BaseTokenizer,
        blurb_splitter: SentenceChunker,
        chunk_splitter: SentenceChunker,
        mini_chunk_splitter: SentenceChunker | None = None,
    ) -> None:
        self.blurb_splitter = blurb_splitter
        self.mini_chunk_splitter = mini_chunk_splitter

        text_chunker = TextChunker(tokenizer=tokenizer, chunk_splitter=chunk_splitter)
        image_chunker = ImageChunker()
        # Section type -> chunker responsible for that kind of section.
        self._dispatch: dict[SectionType, SectionChunker] = {
            SectionType.TEXT: text_chunker,
            SectionType.IMAGE: image_chunker,
        }

    def chunk(
        self,
        document: IndexingDocument,
        sections: list[Section],
        title_prefix: str,
        metadata_suffix_semantic: str,
        metadata_suffix_keyword: str,
        content_token_limit: int,
    ) -> list[DocAwareChunk]:
        """Chunk `sections` and return the resulting DocAwareChunks.

        When no section produces a payload, a single empty payload is used
        so the result always contains at least one chunk.
        """
        payloads = self._collect_section_payloads(
            document=document,
            sections=sections,
            content_token_limit=content_token_limit,
        )
        if not payloads:
            payloads.append(ChunkPayload(text="", links={0: ""}))

        doc_chunks: list[DocAwareChunk] = []
        for chunk_id, payload in enumerate(payloads):
            doc_chunks.append(
                payload.to_doc_aware_chunk(
                    document=document,
                    chunk_id=chunk_id,
                    blurb_splitter=self.blurb_splitter,
                    mini_chunk_splitter=self.mini_chunk_splitter,
                    title_prefix=title_prefix,
                    metadata_suffix_semantic=metadata_suffix_semantic,
                    metadata_suffix_keyword=metadata_suffix_keyword,
                )
            )
        return doc_chunks

    def _collect_section_payloads(
        self,
        document: IndexingDocument,
        sections: list[Section],
        content_token_limit: int,
    ) -> list[ChunkPayload]:
        """Run every non-empty section through its chunker, in order.

        The accumulator returned by one chunker is handed to the next, and
        whatever remains buffered at the end is flushed as a last payload.
        """
        collected: list[ChunkPayload] = []
        state = AccumulatorState()

        for position, section in enumerate(sections):
            cleaned_text = clean_text(str(section.text or ""))
            # An empty section is kept only when it is the first one of a
            # document that has a title.
            keep_empty = bool(document.title) and position == 0
            if not cleaned_text and not keep_empty:
                logger.warning(
                    f"Skipping empty or irrelevant section in doc "
                    f"{document.semantic_identifier}, link={section.link}"
                )
                continue

            outcome = self._select_chunker(section).chunk_section(
                section=section,
                accumulator=state,
                content_token_limit=content_token_limit,
            )
            collected.extend(outcome.payloads)
            state = outcome.accumulator

        # Final flush: any leftover buffered text becomes one last payload.
        collected.extend(state.flush_to_list())
        return collected

    def _select_chunker(self, section: Section) -> SectionChunker:
        """Return the chunker registered for `section.type`.

        Raises:
            ValueError: if no chunker is registered for that type.
        """
        section_type = section.type
        try:
            return self._dispatch[section_type]
        except KeyError:
            raise ValueError(f"No SectionChunker registered for type={section.type}")

View File

@@ -1,35 +0,0 @@
from onyx.connectors.models import Section
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.section_chunker import ChunkPayload
from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
from onyx.utils.text_processing import clean_text
class ImageChunker(SectionChunker):
    """SectionChunker that emits every image section as its own payload."""

    def chunk_section(
        self,
        section: Section,
        accumulator: AccumulatorState,
        content_token_limit: int,  # noqa: ARG002
    ) -> SectionChunkerOutput:
        """Flush any buffered text, then append one payload for the image.

        The returned accumulator is always a fresh, empty one.
        `content_token_limit` is not used.
        """
        assert section.image_file_id is not None

        link = section.link or ""
        image_payload = ChunkPayload(
            text=clean_text(str(section.text or "")),
            links={0: link} if link else {},
            image_file_id=section.image_file_id,
            is_continuation=False,
        )

        # Buffered text (if any) comes first, followed by the image payload.
        return SectionChunkerOutput(
            payloads=[*accumulator.flush_to_list(), image_payload],
            accumulator=AccumulatorState(),
        )

View File

@@ -1,100 +0,0 @@
from abc import ABC
from abc import abstractmethod
from collections.abc import Sequence
from typing import cast
from chonkie import SentenceChunker
from pydantic import BaseModel
from pydantic import Field
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import Section
from onyx.indexing.models import DocAwareChunk
def extract_blurb(text: str, blurb_splitter: SentenceChunker) -> str:
    """Return the first piece `blurb_splitter` produces for `text`.

    Returns an empty string when the splitter yields no pieces.
    """
    pieces = cast(list[str], blurb_splitter.chunk(text))
    return pieces[0] if pieces else ""
def get_mini_chunk_texts(
    chunk_text: str,
    mini_chunk_splitter: SentenceChunker | None,
) -> list[str] | None:
    """Split `chunk_text` into mini-chunks with `mini_chunk_splitter`.

    Returns None when no splitter is given or when `chunk_text` is blank;
    otherwise returns the splitter's output as a new list.
    """
    if not mini_chunk_splitter:
        return None
    if not chunk_text.strip():
        return None
    mini_chunks = cast(Sequence[str], mini_chunk_splitter.chunk(chunk_text))
    return list(mini_chunks)
class ChunkPayload(BaseModel):
    """Section-local chunk content without document-scoped fields.

    Section chunkers produce these; the orchestrator later turns them into
    DocAwareChunks via `to_doc_aware_chunk`, supplying the chunk id and the
    title/metadata strings.
    """

    # Chunk content.
    text: str
    # Link mapping stored on the chunk as `source_links`.
    links: dict[int, str]
    # Stored on the chunk as `section_continuation`.
    is_continuation: bool = False
    # Set for payloads created from image sections.
    image_file_id: str | None = None

    def to_doc_aware_chunk(
        self,
        document: IndexingDocument,
        chunk_id: int,
        blurb_splitter: SentenceChunker,
        title_prefix: str = "",
        metadata_suffix_semantic: str = "",
        metadata_suffix_keyword: str = "",
        mini_chunk_splitter: SentenceChunker | None = None,
    ) -> DocAwareChunk:
        """Build the DocAwareChunk for this payload.

        The blurb and mini-chunk texts are derived from `self.text`; empty
        `links` fall back to `{0: ""}`. Summary and context fields start
        out empty.
        """
        blurb = extract_blurb(self.text, blurb_splitter)
        source_links = self.links or {0: ""}
        mini_chunk_texts = get_mini_chunk_texts(self.text, mini_chunk_splitter)

        return DocAwareChunk(
            source_document=document,
            chunk_id=chunk_id,
            blurb=blurb,
            content=self.text,
            source_links=source_links,
            image_file_id=self.image_file_id,
            section_continuation=self.is_continuation,
            title_prefix=title_prefix,
            metadata_suffix_semantic=metadata_suffix_semantic,
            metadata_suffix_keyword=metadata_suffix_keyword,
            mini_chunk_texts=mini_chunk_texts,
            large_chunk_id=None,
            doc_summary="",
            chunk_context="",
            contextual_rag_reserved_tokens=0,
        )
class AccumulatorState(BaseModel):
    """Cross-section text buffer threaded through SectionChunkers."""

    # Text buffered so far.
    text: str = ""
    # Links for the buffered text, keyed by integer offset.
    link_offsets: dict[int, str] = Field(default_factory=dict)

    def is_empty(self) -> bool:
        """Return True when the buffer holds nothing but whitespace."""
        return self.text.strip() == ""

    def flush_to_list(self) -> list[ChunkPayload]:
        """Return the buffer as a one-payload list, or [] when it is empty."""
        if not self.is_empty():
            return [ChunkPayload(text=self.text, links=self.link_offsets)]
        return []
class SectionChunkerOutput(BaseModel):
    """Result of chunking one section: finished payloads plus the buffer to carry forward."""

    # Payloads completed while handling the section.
    payloads: list[ChunkPayload]
    # Accumulator state to pass to the next section's chunker.
    accumulator: AccumulatorState
class SectionChunker(ABC):
    """Interface for per-section chunking strategies."""

    @abstractmethod
    def chunk_section(
        self,
        section: Section,
        accumulator: AccumulatorState,
        content_token_limit: int,
    ) -> SectionChunkerOutput:
        """Chunk `section` given the current `accumulator`.

        Implementations return the payloads they completed together with
        the accumulator state to carry into the next section.
        """

View File

@@ -1,129 +0,0 @@
from typing import cast
from chonkie import SentenceChunker
from onyx.configs.constants import SECTION_SEPARATOR
from onyx.connectors.models import Section
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.section_chunker import ChunkPayload
from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import count_tokens
from onyx.utils.text_processing import clean_text
from onyx.utils.text_processing import shared_precompare_cleanup
from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
class TextChunker(SectionChunker):
    """SectionChunker for text sections.

    Small sections are buffered together in the accumulator; sections
    larger than the token limit are split on their own.
    """

    def __init__(
        self,
        tokenizer: BaseTokenizer,
        chunk_splitter: SentenceChunker,
    ) -> None:
        self.tokenizer = tokenizer
        self.chunk_splitter = chunk_splitter
        # Token cost of SECTION_SEPARATOR, computed once up front.
        self.section_separator_token_count = count_tokens(
            SECTION_SEPARATOR,
            self.tokenizer,
        )

    def chunk_section(
        self,
        section: Section,
        accumulator: AccumulatorState,
        content_token_limit: int,
    ) -> SectionChunkerOutput:
        """Fold one text section into the running accumulator.

        - Oversized section: delegated to `_handle_oversized_section`.
        - Section that fits: appended to the buffer, no payloads emitted.
        - Otherwise: the buffer is flushed and restarted with this section.
        """
        text = clean_text(str(section.text or ""))
        link = section.link or ""
        text_tokens = len(self.tokenizer.encode(text))

        # Oversized: flush the buffer and split the section by itself
        if text_tokens > content_token_limit:
            return self._handle_oversized_section(
                section_text=text,
                section_link=link,
                accumulator=accumulator,
                content_token_limit=content_token_limit,
            )

        buffered_tokens = count_tokens(accumulator.text, self.tokenizer)
        incoming_tokens = self.section_separator_token_count + text_tokens

        # Doesn't fit: flush the buffer and restart with this section
        if incoming_tokens + buffered_tokens > content_token_limit:
            return SectionChunkerOutput(
                payloads=accumulator.flush_to_list(),
                accumulator=AccumulatorState(
                    text=text,
                    link_offsets={0: link},
                ),
            )

        # Fits: extend the accumulator. The link is keyed by the length of
        # the cleaned-up text buffered before this section.
        link_key = len(shared_precompare_cleanup(accumulator.text))
        if accumulator.text:
            combined_text = accumulator.text + SECTION_SEPARATOR + text
        else:
            combined_text = text
        return SectionChunkerOutput(
            payloads=[],
            accumulator=AccumulatorState(
                text=combined_text,
                link_offsets={**accumulator.link_offsets, link_key: link},
            ),
        )

    def _handle_oversized_section(
        self,
        section_text: str,
        section_link: str,
        accumulator: AccumulatorState,
        content_token_limit: int,
    ) -> SectionChunkerOutput:
        """Flush the buffer, then split an oversized section into payloads.

        The section is split with `self.chunk_splitter`. When
        STRICT_CHUNK_TOKEN_LIMIT is set, any piece still above the limit is
        split again by token windows. The returned accumulator is empty.
        """
        payloads = accumulator.flush_to_list()
        pieces = cast(list[str], self.chunk_splitter.chunk(section_text))

        for piece_idx, piece in enumerate(pieces):
            needs_strict_split = (
                STRICT_CHUNK_TOKEN_LIMIT
                and count_tokens(piece, self.tokenizer) > content_token_limit
            )
            if not needs_strict_split:
                payloads.append(
                    ChunkPayload(
                        text=piece,
                        links={0: section_link},
                        is_continuation=(piece_idx != 0),
                    )
                )
                continue

            sub_pieces = self._split_oversized_chunk(piece, content_token_limit)
            payloads.extend(
                ChunkPayload(
                    text=sub_piece,
                    links={0: section_link},
                    is_continuation=(sub_idx != 0),
                )
                for sub_idx, sub_piece in enumerate(sub_pieces)
            )

        return SectionChunkerOutput(
            payloads=payloads,
            accumulator=AccumulatorState(),
        )

    def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:
        """Cut `text` into pieces of at most `content_token_limit` tokens.

        Tokens come from `self.tokenizer.tokenize`; each window of tokens is
        re-joined with single spaces.
        """
        tokens = self.tokenizer.tokenize(text)
        token_count = len(tokens)

        pieces: list[str] = []
        cursor = 0
        while cursor < token_count:
            stop = min(cursor + content_token_limit, token_count)
            pieces.append(" ".join(tokens[cursor:stop]))
            cursor = stop
        return pieces

View File

@@ -542,7 +542,6 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
**document.model_dump(),
processed_sections=[
Section(
type=section.type,
text=section.text if isinstance(section, TextSection) else "",
link=section.link,
image_file_id=(
@@ -567,7 +566,6 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
if isinstance(section, ImageSection):
# Default section with image path preserved - ensure text is always a string
processed_section = Section(
type=section.type,
link=section.link,
image_file_id=section.image_file_id,
text="", # Initialize with empty string
@@ -611,7 +609,6 @@ def process_image_sections(documents: list[Document]) -> list[IndexingDocument]:
# For TextSection, create a base Section with text and link
elif isinstance(section, TextSection):
processed_section = Section(
type=section.type,
text=section.text or "", # Ensure text is always a string, not None
link=section.link,
image_file_id=None,

View File

@@ -618,7 +618,6 @@ done
"app.kubernetes.io/managed-by": "onyx",
"onyx.app/sandbox-id": sandbox_id,
"onyx.app/tenant-id": tenant_id,
"admission.datadoghq.com/enabled": "false",
},
),
spec=pod_spec,

View File

@@ -65,7 +65,6 @@ class Settings(BaseModel):
anonymous_user_enabled: bool | None = None
invite_only_enabled: bool = False
deep_research_enabled: bool | None = None
multi_model_chat_enabled: bool | None = None
search_ui_enabled: bool | None = None
# Whether EE features are unlocked for use.
@@ -90,8 +89,7 @@ class Settings(BaseModel):
default=DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB, ge=0
)
file_token_count_threshold_k: int | None = Field(
default=None,
ge=0, # thousands of tokens; None = context-aware default
default=None, ge=0 # thousands of tokens; None = context-aware default
)
# Connector settings

View File

@@ -17,7 +17,6 @@ def documents_to_indexing_documents(
processed_sections = []
for section in document.sections:
processed_section = Section(
type=section.type,
text=section.text or "",
link=section.link,
image_file_id=None,

View File

@@ -12,7 +12,6 @@ from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TabularSection
from onyx.connectors.models import TextSection
_ITERATION_LIMIT = 100_000
@@ -142,15 +141,13 @@ def load_all_from_connector(
def to_sections(
documents: list[Document],
) -> Iterator[TextSection | ImageSection | TabularSection]:
) -> Iterator[TextSection | ImageSection]:
for doc in documents:
for section in doc.sections:
yield section
def to_text_sections(
sections: Iterator[TextSection | ImageSection | TabularSection],
) -> Iterator[str]:
def to_text_sections(sections: Iterator[TextSection | ImageSection]) -> Iterator[str]:
for section in sections:
if isinstance(section, TextSection):
yield section.text

View File

@@ -4,7 +4,6 @@ from typing import cast
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
from onyx.file_processing.extract_file_text import xlsx_sheet_extraction
from onyx.file_processing.extract_file_text import xlsx_to_text
@@ -197,136 +196,3 @@ class TestXlsxToText:
assert "r1c1" in lines[0] and "r1c2" in lines[0]
assert "r2c1" in lines[1] and "r2c2" in lines[1]
assert "r3c1" in lines[2] and "r3c2" in lines[2]
class TestXlsxSheetExtraction:
    """Checks for ``xlsx_sheet_extraction``, which these tests treat as
    returning a list of ``(csv_text, sheet_title)`` tuples."""

    def test_one_tuple_per_sheet(self) -> None:
        """Two populated sheets produce two entries, in workbook order."""
        workbook = _make_xlsx(
            {
                "Revenue": [["Month", "Amount"], ["Jan", "100"]],
                "Expenses": [["Category", "Cost"], ["Rent", "500"]],
            }
        )
        extracted = xlsx_sheet_extraction(workbook)
        assert len(extracted) == 2

        # Workbook sheet order must be preserved.
        sheet_names = [name for _, name in extracted]
        assert sheet_names == ["Revenue", "Expenses"]

        # Each sheet's cells must end up in that sheet's own tuple.
        (revenue_csv, _), (expenses_csv, _) = extracted[0], extracted[1]
        for needle in ("Month", "Jan"):
            assert needle in revenue_csv
        for needle in ("Category", "Rent"):
            assert needle in expenses_csv

    def test_tuple_structure_is_csv_text_then_title(self) -> None:
        """Pin the positional layout ``(csv_text, sheet_title)`` so callers
        that unpack by position cannot break silently."""
        extracted = xlsx_sheet_extraction(_make_xlsx({"MySheet": [["a", "b"]]}))
        assert len(extracted) == 1

        body, name = extracted[0]
        assert name == "MySheet"
        for cell in ("a", "b"):
            assert cell in body

    def test_empty_sheet_is_skipped(self) -> None:
        """A sheet with empty/whitespace-only CSV output must be left out
        of the result (the `if csv_text.strip():` guard drops it)."""
        workbook = _make_xlsx(
            {
                "Data": [["a", "b"]],
                "Empty": [],
            }
        )
        extracted = xlsx_sheet_extraction(workbook)
        assert len(extracted) == 1
        assert extracted[0][1] == "Data"

    def test_empty_workbook_returns_empty_list(self) -> None:
        """When every sheet is empty the result is ``[]``, not a list of
        empty tuples."""
        workbook = _make_xlsx({"Sheet1": [], "Sheet2": []})
        assert xlsx_sheet_extraction(workbook) == []

    def test_single_sheet(self) -> None:
        """A one-sheet workbook yields exactly one (csv, title) entry."""
        extracted = xlsx_sheet_extraction(_make_xlsx({"Only": [["x", "y"], ["1", "2"]]}))
        assert len(extracted) == 1

        body, name = extracted[0]
        assert name == "Only"
        for cell in ("x", "1"):
            assert cell in body

    def test_bad_zip_returns_empty_list(self) -> None:
        """Bytes that are not a valid zip archive produce an empty list."""
        not_a_workbook = io.BytesIO(b"not a zip file")
        assert xlsx_sheet_extraction(not_a_workbook, file_name="test.xlsx") == []

    def test_bad_zip_tilde_file_returns_empty_list(self) -> None:
        """`~$`-prefixed names are Excel lock files; the failure is meant
        to be logged at debug (not warning) level and still return ``[]``."""
        not_a_workbook = io.BytesIO(b"not a zip file")
        assert xlsx_sheet_extraction(not_a_workbook, file_name="~$temp.xlsx") == []

    def test_csv_content_matches_xlsx_to_text_per_sheet(self) -> None:
        """For a single-sheet workbook, ``xlsx_to_text`` and the csv_text
        from ``xlsx_sheet_extraction`` should agree, since both rely on the
        same per-sheet CSV conversion."""
        rows = [["Name", "Age"], ["Alice", "30"]]
        reference = xlsx_to_text(_make_xlsx({"People": rows}))

        extracted = xlsx_sheet_extraction(_make_xlsx({"People": rows}))
        assert len(extracted) == 1

        body, name = extracted[0]
        assert name == "People"
        assert body.strip() == reference.strip()

    def test_commas_in_cells_are_quoted(self) -> None:
        """Cells containing a comma must be quoted in the CSV output."""
        extracted = xlsx_sheet_extraction(
            _make_xlsx({"S1": [["hello, world", "normal"]]})
        )
        assert len(extracted) == 1

        body, _ = extracted[0]
        assert '"hello, world"' in body

    def test_long_empty_row_run_capped_within_sheet(self) -> None:
        """Matrix cleanup is per-sheet: a run of more than two empty rows
        collapses to two, so the sheet stays non-empty and is still
        returned."""
        rows = [["header"], [""], [""], [""], [""], ["data"]]
        extracted = xlsx_sheet_extraction(_make_xlsx({"S1": rows}))
        assert len(extracted) == 1

        body, _ = extracted[0]
        csv_lines = body.strip().split("\n")
        # header + 2 empty (capped) + data = 4 lines
        assert len(csv_lines) == 4
        assert "header" in csv_lines[0]
        assert "data" in csv_lines[-1]

    def test_sheet_title_with_special_chars_preserved(self) -> None:
        """Spaces, punctuation and unicode in sheet titles must survive
        verbatim, because the title is used as a link anchor downstream."""
        workbook = _make_xlsx(
            {
                "Q1 Revenue (USD)": [["a", "b"]],
                "Données": [["c", "d"]],
            }
        )
        sheet_names = [name for _, name in xlsx_sheet_extraction(workbook)]
        assert "Q1 Revenue (USD)" in sheet_names
        assert "Données" in sheet_names

View File

@@ -1,13 +1,11 @@
import pytest
from chonkie import SentenceChunker
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import SECTION_SEPARATOR
from onyx.connectors.models import IndexingDocument
from onyx.connectors.models import Section
from onyx.connectors.models import SectionType
from onyx.indexing.chunking import DocumentChunker
from onyx.indexing.chunking import text_section_chunker as text_chunker_module
from onyx.indexing import chunker as chunker_module
from onyx.indexing.chunker import Chunker
from onyx.natural_language_processing.utils import BaseTokenizer
@@ -29,26 +27,16 @@ class CharTokenizer(BaseTokenizer):
CHUNK_LIMIT = 200
def _make_document_chunker(
def _make_chunker(
chunk_token_limit: int = CHUNK_LIMIT,
) -> DocumentChunker:
def token_counter(text: str) -> int:
return len(text)
return DocumentChunker(
enable_multipass: bool = False,
) -> Chunker:
return Chunker(
tokenizer=CharTokenizer(),
blurb_splitter=SentenceChunker(
tokenizer_or_token_counter=token_counter,
chunk_size=128,
chunk_overlap=0,
return_type="texts",
),
chunk_splitter=SentenceChunker(
tokenizer_or_token_counter=token_counter,
chunk_size=chunk_token_limit,
chunk_overlap=0,
return_type="texts",
),
enable_multipass=enable_multipass,
enable_large_chunks=False,
enable_contextual_rag=False,
chunk_token_limit=chunk_token_limit,
)
@@ -74,10 +62,10 @@ def _make_doc(
def test_empty_processed_sections_returns_single_empty_safety_chunk() -> None:
"""No sections at all should still yield one empty chunk (the
`or not chunks` safety branch at the end)."""
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(sections=[])
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=[],
title_prefix="TITLE\n",
@@ -99,13 +87,13 @@ def test_empty_processed_sections_returns_single_empty_safety_chunk() -> None:
def test_empty_section_on_first_position_without_title_is_skipped() -> None:
"""Doc has no title, first section has empty text — the guard
`(not document.title or section_idx > 0)` means it IS skipped."""
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[Section(type=SectionType.TEXT, text="", link="l0")],
sections=[Section(text="", link="l0")],
title=None,
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -121,16 +109,16 @@ def test_empty_section_on_first_position_without_title_is_skipped() -> None:
def test_empty_section_on_later_position_is_skipped_even_with_title() -> None:
"""Index > 0 empty sections are skipped regardless of title."""
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text="Alpha.", link="l0"),
Section(type=SectionType.TEXT, text="", link="l1"), # should be skipped
Section(type=SectionType.TEXT, text="Beta.", link="l2"),
Section(text="Alpha.", link="l0"),
Section(text="", link="l1"), # should be skipped
Section(text="Beta.", link="l2"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -150,12 +138,10 @@ def test_empty_section_on_later_position_is_skipped_even_with_title() -> None:
def test_single_small_text_section_becomes_one_chunk() -> None:
dc = _make_document_chunker()
doc = _make_doc(
sections=[Section(type=SectionType.TEXT, text="Hello world.", link="https://a")]
)
chunker = _make_chunker()
doc = _make_doc(sections=[Section(text="Hello world.", link="https://a")])
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="TITLE\n",
@@ -179,15 +165,15 @@ def test_single_small_text_section_becomes_one_chunk() -> None:
def test_multiple_small_sections_combine_into_one_chunk() -> None:
dc = _make_document_chunker()
chunker = _make_chunker()
sections = [
Section(type=SectionType.TEXT, text="Part one.", link="l1"),
Section(type=SectionType.TEXT, text="Part two.", link="l2"),
Section(type=SectionType.TEXT, text="Part three.", link="l3"),
Section(text="Part one.", link="l1"),
Section(text="Part two.", link="l2"),
Section(text="Part three.", link="l3"),
]
doc = _make_doc(sections=sections)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -211,19 +197,19 @@ def test_multiple_small_sections_combine_into_one_chunk() -> None:
def test_sections_overflow_into_second_chunk() -> None:
"""Two sections that together exceed content_token_limit should
finalize the first as one chunk and start a new one."""
dc = _make_document_chunker()
chunker = _make_chunker()
# char-level: 120 char section → 120 tokens. 2 of these plus separator
# exceed a 200-token limit, forcing a flush.
a = "A" * 120
b = "B" * 120
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text=a, link="la"),
Section(type=SectionType.TEXT, text=b, link="lb"),
Section(text=a, link="la"),
Section(text=b, link="lb"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -250,11 +236,10 @@ def test_sections_overflow_into_second_chunk() -> None:
def test_image_only_section_produces_single_chunk_with_image_id() -> None:
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(
type=SectionType.IMAGE,
text="summary of image",
link="https://img",
image_file_id="img-abc",
@@ -262,7 +247,7 @@ def test_image_only_section_produces_single_chunk_with_image_id() -> None:
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -280,21 +265,20 @@ def test_image_only_section_produces_single_chunk_with_image_id() -> None:
def test_image_section_flushes_pending_text_and_creates_its_own_chunk() -> None:
"""A buffered text section followed by an image section:
the pending text should be flushed first, then the image chunk."""
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text="Pending text.", link="ltext"),
Section(text="Pending text.", link="ltext"),
Section(
type=SectionType.IMAGE,
text="image summary",
link="limage",
image_file_id="img-1",
),
Section(type=SectionType.TEXT, text="Trailing text.", link="ltail"),
Section(text="Trailing text.", link="ltail"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -323,19 +307,14 @@ def test_image_section_flushes_pending_text_and_creates_its_own_chunk() -> None:
def test_image_section_without_link_gets_empty_links_dict() -> None:
"""If an image section has no link, links param is {} (not {0: ""})."""
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(
type=SectionType.IMAGE,
text="img",
link=None,
image_file_id="img-xyz",
),
Section(text="img", link=None, image_file_id="img-xyz"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -346,7 +325,7 @@ def test_image_section_without_link_gets_empty_links_dict() -> None:
assert len(chunks) == 1
assert chunks[0].image_file_id == "img-xyz"
# to_doc_aware_chunk falls back to {0: ""} when given an empty dict
# _create_chunk falls back to {0: ""} when given an empty dict
assert chunks[0].source_links == {0: ""}
@@ -357,7 +336,7 @@ def test_oversized_section_is_split_across_multiple_chunks() -> None:
"""A section whose text exceeds content_token_limit should be passed
through chunk_splitter and yield >1 chunks; only the first is not a
continuation."""
dc = _make_document_chunker()
chunker = _make_chunker()
# Build a section whose char-count is well over CHUNK_LIMIT (200), made
# of many short sentences so chonkie's SentenceChunker can split cleanly.
section_text = (
@@ -370,10 +349,10 @@ def test_oversized_section_is_split_across_multiple_chunks() -> None:
assert len(section_text) > CHUNK_LIMIT
doc = _make_doc(
sections=[Section(type=SectionType.TEXT, text=section_text, link="big-link")],
sections=[Section(text=section_text, link="big-link")],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -400,7 +379,7 @@ def test_oversized_section_is_split_across_multiple_chunks() -> None:
def test_oversized_section_flushes_pending_text_first() -> None:
"""A buffered text section followed by an oversized section should
flush the pending chunk first, then emit the split chunks."""
dc = _make_document_chunker()
chunker = _make_chunker()
pending = "Pending buffered text."
big = (
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
@@ -412,12 +391,12 @@ def test_oversized_section_flushes_pending_text_first() -> None:
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text=pending, link="l-pending"),
Section(type=SectionType.TEXT, text=big, link="l-big"),
Section(text=pending, link="l-pending"),
Section(text=big, link="l-big"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -446,15 +425,15 @@ def test_oversized_section_flushes_pending_text_first() -> None:
def test_title_prefix_and_metadata_propagate_to_all_chunks() -> None:
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text="A" * 120, link="la"),
Section(type=SectionType.TEXT, text="B" * 120, link="lb"),
Section(text="A" * 120, link="la"),
Section(text="B" * 120, link="lb"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="MY_TITLE\n",
@@ -474,16 +453,16 @@ def test_title_prefix_and_metadata_propagate_to_all_chunks() -> None:
def test_chunk_ids_are_sequential_starting_at_zero() -> None:
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text="A" * 120, link="la"),
Section(type=SectionType.TEXT, text="B" * 120, link="lb"),
Section(type=SectionType.TEXT, text="C" * 120, link="lc"),
Section(text="A" * 120, link="la"),
Section(text="B" * 120, link="lb"),
Section(text="C" * 120, link="lc"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -501,18 +480,18 @@ def test_chunk_ids_are_sequential_starting_at_zero() -> None:
def test_overflow_flush_then_subsequent_section_joins_new_chunk() -> None:
"""After an overflow flush starts a new chunk, the next fitting section
should combine into that same new chunk (not spawn a third)."""
dc = _make_document_chunker()
chunker = _make_chunker()
# 120 + 120 > 200 → first two sections produce two chunks.
# Third section is small (20 chars) → should fit with second.
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text="A" * 120, link="la"),
Section(type=SectionType.TEXT, text="B" * 120, link="lb"),
Section(type=SectionType.TEXT, text="C" * 20, link="lc"),
Section(text="A" * 120, link="la"),
Section(text="B" * 120, link="lb"),
Section(text="C" * 20, link="lc"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -532,7 +511,7 @@ def test_small_section_after_oversized_starts_a_fresh_chunk() -> None:
"""After an oversized section is emitted as its own chunks, the internal
accumulator should be empty so a following small section starts a new
chunk instead of being swallowed."""
dc = _make_document_chunker()
chunker = _make_chunker()
big = (
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
@@ -542,12 +521,12 @@ def test_small_section_after_oversized_starts_a_fresh_chunk() -> None:
assert len(big) > CHUNK_LIMIT
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text=big, link="l-big"),
Section(type=SectionType.TEXT, text="Tail text.", link="l-tail"),
Section(text=big, link="l-big"),
Section(text="Tail text.", link="l-tail"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -576,14 +555,14 @@ def test_strict_chunk_token_limit_subdivides_oversized_split(
"""When STRICT_CHUNK_TOKEN_LIMIT is enabled and chonkie's chunk_splitter
still produces a piece larger than content_token_limit (e.g. a single
no-period run), the code must fall back to _split_oversized_chunk."""
monkeypatch.setattr(text_chunker_module, "STRICT_CHUNK_TOKEN_LIMIT", True)
dc = _make_document_chunker()
monkeypatch.setattr(chunker_module, "STRICT_CHUNK_TOKEN_LIMIT", True)
chunker = _make_chunker()
# 500 non-whitespace chars with no sentence boundaries — chonkie will
# return it as one oversized piece (>200) which triggers the fallback.
run = "a" * 500
doc = _make_doc(sections=[Section(type=SectionType.TEXT, text=run, link="l-run")])
doc = _make_doc(sections=[Section(text=run, link="l-run")])
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -613,12 +592,12 @@ def test_strict_chunk_token_limit_disabled_allows_oversized_split(
) -> None:
"""Same pathological input, but with STRICT disabled: the oversized
split is emitted verbatim as a single chunk (current behavior)."""
monkeypatch.setattr(text_chunker_module, "STRICT_CHUNK_TOKEN_LIMIT", False)
dc = _make_document_chunker()
monkeypatch.setattr(chunker_module, "STRICT_CHUNK_TOKEN_LIMIT", False)
chunker = _make_chunker()
run = "a" * 500
doc = _make_doc(sections=[Section(type=SectionType.TEXT, text=run, link="l-run")])
doc = _make_doc(sections=[Section(text=run, link="l-run")])
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -640,18 +619,16 @@ def test_first_empty_section_with_title_is_processed_not_skipped() -> None:
the doc has a title AND it's the first section, an empty text section
is NOT skipped. This pins current behavior so a refactor can't silently
change it."""
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(
type=SectionType.TEXT, text="", link="l0"
), # empty first section, kept
Section(type=SectionType.TEXT, text="Real content.", link="l1"),
Section(text="", link="l0"), # empty first section, kept
Section(text="Real content.", link="l1"),
],
title="Has A Title",
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -674,13 +651,13 @@ def test_first_empty_section_with_title_is_processed_not_skipped() -> None:
def test_clean_text_strips_control_chars_from_section_content() -> None:
"""clean_text() should remove control chars before the text enters the
accumulator — verifies the call isn't dropped by a refactor."""
dc = _make_document_chunker()
chunker = _make_chunker()
# NUL + BEL are control chars below 0x20 and not \n or \t → should be
# stripped by clean_text.
dirty = "Hello\x00 World\x07!"
doc = _make_doc(sections=[Section(type=SectionType.TEXT, text=dirty, link="l1")])
doc = _make_doc(sections=[Section(text=dirty, link="l1")])
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -700,16 +677,16 @@ def test_section_with_none_text_behaves_like_empty_string() -> None:
"""`section.text` may be None — the method coerces via
`str(section.text or "")`, so a None-text section behaves identically
to an empty one (skipped unless it's the first section of a titled doc)."""
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text="Alpha.", link="la"),
Section(type=SectionType.TEXT, text=None, link="lnone"), # idx 1 → skipped
Section(type=SectionType.TEXT, text="Beta.", link="lb"),
Section(text="Alpha.", link="la"),
Section(text=None, link="lnone"), # idx 1 → skipped
Section(text="Beta.", link="lb"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -731,20 +708,15 @@ def test_no_trailing_empty_chunk_when_last_section_was_image() -> None:
"""If the final section was an image (which emits its own chunk and
resets chunk_text), the safety `or not chunks` branch should NOT fire
because chunks is non-empty. Pin this explicitly."""
dc = _make_document_chunker()
chunker = _make_chunker()
doc = _make_doc(
sections=[
Section(type=SectionType.TEXT, text="Leading text.", link="ltext"),
Section(
type=SectionType.IMAGE,
text="img summary",
link="limg",
image_file_id="img-final",
),
Section(text="Leading text.", link="ltext"),
Section(text="img summary", link="limg", image_file_id="img-final"),
],
)
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",
@@ -764,7 +736,7 @@ def test_no_trailing_empty_chunk_when_last_section_was_image() -> None:
def test_no_trailing_empty_chunk_when_last_section_was_oversized() -> None:
"""Same guarantee for oversized sections: their splits fully clear the
accumulator, and the trailing safety branch should be a no-op."""
dc = _make_document_chunker()
chunker = _make_chunker()
big = (
"Alpha beta gamma. Delta epsilon zeta. Eta theta iota. "
"Kappa lambda mu. Nu xi omicron. Pi rho sigma. Tau upsilon phi. "
@@ -772,9 +744,9 @@ def test_no_trailing_empty_chunk_when_last_section_was_oversized() -> None:
"Ten eleven twelve. Thirteen fourteen fifteen. Sixteen seventeen."
)
assert len(big) > CHUNK_LIMIT
doc = _make_doc(sections=[Section(type=SectionType.TEXT, text=big, link="l-big")])
doc = _make_doc(sections=[Section(text=big, link="l-big")])
chunks = dc.chunk(
chunks = chunker._chunk_document_with_sections(
document=doc,
sections=doc.processed_sections,
title_prefix="",

View File

@@ -193,9 +193,10 @@ hover, active, and disabled states.
### Disabled (`core/disabled/`)
A pure CSS wrapper that applies disabled visuals (`opacity-50`, `cursor-not-allowed`,
`pointer-events: none`) to a single child element via Radix `Slot`. Has no React context —
Interactive primitives and buttons manage their own disabled state via a `disabled` prop.
A `<div>` wrapper that applies disabled visuals (`opacity-50`, `cursor-not-allowed`,
`pointer-events: none`) to its children via the `data-opal-disabled` data attribute. Supports an
optional `tooltip` prop (shown on hover when disabled) and `allowClick` to re-enable pointer
events. Interactive primitives and buttons manage their own disabled state via a `disabled` prop.
### Hoverable (`core/animations/`)

View File

@@ -1,5 +1,4 @@
import type { HTMLAttributes } from "react";
import type { RichStr, WithoutStyles } from "@opal/types";
import { cn } from "@opal/utils";
import { resolveStr } from "@opal/components/text/InlineMarkdown";

View File

@@ -2,6 +2,7 @@
.opal-tooltip {
z-index: var(--z-tooltip, 1300);
max-width: 20rem;
@apply rounded-08 px-3 py-2 text-sm
bg-background-neutral-dark-03 text-text-light-05
animate-in fade-in-0 zoom-in-95

View File

@@ -0,0 +1,129 @@
import type { Meta, StoryObj } from "@storybook/react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import type { Decorator } from "@storybook/react";
import { Disabled } from "@opal/core";
import { Card } from "@opal/components";
import { Button } from "@opal/components/buttons/button/components";
// Story decorator: renders the story inside a Radix tooltip provider.
const withTooltipProvider: Decorator = (StoryFn) => {
  return (
    <TooltipPrimitive.Provider>
      <StoryFn />
    </TooltipPrimitive.Provider>
  );
};
// Storybook metadata for the Disabled stories: registers them under
// "opal/core/Disabled" and applies the tooltip-provider decorator to each one.
const meta: Meta<typeof Disabled> = {
title: "opal/core/Disabled",
component: Disabled,
tags: ["autodocs"],
decorators: [withTooltipProvider],
};
export default meta;
// Story object type shared by every story exported from this file.
type Story = StoryObj<typeof Disabled>;
// Shared demo content: a bordered card with a title, a short description
// and an action button, reused by several stories below.
function SampleContent() {
  return (
    <Card border="solid" padding="md">
      <div className="flex flex-col gap-2">
        <p className="text-sm font-medium">Card Title</p>
        <p className="text-xs text-text-03">Some content that can be disabled.</p>
        <Button prominence="secondary" size="sm">
          Action
        </Button>
      </div>
    </Card>
  );
}
// Baseline: the wrapper is present but `disabled` is false.
export const Enabled: Story = {
  render() {
    return (
      <div className="w-80">
        <Disabled disabled={false}>
          <SampleContent />
        </Disabled>
      </div>
    );
  },
};
// The sample content wrapped with `disabled` set, no tooltip.
export const DisabledState: Story = {
  render() {
    return (
      <div className="w-80">
        <Disabled disabled>
          <SampleContent />
        </Disabled>
      </div>
    );
  },
};
// Disabled content that also passes a `tooltip` string.
export const WithTooltip: Story = {
  render() {
    return (
      <div className="w-80">
        <Disabled disabled tooltip="This feature requires a Pro plan">
          <SampleContent />
        </Disabled>
      </div>
    );
  },
};
// One disabled card per `tooltipSide` value, stacked vertically.
export const TooltipSides: Story = {
  render() {
    const placements = ["top", "right", "bottom", "left"] as const;

    return (
      <div className="flex flex-col gap-8 items-center py-16">
        {placements.map((placement) => (
          <Disabled
            key={placement}
            disabled
            tooltip={`Tooltip on ${placement}`}
            tooltipSide={placement}
          >
            <Card border="solid" padding="sm">
              <p className="text-sm">tooltipSide: {placement}</p>
            </Card>
          </Disabled>
        ))}
      </div>
    );
  },
};
// Disabled visuals combined with `allowClick`; the inner button has a click
// handler that raises an alert.
export const WithAllowClick: Story = {
  render() {
    const handleClick = () => alert("Clicked!");

    return (
      <div className="w-80">
        <Disabled disabled allowClick>
          <Card border="solid" padding="md">
            <p className="text-sm">
              Disabled visuals, but pointer events are still active.
            </p>
            <Button prominence="tertiary" size="sm" onClick={handleClick}>
              Click me
            </Button>
          </Card>
        </Disabled>
      </div>
    );
  },
};
// Side-by-side columns: enabled, disabled, and disabled with a tooltip.
export const Comparison: Story = {
  render() {
    return (
      <div className="flex gap-4">
        <div className="flex flex-col gap-2 w-60">
          <p className="text-xs font-medium">Enabled</p>
          <Disabled disabled={false}>
            <SampleContent />
          </Disabled>
        </div>

        <div className="flex flex-col gap-2 w-60">
          <p className="text-xs font-medium">Disabled</p>
          <Disabled disabled>
            <SampleContent />
          </Disabled>
        </div>

        <div className="flex flex-col gap-2 w-60">
          <p className="text-xs font-medium">Disabled + Tooltip</p>
          <Disabled disabled tooltip="Not available right now">
            <SampleContent />
          </Disabled>
        </div>
      </div>
    );
  },
};

View File

@@ -0,0 +1,42 @@
# Disabled
**Import:** `import { Disabled } from "@opal/core";`
Wrapper component that applies baseline disabled CSS (opacity, cursor, pointer-events) to its children. Renders a `<div>` with the `data-opal-disabled` attribute so styling cascades into all descendants. Works with any children — DOM elements, React components, or fragments.
## Props
| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `disabled` | `boolean` | `false` | Applies disabled styling when truthy |
| `allowClick` | `boolean` | `false` | Re-enables pointer events while keeping disabled visuals |
| `tooltip` | `string \| RichStr` | — | Tooltip shown on hover when disabled (implies `allowClick`). Supports `markdown()`. |
| `tooltipSide` | `"top" \| "bottom" \| "left" \| "right"` | `"right"` | Which side the tooltip appears on |
## CSS behavior
| Selector | Effect |
|----------|--------|
| `[data-opal-disabled]` | `cursor-not-allowed`, `select-none`, `pointer-events: none` |
| `[data-opal-disabled]:not(.interactive)` | `opacity-50` (non-Interactive elements only) |
| `[data-opal-disabled].interactive` | `pointer-events: auto` (Interactive elements handle their own disabled colors) |
| `[data-opal-disabled][data-allow-click]` | `pointer-events: auto` |
## Usage
```tsx
// Basic — disables children visually and blocks pointer events
<Disabled disabled={!canSubmit}>
<Card>Content</Card>
</Disabled>
// With tooltip — explains why the section is disabled
<Disabled disabled={!canSubmit} tooltip="Complete the form first">
<Card>Content</Card>
</Disabled>
// With allowClick — keeps pointer events for custom handling
<Disabled disabled={isProcessing} allowClick>
<MyInputBar />
</Disabled>
```

View File

@@ -1,13 +1,18 @@
import "@opal/core/disabled/styles.css";
import "@opal/components/tooltip.css";
import React from "react";
import { Slot } from "@radix-ui/react-slot";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import type { TooltipSide } from "@opal/components";
import type { RichStr, WithoutStyles } from "@opal/types";
import { Text } from "@opal/components";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
interface DisabledProps extends React.HTMLAttributes<HTMLElement> {
ref?: React.Ref<HTMLElement>;
interface DisabledProps
extends WithoutStyles<React.HTMLAttributes<HTMLDivElement>> {
ref?: React.Ref<HTMLDivElement>;
/**
* When truthy, applies disabled styling to child elements.
@@ -16,13 +21,23 @@ interface DisabledProps extends React.HTMLAttributes<HTMLElement> {
/**
* When `true`, re-enables pointer events while keeping the disabled
* visual treatment. Useful for elements that need to show tooltips or
* error messages on click.
* visual treatment. Useful for elements that need to remain interactive
* (e.g. to show tooltips or handle clicks at a higher level).
* @default false
*/
allowClick?: boolean;
children: React.ReactElement;
/**
* Tooltip content shown on hover when disabled. Implies `allowClick` so that
* the tooltip trigger can receive pointer events. Supports inline markdown
* via `markdown()`.
*/
tooltip?: string | RichStr;
/** Which side the tooltip appears on. @default "right" */
tooltipSide?: TooltipSide;
children?: React.ReactNode;
}
// ---------------------------------------------------------------------------
@@ -31,35 +46,63 @@ interface DisabledProps extends React.HTMLAttributes<HTMLElement> {
/**
* Wrapper component that applies baseline disabled CSS (opacity, cursor,
* pointer-events) to its child element.
* pointer-events) to its children.
*
* Uses Radix `Slot` — merges props onto the single child element without
* adding any DOM node. Works correctly inside Radix `asChild` chains.
* Renders a `<div>` that carries the `data-opal-disabled` attribute so the
* CSS rules in `styles.css` take effect on the wrapper and cascade into its
* descendants. Works with any children (DOM elements, React components, or
* fragments).
*
* @example
* ```tsx
* <Disabled disabled={!canSubmit}>
* <div>...</div>
* <MyComponent />
* </Disabled>
*
* <Disabled disabled={!canSubmit} tooltip="Feature not available">
* <MyComponent />
* </Disabled>
* ```
*/
function Disabled({
disabled,
allowClick,
children,
tooltip,
tooltipSide = "right",
ref,
...rest
}: DisabledProps) {
return (
<Slot
const showTooltip = disabled && tooltip;
const enableClick = allowClick || showTooltip;
const wrapper = (
<div
ref={ref}
className="opal-disabled"
{...rest}
aria-disabled={disabled || undefined}
data-opal-disabled={disabled || undefined}
data-allow-click={disabled && allowClick ? "" : undefined}
>
{children}
</Slot>
data-allow-click={disabled && enableClick ? "" : undefined}
/>
);
if (!showTooltip) return wrapper;
// TODO(@raunakab): Replace this raw Radix tooltip with the opalified
// Tooltip component once it lands.
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{wrapper}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
<Text font="secondary-body">{tooltip}</Text>
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
);
}

View File

@@ -1,4 +1,4 @@
/* Disabled — baseline disabled visuals via Radix Slot (no extra DOM node).
/* Disabled — baseline disabled visuals applied via a wrapper <div>.
*
* [data-opal-disabled] → cursor + pointer-events for all
* [data-opal-disabled]:not(.interactive) → opacity for non-Interactive elements
@@ -9,6 +9,10 @@
* re-enabled so the JS layer can suppress onClick.
*/
.opal-disabled {
@apply self-stretch;
}
[data-opal-disabled] {
@apply cursor-not-allowed select-none;
pointer-events: none;

View File

@@ -258,102 +258,23 @@ pre[class*="language-"] {
scrollbar-color: #4b5563 #1f2937;
}
/* Card wrapper — holds the background, border-radius, padding, and fade overlay.
Does NOT scroll — the inner .markdown-table-breakout handles that. */
.markdown-table-card {
position: relative;
background: var(--background-neutral-01);
border-radius: 0.5rem;
padding: 0.5rem 0;
}
/*
* Scrollable table container — sits inside the card.
* Table breakout container - allows tables to extend beyond their parent's
* constrained width to use the full container query width (100cqw).
*
* Requires an ancestor element with `container-type: inline-size` (@container in Tailwind).
*
* How the math works:
* - width: 100cqw → expand to full container query width
* - marginLeft: calc((100% - 100cqw) / 2) → negative margin pulls element left
* (100% is parent width, 100cqw is larger, so result is negative)
* - paddingLeft/Right: calc((100cqw - 100%) / 2) → padding keeps content aligned
* with original position while allowing scroll area to extend
*/
.markdown-table-breakout {
overflow-x: auto;
/* Always reserve scrollbar height so hover doesn't shift content.
Thumb is transparent by default, revealed on hover. */
scrollbar-width: thin; /* Firefox — always shows track */
scrollbar-color: transparent transparent; /* invisible thumb + track */
}
.markdown-table-breakout::-webkit-scrollbar {
height: 6px;
}
.markdown-table-breakout::-webkit-scrollbar-track {
background: transparent;
}
.markdown-table-breakout::-webkit-scrollbar-thumb {
background: transparent;
border-radius: 3px;
}
.markdown-table-breakout:hover {
scrollbar-color: var(--border-03) transparent; /* Firefox — reveal thumb */
}
.markdown-table-breakout:hover::-webkit-scrollbar-thumb {
background: var(--border-03);
}
/* Fade the right edge via an ::after overlay on the non-scrolling card.
Stays pinned while table scrolls; doesn't affect the sticky column. */
.markdown-table-card::after {
content: "";
position: absolute;
top: 0;
right: 0;
bottom: 0;
width: 2rem;
pointer-events: none;
z-index: 2;
background: linear-gradient(
to right,
transparent,
var(--background-neutral-01)
);
border-radius: 0 0.5rem 0.5rem 0;
opacity: 0;
transition: opacity 0.15s;
}
.markdown-table-card[data-overflows="true"]::after {
opacity: 1;
}
/* Sticky first column — inherits the container's background so it
matches regardless of theme or custom wallpaper. */
.markdown-table-breakout th:first-child,
.markdown-table-breakout td:first-child {
position: sticky;
left: 0;
z-index: 1;
padding-left: 0.75rem;
background: var(--background-neutral-01);
}
.markdown-table-breakout th:last-child,
.markdown-table-breakout td:last-child {
padding-right: 0.75rem;
}
/* Shadow on sticky column when scrolled. Uses an ::after pseudo-element
so it isn't clipped by the overflow container or the mask-image fade. */
.markdown-table-breakout th:first-child::after,
.markdown-table-breakout td:first-child::after {
content: "";
position: absolute;
top: 0;
right: -6px;
bottom: 0;
width: 6px;
pointer-events: none;
opacity: 0;
transition: opacity 0.15s;
box-shadow: inset 6px 0 8px -4px var(--alpha-grey-100-25);
}
.dark .markdown-table-breakout th:first-child::after,
.dark .markdown-table-breakout td:first-child::after {
box-shadow: inset 6px 0 8px -4px var(--alpha-grey-100-60);
}
.markdown-table-breakout[data-scrolled="true"] th:first-child::after,
.markdown-table-breakout[data-scrolled="true"] td:first-child::after {
opacity: 1;
width: 100cqw;
margin-left: calc((100% - 100cqw) / 2);
padding-left: calc((100cqw - 100%) / 2);
padding-right: calc((100cqw - 100%) / 2);
}

View File

@@ -1,4 +1,4 @@
import React, { useCallback, useEffect, useRef, useMemo, JSX } from "react";
import React, { useCallback, useMemo, JSX } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import remarkMath from "remark-math";
@@ -17,66 +17,6 @@ import { transformLinkUri, cn } from "@/lib/utils";
import { InMessageImage } from "@/app/app/components/files/images/InMessageImage";
import { extractChatImageFileId } from "@/app/app/components/files/images/utils";
/**
 * Props for `ScrollableTable`, the table wrapper that detects horizontal
 * overflow and shows a fade + scrollbar.
 *
 * Accepts every native `<table>` attribute (they are forwarded to the
 * underlying `<table>` element); `children` is required.
 */
interface ScrollableTableProps
extends React.TableHTMLAttributes<HTMLTableElement> {
children: React.ReactNode;
}
/**
 * Renders a `<table>` inside two wrapper `<div>`s and keeps two data
 * attributes in sync with the horizontal scroll state:
 *
 * - `data-overflows` on the outer `.markdown-table-card` is `"true"` while the
 *   content is wider than the scroll container and the user has not yet
 *   scrolled to the end.
 * - `data-scrolled` on the inner `.markdown-table-breakout` is `"true"` once
 *   the container has been scrolled away from its start.
 *
 * The attributes are presumably consumed by the stylesheet (fade overlay and
 * sticky-column shadow) — verify against the `.markdown-table-*` CSS rules.
 *
 * @param className - Caller-supplied classes, merged with the fixed table
 *   classes via `cn`.
 * @param children - Table content (rows, sections).
 * @param props - Any remaining native `<table>` attributes, spread onto the
 *   `<table>` element.
 */
export function ScrollableTable({
className,
children,
...props
}: ScrollableTableProps) {
// scrollRef → inner scrolling container, wrapRef → outer card,
// tableRef → the <table> itself.
const scrollRef = useRef<HTMLDivElement>(null);
const wrapRef = useRef<HTMLDivElement>(null);
const tableRef = useRef<HTMLTableElement>(null);
useEffect(() => {
const el = scrollRef.current;
const wrap = wrapRef.current;
const table = tableRef.current;
// Nothing to measure unless both wrapper elements are mounted.
if (!el || !wrap) return;
// Recomputes both data attributes from the current scroll geometry.
const check = () => {
const overflows = el.scrollWidth > el.clientWidth;
// The 2px slack is presumably a tolerance for sub-pixel rounding — confirm.
const atEnd = el.scrollLeft + el.clientWidth >= el.scrollWidth - 2;
// "true" only while there is overflow AND more content remains past the
// visible edge.
wrap.dataset.overflows = overflows && !atEnd ? "true" : "false";
el.dataset.scrolled = el.scrollLeft > 0 ? "true" : "false";
};
// Set the initial state before any scroll or resize event fires.
check();
// Passive listener: `check` only reads geometry and never calls
// preventDefault.
el.addEventListener("scroll", check, { passive: true });
// Observe both the scroll container (parent resize) and the table
// itself (content growth during streaming).
const ro = new ResizeObserver(check);
ro.observe(el);
if (table) ro.observe(table);
// Cleanup: detach the scroll listener and stop all resize observation.
return () => {
el.removeEventListener("scroll", check);
ro.disconnect();
};
// Empty dependency list: set up once on mount, tear down on unmount.
}, []);
return (
<div ref={wrapRef} className="markdown-table-card">
<div ref={scrollRef} className="markdown-table-breakout">
<table
ref={tableRef}
className={cn(
className,
"min-w-full !my-0 [&_th]:whitespace-nowrap [&_td]:whitespace-nowrap"
)}
{...props}
>
{children}
</table>
</div>
</div>
);
}
/**
* Processes content for markdown rendering by handling code blocks and LaTeX
*/
@@ -187,9 +127,11 @@ export const useMarkdownComponents = (
},
table: ({ node, className, children, ...props }: any) => {
return (
<ScrollableTable className={className} {...props}>
{children}
</ScrollableTable>
<div className="markdown-table-breakout">
<table className={cn(className, "min-w-full")} {...props}>
{children}
</table>
</div>
);
},
code: ({ node, className, children }: any) => {

View File

@@ -16,7 +16,7 @@ import {
} from "../../../services/streamingModels";
import { MessageRenderer, FullChatState } from "../interfaces";
import { isFinalAnswerComplete } from "../../../services/packetUtils";
import { processContent, ScrollableTable } from "../markdownUtils";
import { processContent } from "../markdownUtils";
import { BlinkingBar } from "../../BlinkingBar";
import { useVoiceMode } from "@/providers/VoiceModeProvider";
import {
@@ -338,9 +338,11 @@ export const MessageTextRenderer: MessageRenderer<
</li>
),
table: ({ className, children, ...rest }) => (
<ScrollableTable className={className} {...rest}>
{children}
</ScrollableTable>
<div className="markdown-table-breakout">
<table className={cn(className, "min-w-full")} {...rest}>
{children}
</table>
</div>
),
code: ({ node, className, children }) => {
const codeText = extractCodeText(

View File

@@ -516,7 +516,8 @@ export default function NRFPage({ isSidePanel = false }: NRFPageProps) {
ref={inputRef}
className={cn(
"w-full flex flex-col",
!isSidePanel && "max-w-[var(--app-page-main-content-width)]"
!isSidePanel &&
"max-w-[var(--app-page-main-content-width)] px-4"
)}
>
{hasMessages && liveAgent && !llmManager.isLoadingProviders && (

View File

@@ -644,7 +644,6 @@ export default function useChatController({
});
node.modelDisplayName = model.displayName;
node.overridden_model = model.modelName;
node.is_generating = true;
return node;
});
}
@@ -1053,14 +1052,10 @@ export default function useChatController({
// In multi-model mode, route per-model errors to the specific model's
// node instead of killing the entire stream. Other models keep streaming.
if (isMultiModel) {
// Multi-model: isolate the error to its panel. Never throw
// or set global error state — other models keep streaming.
const errorModelIndex = streamingError.details?.model_index as
| number
| undefined;
if (isMultiModel && streamingError.details?.model_index != null) {
const errorModelIndex = streamingError.details
.model_index as number;
if (
errorModelIndex != null &&
errorModelIndex >= 0 &&
errorModelIndex < initialAssistantNodes.length
) {
@@ -1093,15 +1088,8 @@ export default function useChatController({
completeMessageTreeOverride: currentMessageTreeLocal,
chatSessionId: frozenSessionId!,
});
} else {
// Error without model_index in multi-model — can't route
// to a specific panel. Log and continue; the stream loop
// stays alive for other models.
console.warn(
"Multi-model error without model_index:",
streamingError.error
);
}
// Skip the normal per-packet upsert — we already upserted the error node
continue;
} else {
// Single-model: kill the stream
@@ -1257,7 +1245,6 @@ export default function useChatController({
errorCode,
isRetryable,
errorDetails,
is_generating: false,
})
: [
{

View File

@@ -48,7 +48,6 @@ describe("useSettings", () => {
anonymous_user_enabled: false,
invite_only_enabled: false,
deep_research_enabled: true,
multi_model_chat_enabled: true,
temperature_override_enabled: true,
query_history_type: QueryHistoryType.NORMAL,
});
@@ -66,7 +65,6 @@ describe("useSettings", () => {
anonymous_user_enabled: false,
invite_only_enabled: false,
deep_research_enabled: true,
multi_model_chat_enabled: true,
temperature_override_enabled: true,
query_history_type: QueryHistoryType.NORMAL,
};

View File

@@ -23,7 +23,6 @@ const DEFAULT_SETTINGS = {
anonymous_user_enabled: false,
invite_only_enabled: false,
deep_research_enabled: true,
multi_model_chat_enabled: true,
temperature_override_enabled: true,
query_history_type: QueryHistoryType.NORMAL,
} satisfies Settings;

View File

@@ -27,7 +27,6 @@ export interface Settings {
query_history_type: QueryHistoryType;
deep_research_enabled?: boolean;
multi_model_chat_enabled?: boolean;
search_ui_enabled?: boolean;
// Image processing settings

View File

@@ -9,7 +9,6 @@ import { useField, useFormikContext } from "formik";
import { Section } from "@/layouts/general-layouts";
import { Content } from "@opal/layouts";
import Label from "@/refresh-components/form/Label";
import type { TagProps } from "@opal/components/tag/components";
interface OrientationLayoutProps {
name?: string;
@@ -17,8 +16,6 @@ interface OrientationLayoutProps {
nonInteractive?: boolean;
children?: React.ReactNode;
title: string | RichStr;
/** Tag rendered inline beside the title (passed through to Content). */
tag?: TagProps;
description?: string | RichStr;
suffix?: "optional" | (string & {});
sizePreset?: "main-content" | "main-ui";
@@ -131,7 +128,6 @@ function HorizontalInputLayout({
children,
center,
title,
tag,
description,
suffix,
sizePreset = "main-content",
@@ -148,7 +144,6 @@ function HorizontalInputLayout({
title={title}
description={description}
suffix={suffix}
tag={tag}
sizePreset={sizePreset}
variant="section"
widthVariant="full"

View File

@@ -6,7 +6,6 @@ import { LlmManager } from "@/lib/hooks";
import { getModelIcon } from "@/lib/llmConfig";
import { Button, SelectButton, OpenButton } from "@opal/components";
import { SvgPlusCircle, SvgX } from "@opal/icons";
import { useSettingsContext } from "@/providers/SettingsProvider";
import { LLMOption } from "@/refresh-components/popovers/interfaces";
import ModelListContent from "@/refresh-components/popovers/ModelListContent";
import Separator from "@/refresh-components/Separator";
@@ -45,12 +44,8 @@ export default function ModelSelector({
// Virtual anchor ref — points to the clicked pill so the popover positions above it
const anchorRef = useRef<HTMLElement | null>(null);
const settings = useSettingsContext();
const multiModelAllowed =
settings?.settings?.multi_model_chat_enabled ?? true;
const isMultiModel = selectedModels.length > 1;
const atMax = selectedModels.length >= MAX_MODELS || !multiModelAllowed;
const atMax = selectedModels.length >= MAX_MODELS;
const selectedKeys = useMemo(
() => new Set(selectedModels.map((m) => modelKey(m.provider, m.modelName))),

View File

@@ -595,46 +595,25 @@ function ChatPreferencesForm() {
variant="section"
/>
<Card>
<SimpleTooltip
tooltip={
uniqueSources.length === 0
? "Set up connectors to use Search Mode"
: undefined
}
side="top"
<Disabled
disabled={uniqueSources.length === 0}
tooltip="Set up connectors to use Search Mode"
>
<Disabled disabled={uniqueSources.length === 0} allowClick>
<div className="w-full">
<InputLayouts.Horizontal
title="Search Mode"
description="UI mode for quick document search across your organization."
disabled={uniqueSources.length === 0}
nonInteractive
>
<Switch
checked={s.search_ui_enabled ?? false}
onCheckedChange={(checked) => {
void saveSettings({ search_ui_enabled: checked });
}}
disabled={uniqueSources.length === 0}
/>
</InputLayouts.Horizontal>
</div>
</Disabled>
</SimpleTooltip>
<InputLayouts.Horizontal
title="Multi-Model Generation"
tag={{ title: "beta", color: "blue" }}
description="Allow multiple models to generate responses in parallel in chat."
nonInteractive
>
<Switch
checked={s.multi_model_chat_enabled ?? true}
onCheckedChange={(checked) => {
void saveSettings({ multi_model_chat_enabled: checked });
}}
/>
</InputLayouts.Horizontal>
<InputLayouts.Horizontal
title="Search Mode"
description="UI mode for quick document search across your organization."
disabled={uniqueSources.length === 0}
nonInteractive
>
<Switch
checked={s.search_ui_enabled ?? false}
onCheckedChange={(checked) => {
void saveSettings({ search_ui_enabled: checked });
}}
disabled={uniqueSources.length === 0}
/>
</InputLayouts.Horizontal>
</Disabled>
<InputLayouts.Horizontal
title="Deep Research"
description="Agentic research system that works across the web and connected sources. Uses significantly more tokens per query."

View File

@@ -331,13 +331,10 @@ const ChatUI = React.memo(
return null;
})}
{/* Error banner when last message is user message or error type.
Skip for multi-model per-panel errors — those are shown in
their own panel, not as a global banner. */}
{/* Error banner when last message is user message or error type */}
{(((error !== null || loadError !== null) &&
messages[messages.length - 1]?.type === "user") ||
(messages[messages.length - 1]?.type === "error" &&
!messages[messages.length - 1]?.modelDisplayName)) && (
messages[messages.length - 1]?.type === "error") && (
<div className={`p-4 w-full ${MSG_MAX_W} self-center`}>
<ErrorBanner
resubmit={onResubmit}