Compare commits

..

3 Commits

Author SHA1 Message Date
Evan Lohn
e0baaf85e5 fix: Anthropic litellm thinking workaround (#9713) 2026-03-27 14:12:15 -07:00
github-actions[bot]
a0ffd47e2c chore(playwright): deflake settings_pages.spec.ts (#9684) to release v3.1 (#9702)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-03-27 09:08:51 -07:00
Jamison Lahman
d0396a1337 fix(fe): Popover content doesnt overflow on small screens (#9612) to release v3.1 (#9700) 2026-03-27 08:43:53 -07:00
55 changed files with 455 additions and 2221 deletions

View File

@@ -24,16 +24,6 @@ When hardcoding a boolean variable to a constant value, remove the variable enti
Code changes must consider both multi-tenant and single-tenant deployments. In multi-tenant mode, preserve tenant isolation, ensure tenant context is propagated correctly, and avoid assumptions that only hold for a single shared schema or globally shared state. In single-tenant mode, avoid introducing unnecessary tenant-specific requirements or cloud-only control-plane dependencies.
## Nginx Routing — New Backend Routes
Whenever a new backend route is added that does NOT start with `/api`, it must also be explicitly added to ALL nginx configs:
- `deployment/helm/charts/onyx/templates/nginx-conf.yaml` (Helm/k8s)
- `deployment/data/nginx/app.conf.template` (docker-compose dev)
- `deployment/data/nginx/app.conf.template.prod` (docker-compose prod)
- `deployment/data/nginx/app.conf.template.no-letsencrypt` (docker-compose no-letsencrypt)
Routes not starting with `/api` are not caught by the existing `^/(api|openapi\.json)` location block and will fall through to `location /`, which proxies to the Next.js web server and returns an HTML 404. The new location block must be placed before the `/api` block. Examples of routes that need this treatment: `/scim`, `/mcp`.
## Full vs Lite Deployments
Code changes must consider both regular Onyx deployments and Onyx lite deployments. Lite deployments disable the vector DB, Redis, model servers, and background workers by default, use PostgreSQL-backed cache/auth/file storage, and rely on the API server to handle background work. Do not assume those services are available unless the code path is explicitly limited to full deployments.

View File

@@ -473,8 +473,6 @@ def connector_permission_sync_generator_task(
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,
eager_load_connector=True,
eager_load_credential=True,
)
if cc_pair is None:
raise ValueError(

View File

@@ -8,7 +8,6 @@ from ee.onyx.external_permissions.slack.utils import fetch_user_id_to_email_map
from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.models import HierarchyNode
from onyx.connectors.slack.connector import get_channels
from onyx.connectors.slack.connector import make_paginated_slack_api_call
@@ -106,11 +105,9 @@ def _get_slack_document_access(
slack_connector: SlackConnector,
channel_permissions: dict[str, ExternalAccess], # noqa: ARG001
callback: IndexingHeartbeatInterface | None,
indexing_start: SecondsSinceUnixEpoch | None = None,
) -> Generator[DocExternalAccess, None, None]:
slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
callback=callback,
start=indexing_start,
callback=callback
)
for doc_metadata_batch in slim_doc_generator:
@@ -183,15 +180,9 @@ def slack_doc_sync(
slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
slack_connector.set_credentials_provider(provider)
indexing_start_ts: SecondsSinceUnixEpoch | None = (
cc_pair.connector.indexing_start.timestamp()
if cc_pair.connector.indexing_start is not None
else None
)
yield from _get_slack_document_access(
slack_connector=slack_connector,
slack_connector,
channel_permissions=channel_permissions,
callback=callback,
indexing_start=indexing_start_ts,
)

View File

@@ -6,7 +6,6 @@ from onyx.access.models import ElementExternalAccess
from onyx.access.models import ExternalAccess
from onyx.access.models import NodeExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import HierarchyNode
from onyx.db.models import ConnectorCredentialPair
@@ -41,19 +40,10 @@ def generic_doc_sync(
logger.info(f"Starting {doc_source} doc sync for CC Pair ID: {cc_pair.id}")
indexing_start: SecondsSinceUnixEpoch | None = (
cc_pair.connector.indexing_start.timestamp()
if cc_pair.connector.indexing_start is not None
else None
)
newly_fetched_doc_ids: set[str] = set()
logger.info(f"Fetching all slim documents from {doc_source}")
for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(
start=indexing_start,
callback=callback,
):
for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(callback=callback):
logger.info(f"Got {len(doc_batch)} slim documents from {doc_source}")
if callback:

View File

@@ -44,31 +44,6 @@ SEND_USER_METADATA_TO_LLM_PROVIDER = (
# User Facing Features Configs
#####
BLURB_SIZE = 128 # Number Encoder Tokens included in the chunk blurb
# Hard ceiling for the admin-configurable file upload size (in MB).
# Self-hosted customers can raise or lower this via the environment variable.
_raw_max_upload_size_mb = int(os.environ.get("MAX_ALLOWED_UPLOAD_SIZE_MB", "250"))
if _raw_max_upload_size_mb < 0:
logger.warning(
"MAX_ALLOWED_UPLOAD_SIZE_MB=%d is negative; falling back to 250",
_raw_max_upload_size_mb,
)
_raw_max_upload_size_mb = 250
MAX_ALLOWED_UPLOAD_SIZE_MB = _raw_max_upload_size_mb
# Default fallback for the per-user file upload size limit (in MB) when no
# admin-configured value exists. Clamped to MAX_ALLOWED_UPLOAD_SIZE_MB at
# runtime so this never silently exceeds the hard ceiling.
_raw_default_upload_size_mb = int(
os.environ.get("DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB", "100")
)
if _raw_default_upload_size_mb < 0:
logger.warning(
"DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB=%d is negative; falling back to 100",
_raw_default_upload_size_mb,
)
_raw_default_upload_size_mb = 100
DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB = _raw_default_upload_size_mb
GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(
os.environ.get("GENERATIVE_MODEL_ACCESS_CHECK_FREQ") or 86400
) # 1 day
@@ -86,6 +61,17 @@ CACHE_BACKEND = CacheBackendType(
os.environ.get("CACHE_BACKEND", CacheBackendType.REDIS)
)
# Maximum token count for a single uploaded file. Files exceeding this are rejected.
# Defaults to 100k tokens (or 10M when vector DB is disabled).
_DEFAULT_FILE_TOKEN_LIMIT = 10_000_000 if DISABLE_VECTOR_DB else 100_000
FILE_TOKEN_COUNT_THRESHOLD = int(
os.environ.get("FILE_TOKEN_COUNT_THRESHOLD", str(_DEFAULT_FILE_TOKEN_LIMIT))
)
# Maximum upload size for a single user file (chat/projects) in MB.
USER_FILE_MAX_UPLOAD_SIZE_MB = int(os.environ.get("USER_FILE_MAX_UPLOAD_SIZE_MB") or 50)
USER_FILE_MAX_UPLOAD_SIZE_BYTES = USER_FILE_MAX_UPLOAD_SIZE_MB * 1024 * 1024
# If set to true, will show extra/uncommon connectors in the "Other" category
SHOW_EXTRA_CONNECTORS = os.environ.get("SHOW_EXTRA_CONNECTORS", "").lower() == "true"

View File

@@ -890,8 +890,8 @@ class ConfluenceConnector(
def _retrieve_all_slim_docs(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None,
include_permissions: bool = True,
) -> GenerateSlimDocumentOutput:
@@ -915,8 +915,8 @@ class ConfluenceConnector(
self.confluence_client, doc_id, restrictions, ancestors
) or space_level_access_info.get(page_space_key)
# Query pages (with optional time filtering for indexing_start)
page_query = self._construct_page_cql_query(start, end)
# Query pages
page_query = self.base_cql_page_query + self.cql_label_filter
for page in self.confluence_client.cql_paginate_all_expansions(
cql=page_query,
expand=restrictions_expand,
@@ -950,9 +950,7 @@ class ConfluenceConnector(
# Query attachments for each page
page_hierarchy_node_yielded = False
attachment_query = self._construct_attachment_query(
_get_page_id(page), start, end
)
attachment_query = self._construct_attachment_query(_get_page_id(page))
for attachment in self.confluence_client.cql_paginate_all_expansions(
cql=attachment_query,
expand=restrictions_expand,

View File

@@ -1765,11 +1765,7 @@ class SharepointConnector(
checkpoint.current_drive_delta_next_link = None
checkpoint.seen_document_ids.clear()
def _fetch_slim_documents_from_sharepoint(
self,
start: datetime | None = None,
end: datetime | None = None,
) -> GenerateSlimDocumentOutput:
def _fetch_slim_documents_from_sharepoint(self) -> GenerateSlimDocumentOutput:
site_descriptors = self._filter_excluded_sites(
self.site_descriptors or self.fetch_sites()
)
@@ -1790,9 +1786,7 @@ class SharepointConnector(
# Process site documents if flag is True
if self.include_site_documents:
for driveitem, drive_name, drive_web_url in self._fetch_driveitems(
site_descriptor=site_descriptor,
start=start,
end=end,
site_descriptor=site_descriptor
):
if self._is_driveitem_excluded(driveitem):
logger.debug(f"Excluding by path denylist: {driveitem.web_url}")
@@ -1847,9 +1841,7 @@ class SharepointConnector(
# Process site pages if flag is True
if self.include_site_pages:
site_pages = self._fetch_site_pages(
site_descriptor, start=start, end=end
)
site_pages = self._fetch_site_pages(site_descriptor)
for site_page in site_pages:
logger.debug(
f"Processing site page: {site_page.get('webUrl', site_page.get('name', 'Unknown'))}"
@@ -2573,22 +2565,12 @@ class SharepointConnector(
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None, # noqa: ARG002
) -> GenerateSlimDocumentOutput:
start_dt = (
datetime.fromtimestamp(start, tz=timezone.utc)
if start is not None
else None
)
end_dt = (
datetime.fromtimestamp(end, tz=timezone.utc) if end is not None else None
)
yield from self._fetch_slim_documents_from_sharepoint(
start=start_dt,
end=end_dt,
)
yield from self._fetch_slim_documents_from_sharepoint()
if __name__ == "__main__":

View File

@@ -516,8 +516,6 @@ def _get_all_doc_ids(
] = default_msg_filter,
callback: IndexingHeartbeatInterface | None = None,
workspace_url: str | None = None,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
"""
Get all document ids in the workspace, channel by channel
@@ -548,8 +546,6 @@ def _get_all_doc_ids(
client=client,
channel=channel,
callback=callback,
oldest=str(start) if start else None, # 0.0 -> None intentionally
latest=str(end) if end is not None else None,
)
for message_batch in channel_message_batches:
@@ -851,8 +847,8 @@ class SlackConnector(
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
start: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
end: SecondsSinceUnixEpoch | None = None, # noqa: ARG002
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
if self.client is None:
@@ -865,8 +861,6 @@ class SlackConnector(
msg_filter_func=self.msg_filter_func,
callback=callback,
workspace_url=self._workspace_url,
start=start,
end=end,
)
def _load_from_checkpoint(

View File

@@ -7,7 +7,6 @@ from fastapi import HTTPException
from fastapi import UploadFile
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from sqlalchemy import func
from sqlalchemy.orm import Session
from starlette.background import BackgroundTasks
@@ -35,19 +34,9 @@ class CategorizedFilesResult(BaseModel):
user_files: list[UserFile]
rejected_files: list[RejectedFile]
id_to_temp_id: dict[str, str]
# Filenames that should be stored but not indexed.
skip_indexing_filenames: set[str] = Field(default_factory=set)
# Allow SQLAlchemy ORM models inside this result container
model_config = ConfigDict(arbitrary_types_allowed=True)
@property
def indexable_files(self) -> list[UserFile]:
"""Return the user files that are not flagged to skip indexing.

A file is left out when its name is in skip_indexing_filenames. A file
whose name is None or empty is compared as the empty string.
"""
return [
uf
for uf in self.user_files
if (uf.name or "") not in self.skip_indexing_filenames
]
def build_hashed_file_key(file: UploadFile) -> str:
name_prefix = (file.filename or "")[:50]
@@ -109,7 +98,6 @@ def create_user_files(
user_files=user_files,
rejected_files=rejected_files,
id_to_temp_id=id_to_temp_id,
skip_indexing_filenames=categorized_files.skip_indexing,
)
@@ -135,7 +123,6 @@ def upload_files_to_user_files_with_indexing(
user_files = categorized_files_result.user_files
rejected_files = categorized_files_result.rejected_files
id_to_temp_id = categorized_files_result.id_to_temp_id
indexable_files = categorized_files_result.indexable_files
# Trigger per-file processing immediately for the current tenant
tenant_id = get_current_tenant_id()
for rejected_file in rejected_files:
@@ -147,12 +134,12 @@ def upload_files_to_user_files_with_indexing(
from onyx.background.task_utils import drain_processing_loop
background_tasks.add_task(drain_processing_loop, tenant_id)
for user_file in indexable_files:
for user_file in user_files:
logger.info(f"Queued in-process processing for user_file_id={user_file.id}")
else:
from onyx.background.celery.versioned_apps.client import app as client_app
for user_file in indexable_files:
for user_file in user_files:
task = client_app.send_task(
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
@@ -168,7 +155,6 @@ def upload_files_to_user_files_with_indexing(
user_files=user_files,
rejected_files=rejected_files,
id_to_temp_id=id_to_temp_id,
skip_indexing_filenames=categorized_files_result.skip_indexing_filenames,
)

View File

@@ -15,7 +15,6 @@ PLAIN_TEXT_MIME_TYPE = "text/plain"
class OnyxMimeTypes:
IMAGE_MIME_TYPES = {"image/jpg", "image/jpeg", "image/png", "image/webp"}
CSV_MIME_TYPES = {"text/csv"}
TABULAR_MIME_TYPES = CSV_MIME_TYPES | {SPREADSHEET_MIME_TYPE}
TEXT_MIME_TYPES = {
PLAIN_TEXT_MIME_TYPE,
"text/markdown",
@@ -35,12 +34,13 @@ class OnyxMimeTypes:
PDF_MIME_TYPE,
WORD_PROCESSING_MIME_TYPE,
PRESENTATION_MIME_TYPE,
SPREADSHEET_MIME_TYPE,
"message/rfc822",
"application/epub+zip",
}
ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES.union(
TEXT_MIME_TYPES, DOCUMENT_MIME_TYPES, TABULAR_MIME_TYPES
TEXT_MIME_TYPES, DOCUMENT_MIME_TYPES, CSV_MIME_TYPES
)
EXCLUDED_IMAGE_TYPES = {

View File

@@ -13,14 +13,13 @@ class ChatFileType(str, Enum):
DOC = "document"
# Plain text only contain the text
PLAIN_TEXT = "plain_text"
# Tabular data files (CSV, TSV, XLSX) — metadata-only injection
TABULAR = "tabular"
CSV = "csv"
def is_text_file(self) -> bool:
return self in (
ChatFileType.PLAIN_TEXT,
ChatFileType.DOC,
ChatFileType.TABULAR,
ChatFileType.CSV,
)

View File

@@ -29,7 +29,6 @@ from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.indexing.models import IndexChunk
from onyx.indexing.models import UpdatableChunkData
from onyx.llm.factory import get_default_llm
from onyx.natural_language_processing.utils import count_tokens
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.utils.logger import setup_logger
@@ -174,10 +173,8 @@ class UserFileIndexingAdapter:
[chunk.content for chunk in user_file_chunks]
)
user_file_id_to_raw_text[str(user_file_id)] = combined_content
token_count: int = (
count_tokens(combined_content, llm_tokenizer)
if llm_tokenizer
else 0
token_count = (
len(llm_tokenizer.encode(combined_content)) if llm_tokenizer else 0
)
user_file_id_to_token_count[str(user_file_id)] = token_count
else:

View File

@@ -185,6 +185,21 @@ def _messages_contain_tool_content(messages: list[dict[str, Any]]) -> bool:
return False
def _prompt_contains_tool_call_history(prompt: LanguageModelInput) -> bool:
"""Check if the prompt contains any assistant messages with tool_calls.

When Anthropic's extended thinking is enabled, the API requires every
assistant message to start with a thinking block before any tool_use
blocks. Since we don't preserve thinking_blocks (they carry
cryptographic signatures that can't be reconstructed), we must skip
the thinking param whenever history contains prior tool-calling turns.

Args:
prompt: Either a list of messages or a single message object.

Returns:
True if at least one item is an AssistantMessage whose tool_calls is
truthy; False otherwise (including for an empty list).
"""
# Imported at call time rather than at module level.
# NOTE(review): presumably to avoid a circular import — confirm.
from onyx.llm.models import AssistantMessage
# Normalize so a lone message and a list of messages share one code path.
msgs = prompt if isinstance(prompt, list) else [prompt]
return any(isinstance(msg, AssistantMessage) and msg.tool_calls for msg in msgs)
def _is_vertex_model_rejecting_output_config(model_name: str) -> bool:
normalized_model_name = model_name.lower()
return any(
@@ -466,7 +481,20 @@ class LitellmLLM(LLM):
reasoning_effort
)
if budget_tokens is not None:
# Anthropic requires every assistant message with tool_use
# blocks to start with a thinking block that carries a
# cryptographic signature. We don't preserve those blocks
# across turns, so skip thinking when the history already
# contains tool-calling assistant messages. LiteLLM's
# modify_params workaround doesn't cover all providers
# (notably Bedrock).
can_enable_thinking = (
budget_tokens is not None
and not _prompt_contains_tool_call_history(prompt)
)
if can_enable_thinking:
assert budget_tokens is not None # mypy
if max_tokens is not None:
# Anthropic requires max_tokens to be at least as large as budget_tokens
# when a thinking budget is set, and the minimum budget_tokens value is 1024

View File

@@ -175,32 +175,6 @@ def get_tokenizer(
return _check_tokenizer_cache(provider_type, model_name)
# Max characters per encode() call.
_ENCODE_CHUNK_SIZE = 500_000
def count_tokens(
text: str,
tokenizer: BaseTokenizer,
token_limit: int | None = None,
) -> int:
"""Count tokens, chunking the input to avoid tiktoken stack overflow.

If token_limit is provided and the text is large enough to require
multiple chunks (> 500k chars), stops early once the count exceeds it.
When early-exiting, the returned value exceeds token_limit but may be
less than the true full token count.

Text that fits in a single chunk is always encoded in full; token_limit
is not consulted on that path.

Args:
text: The string to tokenize.
tokenizer: Tokenizer whose encode(text) result has a length.
token_limit: Optional ceiling used only to stop early on multi-chunk
input. None means every chunk is encoded.

Returns:
The token count, or a partial count greater than token_limit when the
loop exits early.
"""
# Fast path: one encode() call when the text fits in a single chunk.
if len(text) <= _ENCODE_CHUNK_SIZE:
return len(tokenizer.encode(text))
# Each fixed-size character slice is encoded on its own and the lengths
# are summed.
# NOTE(review): the sum may differ slightly from encoding the whole text
# at once, since no token can span a slice boundary — confirm acceptable.
total = 0
for start in range(0, len(text), _ENCODE_CHUNK_SIZE):
total += len(tokenizer.encode(text[start : start + _ENCODE_CHUNK_SIZE]))
if token_limit is not None and total > token_limit:
return total # Already over — skip remaining chunks
return total
def tokenizer_trim_content(
content: str, desired_length: int, tokenizer: BaseTokenizer
) -> str:

View File

@@ -9,15 +9,20 @@ from pydantic import ConfigDict
from pydantic import Field
from sqlalchemy.orm import Session
from onyx.configs.app_configs import FILE_TOKEN_COUNT_THRESHOLD
from onyx.configs.app_configs import USER_FILE_MAX_UPLOAD_SIZE_BYTES
from onyx.configs.app_configs import USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.db.llm import fetch_default_llm_model
from onyx.file_processing.extract_file_text import extract_file_text
from onyx.file_processing.extract_file_text import get_file_ext
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.file_processing.password_validation import is_file_password_protected
from onyx.natural_language_processing.utils import count_tokens
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import SKIP_USERFILE_THRESHOLD
from shared_configs.configs import SKIP_USERFILE_THRESHOLD_TENANT_LIST
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
@@ -76,26 +81,11 @@ class CategorizedFiles(BaseModel):
acceptable: list[UploadFile] = Field(default_factory=list)
rejected: list[RejectedFile] = Field(default_factory=list)
acceptable_file_to_token_count: dict[str, int] = Field(default_factory=dict)
# Filenames within `acceptable` that should be stored but not indexed.
skip_indexing: set[str] = Field(default_factory=set)
# Allow FastAPI UploadFile instances
model_config = ConfigDict(arbitrary_types_allowed=True)
# Extensions that bypass the token-count threshold on upload.
_TOKEN_THRESHOLD_EXEMPT_EXTENSIONS: set[str] = {
".csv",
".tsv",
".xlsx",
}
def _skip_token_threshold(extension: str) -> bool:
"""Return True if this file extension should bypass the token limit.

The extension is lowercased before the lookup, so matching is
case-insensitive. Entries in _TOKEN_THRESHOLD_EXEMPT_EXTENSIONS carry a
leading dot (e.g. ".csv"), so `extension` must include the dot to match.
"""
return extension.lower() in _TOKEN_THRESHOLD_EXEMPT_EXTENSIONS
def _apply_long_side_cap(width: int, height: int, cap: int) -> tuple[int, int]:
if max(width, height) <= cap:
return width, height
@@ -171,8 +161,8 @@ def categorize_uploaded_files(
document formats (.pdf, .docx, …) and falls back to a text-detection
heuristic for unknown extensions (.py, .js, .rs, …).
- Uses default tokenizer to compute token length.
- If token length exceeds the admin-configured threshold, reject file.
- If extension unsupported or text cannot be extracted, reject file.
- If token length > threshold, reject file (unless threshold skip is enabled).
- If text cannot be extracted, reject file.
- Otherwise marked as acceptable.
"""
@@ -183,33 +173,36 @@ def categorize_uploaded_files(
provider_type = default_model.llm_provider.provider if default_model else None
tokenizer = get_tokenizer(model_name=model_name, provider_type=provider_type)
# Derive limits from admin-configurable settings.
# For upload size: load_settings() resolves 0/None to a positive default.
# For token threshold: 0 means "no limit" (converted to None below).
settings = load_settings()
max_upload_size_mb = (
settings.user_file_max_upload_size_mb
) # always positive after load_settings()
max_upload_size_bytes = (
max_upload_size_mb * 1024 * 1024 if max_upload_size_mb else None
)
token_threshold_k = settings.file_token_count_threshold_k
token_threshold = (
token_threshold_k * 1000 if token_threshold_k else None
) # 0 → None = no limit
# Check if threshold checks should be skipped
skip_threshold = False
# Check global skip flag (works for both single-tenant and multi-tenant)
if SKIP_USERFILE_THRESHOLD:
skip_threshold = True
logger.info("Skipping userfile threshold check (global setting)")
# Check tenant-specific skip list (only applicable in multi-tenant)
elif MULTI_TENANT and SKIP_USERFILE_THRESHOLD_TENANT_LIST:
try:
current_tenant_id = get_current_tenant_id()
skip_threshold = current_tenant_id in SKIP_USERFILE_THRESHOLD_TENANT_LIST
if skip_threshold:
logger.info(
f"Skipping userfile threshold check for tenant: {current_tenant_id}"
)
except RuntimeError as e:
logger.warning(f"Failed to get current tenant ID: {str(e)}")
for upload in files:
try:
filename = get_safe_filename(upload)
# Size limit is a hard safety cap.
if max_upload_size_bytes is not None and is_upload_too_large(
upload, max_upload_size_bytes
):
# Size limit is a hard safety cap and is enforced even when token
# threshold checks are skipped via SKIP_USERFILE_THRESHOLD settings.
if is_upload_too_large(upload, USER_FILE_MAX_UPLOAD_SIZE_BYTES):
results.rejected.append(
RejectedFile(
filename=filename,
reason=f"Exceeds {max_upload_size_mb} MB file size limit",
reason=f"Exceeds {USER_FILE_MAX_UPLOAD_SIZE_MB} MB file size limit",
)
)
continue
@@ -231,11 +224,11 @@ def categorize_uploaded_files(
)
continue
if token_threshold is not None and token_count > token_threshold:
if not skip_threshold and token_count > FILE_TOKEN_COUNT_THRESHOLD:
results.rejected.append(
RejectedFile(
filename=filename,
reason=f"Exceeds {token_threshold_k}K token limit",
reason=f"Exceeds {FILE_TOKEN_COUNT_THRESHOLD} token limit",
)
)
else:
@@ -276,24 +269,12 @@ def categorize_uploaded_files(
)
continue
token_count = count_tokens(
text_content, tokenizer, token_limit=token_threshold
)
exceeds_threshold = (
token_threshold is not None and token_count > token_threshold
)
if exceeds_threshold and _skip_token_threshold(extension):
# Exempt extensions (e.g. spreadsheets) are accepted
# but flagged to skip indexing — only metadata is
# injected into the LLM context.
results.acceptable.append(upload)
results.acceptable_file_to_token_count[filename] = token_count
results.skip_indexing.add(filename)
elif exceeds_threshold:
token_count = len(tokenizer.encode(text_content))
if not skip_threshold and token_count > FILE_TOKEN_COUNT_THRESHOLD:
results.rejected.append(
RejectedFile(
filename=filename,
reason=f"Exceeds {token_threshold_k}K token limit",
reason=f"Exceeds {FILE_TOKEN_COUNT_THRESHOLD} token limit",
)
)
else:

View File

@@ -9,8 +9,8 @@ def mime_type_to_chat_file_type(mime_type: str | None) -> ChatFileType:
if mime_type in OnyxMimeTypes.IMAGE_MIME_TYPES:
return ChatFileType.IMAGE
if mime_type in OnyxMimeTypes.TABULAR_MIME_TYPES:
return ChatFileType.TABULAR
if mime_type in OnyxMimeTypes.CSV_MIME_TYPES:
return ChatFileType.CSV
if mime_type in OnyxMimeTypes.DOCUMENT_MIME_TYPES:
return ChatFileType.DOC

View File

@@ -9,9 +9,7 @@ from onyx import __version__ as onyx_version
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.auth.users import is_user_admin
from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.configs.constants import NotificationType
from onyx.db.engine.sql_engine import get_session
@@ -19,16 +17,10 @@ from onyx.db.models import User
from onyx.db.notification import dismiss_all_notifications
from onyx.db.notification import get_notifications
from onyx.db.notification import update_notification_last_shown
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
from onyx.hooks.utils import HOOKS_AVAILABLE
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.server.features.build.utils import is_onyx_craft_enabled
from onyx.server.settings.models import (
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,
)
from onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
from onyx.server.settings.models import Notification
from onyx.server.settings.models import Settings
from onyx.server.settings.models import UserSettings
@@ -49,15 +41,6 @@ basic_router = APIRouter(prefix="/settings")
def admin_put_settings(
settings: Settings, _: User = Depends(current_admin_user)
) -> None:
if (
settings.user_file_max_upload_size_mb is not None
and settings.user_file_max_upload_size_mb > 0
and settings.user_file_max_upload_size_mb > MAX_ALLOWED_UPLOAD_SIZE_MB
):
raise OnyxError(
OnyxErrorCode.INVALID_INPUT,
f"File upload size limit cannot exceed {MAX_ALLOWED_UPLOAD_SIZE_MB} MB",
)
store_settings(settings)
@@ -100,16 +83,6 @@ def fetch_settings(
vector_db_enabled=not DISABLE_VECTOR_DB,
hooks_enabled=HOOKS_AVAILABLE,
version=onyx_version,
max_allowed_upload_size_mb=MAX_ALLOWED_UPLOAD_SIZE_MB,
default_user_file_max_upload_size_mb=min(
DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB,
MAX_ALLOWED_UPLOAD_SIZE_MB,
),
default_file_token_count_threshold_k=(
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
if DISABLE_VECTOR_DB
else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
),
)

View File

@@ -2,19 +2,12 @@ from datetime import datetime
from enum import Enum
from pydantic import BaseModel
from pydantic import Field
from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
from onyx.configs.constants import NotificationType
from onyx.configs.constants import QueryHistoryType
from onyx.db.models import Notification as NotificationDBModel
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB = 200
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB = 10000
class PageType(str, Enum):
CHAT = "chat"
@@ -85,12 +78,7 @@ class Settings(BaseModel):
# User Knowledge settings
user_knowledge_enabled: bool | None = True
user_file_max_upload_size_mb: int | None = Field(
default=DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB, ge=0
)
file_token_count_threshold_k: int | None = Field(
default=None, ge=0 # thousands of tokens; None = context-aware default
)
user_file_max_upload_size_mb: int | None = None
# Connector settings
show_extra_connectors: bool | None = True
@@ -120,14 +108,3 @@ class UserSettings(Settings):
hooks_enabled: bool = False
# Application version, read from the ONYX_VERSION env var at startup.
version: str | None = None
# Hard ceiling for user_file_max_upload_size_mb, derived from env var.
max_allowed_upload_size_mb: int = MAX_ALLOWED_UPLOAD_SIZE_MB
# Factory defaults so the frontend can show a "restore default" button.
default_user_file_max_upload_size_mb: int = DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
default_file_token_count_threshold_k: int = Field(
default_factory=lambda: (
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
if DISABLE_VECTOR_DB
else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
)
)

View File

@@ -1,19 +1,13 @@
from onyx.cache.factory import get_cache_backend
from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.configs.app_configs import DISABLE_USER_KNOWLEDGE
from onyx.configs.app_configs import DISABLE_VECTOR_DB
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.app_configs import MAX_ALLOWED_UPLOAD_SIZE_MB
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
from onyx.configs.app_configs import SHOW_EXTRA_CONNECTORS
from onyx.configs.app_configs import USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.configs.constants import KV_SETTINGS_KEY
from onyx.configs.constants import OnyxRedisLocks
from onyx.key_value_store.factory import get_kv_store
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.server.settings.models import (
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,
)
from onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger
@@ -57,36 +51,9 @@ def load_settings() -> Settings:
if DISABLE_USER_KNOWLEDGE:
settings.user_knowledge_enabled = False
settings.user_file_max_upload_size_mb = USER_FILE_MAX_UPLOAD_SIZE_MB
settings.show_extra_connectors = SHOW_EXTRA_CONNECTORS
settings.opensearch_indexing_enabled = ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
# Resolve context-aware defaults for token threshold.
# None = admin hasn't set a value yet → use context-aware default.
# 0 = admin explicitly chose "no limit" → preserve as-is.
if settings.file_token_count_threshold_k is None:
settings.file_token_count_threshold_k = (
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
if DISABLE_VECTOR_DB
else DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
)
# Upload size: 0 and None are treated as "unset" (not "no limit") →
# fall back to min(configured default, hard ceiling).
if not settings.user_file_max_upload_size_mb:
settings.user_file_max_upload_size_mb = min(
DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB,
MAX_ALLOWED_UPLOAD_SIZE_MB,
)
# Clamp to env ceiling so stale KV values are capped even if the
# operator lowered MAX_ALLOWED_UPLOAD_SIZE_MB after a higher value
# was already saved (api.py only guards new writes).
if (
settings.user_file_max_upload_size_mb > 0
and settings.user_file_max_upload_size_mb > MAX_ALLOWED_UPLOAD_SIZE_MB
):
settings.user_file_max_upload_size_mb = MAX_ALLOWED_UPLOAD_SIZE_MB
return settings

View File

@@ -169,10 +169,10 @@ class FileReaderTool(Tool[FileReaderToolOverrideKwargs]):
chat_file = self._load_file(file_id)
# Only PLAIN_TEXT and TABULAR are guaranteed to contain actual text bytes.
# Only PLAIN_TEXT and CSV are guaranteed to contain actual text bytes.
# DOC type in a loaded file means plaintext extraction failed and the
# content is the original binary (e.g. raw PDF/DOCX bytes).
if chat_file.file_type not in (ChatFileType.PLAIN_TEXT, ChatFileType.TABULAR):
if chat_file.file_type not in (ChatFileType.PLAIN_TEXT, ChatFileType.CSV):
raise ToolCallException(
message=f"File {file_id} is not a text file (type={chat_file.file_type})",
llm_facing_message=(

View File

@@ -191,6 +191,25 @@ IGNORED_SYNCING_TENANT_LIST = (
else None
)
# Global flag to skip the userfile threshold for all users/tenants.
# Only the literal string "true" (case-insensitive) turns it on.
SKIP_USERFILE_THRESHOLD = os.getenv("SKIP_USERFILE_THRESHOLD", "").lower() == "true"

# Raw comma-separated tenant IDs whose threshold is skipped (multi-tenant only).
# Stays None when the variable is not set.
SKIP_USERFILE_THRESHOLD_TENANT_IDS = os.getenv("SKIP_USERFILE_THRESHOLD_TENANT_IDS")

# Parsed tenant IDs: whitespace-trimmed, blank entries dropped.
# None (not an empty list) when the raw value is unset or empty.
SKIP_USERFILE_THRESHOLD_TENANT_LIST = (
    None
    if not SKIP_USERFILE_THRESHOLD_TENANT_IDS
    else [
        entry.strip()
        for entry in SKIP_USERFILE_THRESHOLD_TENANT_IDS.split(",")
        if entry.strip()
    ]
)
ENVIRONMENT = os.environ.get("ENVIRONMENT") or "not_explicitly_set"

View File

@@ -1,6 +1,4 @@
import time
from datetime import datetime
from datetime import timezone
import pytest
@@ -19,10 +17,6 @@ PRIVATE_CHANNEL_USERS = [
"test_user_2@onyx-test.com",
]
# Predates any test workspace messages, so the result set should match
# the "no start time" case while exercising the oldest= parameter.
OLDEST_TS_2016 = datetime(2016, 1, 1, tzinfo=timezone.utc).timestamp()
pytestmark = pytest.mark.usefixtures("enable_ee")
@@ -111,17 +105,15 @@ def test_load_from_checkpoint_access__private_channel(
],
indirect=True,
)
@pytest.mark.parametrize("start_ts", [None, OLDEST_TS_2016])
def test_slim_documents_access__public_channel(
slack_connector: SlackConnector,
start_ts: float | None,
) -> None:
"""Test that retrieve_all_slim_docs_perm_sync returns correct access information for slim documents."""
if not slack_connector.client:
raise RuntimeError("Web client must be defined")
slim_docs_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
start=start_ts,
start=0.0,
end=time.time(),
)
@@ -157,7 +149,7 @@ def test_slim_documents_access__private_channel(
raise RuntimeError("Web client must be defined")
slim_docs_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
start=None,
start=0.0,
end=time.time(),
)

View File

@@ -1175,7 +1175,7 @@ def test_code_interpreter_receives_chat_files(
file_descriptor: FileDescriptor = {
"id": user_file.file_id,
"type": ChatFileType.TABULAR,
"type": ChatFileType.CSV,
"name": "data.csv",
"user_file_id": str(user_file.id),
}

View File

@@ -139,7 +139,7 @@ def test_csv_file_type() -> None:
result = _extract_referenced_file_descriptors([tool_call], message)
assert len(result) == 1
assert result[0]["type"] == ChatFileType.TABULAR
assert result[0]["type"] == ChatFileType.CSV
def test_unknown_extension_defaults_to_plain_text() -> None:

View File

@@ -1,5 +1,3 @@
from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch
@@ -33,7 +31,6 @@ def mock_jira_cc_pair(
"jira_base_url": jira_base_url,
"project_key": project_key,
}
mock_cc_pair.connector.indexing_start = None
return mock_cc_pair
@@ -68,75 +65,3 @@ def test_jira_permission_sync(
fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
):
print(doc)
def test_jira_doc_sync_passes_indexing_start(
    jira_connector: JiraConnector,
    mock_jira_cc_pair: MagicMock,
    mock_fetch_all_existing_docs_fn: MagicMock,
    mock_fetch_all_existing_docs_ids_fn: MagicMock,
) -> None:
    """Check that jira_doc_sync forwards the cc_pair connector's indexing_start,
    converted to a POSIX timestamp, as the ``start`` keyword of
    retrieve_all_slim_docs_perm_sync."""
    configured_start = datetime(2025, 6, 1, tzinfo=timezone.utc)
    mock_jira_cc_pair.connector.indexing_start = configured_start

    with patch("onyx.connectors.jira.connector.build_jira_client") as mock_build_client:
        # Reuse the fixture's client instead of building a real one.
        mock_build_client.return_value = jira_connector._jira_client
        assert jira_connector._jira_client is not None
        jira_connector._jira_client._options = MagicMock()
        jira_connector._jira_client._options.return_value = {
            "rest_api_version": JIRA_SERVER_API_VERSION
        }

        # Patch on the class so whichever instance the sync uses is intercepted.
        with patch.object(
            type(jira_connector),
            "retrieve_all_slim_docs_perm_sync",
            return_value=iter([]),
        ) as mock_retrieve:
            sync_results = jira_doc_sync(
                cc_pair=mock_jira_cc_pair,
                fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,
                fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
            )
            # Drain the generator so the sync actually runs.
            list(sync_results)

    mock_retrieve.assert_called_once()
    forwarded_kwargs = mock_retrieve.call_args.kwargs
    assert forwarded_kwargs["start"] == configured_start.timestamp()
def test_jira_doc_sync_passes_none_when_no_indexing_start(
    jira_connector: JiraConnector,
    mock_jira_cc_pair: MagicMock,
    mock_fetch_all_existing_docs_fn: MagicMock,
    mock_fetch_all_existing_docs_ids_fn: MagicMock,
) -> None:
    """Check that ``start=None`` is forwarded to retrieve_all_slim_docs_perm_sync
    when the connector has no indexing_start configured."""
    mock_jira_cc_pair.connector.indexing_start = None

    with patch("onyx.connectors.jira.connector.build_jira_client") as mock_build_client:
        # Reuse the fixture's client instead of building a real one.
        mock_build_client.return_value = jira_connector._jira_client
        assert jira_connector._jira_client is not None
        jira_connector._jira_client._options = MagicMock()
        jira_connector._jira_client._options.return_value = {
            "rest_api_version": JIRA_SERVER_API_VERSION
        }

        # Patch on the class so whichever instance the sync uses is intercepted.
        with patch.object(
            type(jira_connector),
            "retrieve_all_slim_docs_perm_sync",
            return_value=iter([]),
        ) as mock_retrieve:
            sync_results = jira_doc_sync(
                cc_pair=mock_jira_cc_pair,
                fetch_all_existing_docs_fn=mock_fetch_all_existing_docs_fn,
                fetch_all_existing_docs_ids_fn=mock_fetch_all_existing_docs_ids_fn,
            )
            # Drain the generator so the sync actually runs.
            list(sync_results)

    mock_retrieve.assert_called_once()
    forwarded_kwargs = mock_retrieve.call_args.kwargs
    assert forwarded_kwargs["start"] is None

View File

@@ -11,6 +11,7 @@ from litellm.types.utils import ChatCompletionDeltaToolCall
from litellm.types.utils import Delta
from litellm.types.utils import Function as LiteLLMFunction
import onyx.llm.models
from onyx.configs.app_configs import MOCK_LLM_RESPONSE
from onyx.llm.constants import LlmProviderNames
from onyx.llm.interfaces import LLMUserIdentity
@@ -1479,6 +1480,147 @@ def test_bifrost_normalizes_api_base_in_model_kwargs() -> None:
assert llm._model_kwargs["api_base"] == "https://bifrost.example.com/v1"
def test_prompt_contains_tool_call_history_true() -> None:
    """An assistant message carrying tool_calls counts as tool-call history."""
    from onyx.llm.multi_llm import _prompt_contains_tool_call_history

    weather_call = ToolCall(
        id="tc_1",
        function=FunctionCall(name="get_weather", arguments="{}"),
    )
    assistant_turn = AssistantMessage(content=None, tool_calls=[weather_call])
    messages: LanguageModelInput = [
        UserMessage(content="What's the weather?"),
        assistant_turn,
    ]

    assert _prompt_contains_tool_call_history(messages) is True
def test_prompt_contains_tool_call_history_false_no_tools() -> None:
    """A plain user/assistant exchange without tool_calls is not tool-call history."""
    from onyx.llm.multi_llm import _prompt_contains_tool_call_history

    user_turn = UserMessage(content="Hello")
    assistant_turn = AssistantMessage(content="Hi there!")
    messages: LanguageModelInput = [user_turn, assistant_turn]

    assert _prompt_contains_tool_call_history(messages) is False
def test_prompt_contains_tool_call_history_false_user_only() -> None:
    """A prompt holding only a user message has no tool-call history."""
    from onyx.llm.multi_llm import _prompt_contains_tool_call_history

    only_user_turn = UserMessage(content="Hello")
    messages: LanguageModelInput = [only_user_turn]

    assert _prompt_contains_tool_call_history(messages) is False
def test_bedrock_claude_drops_thinking_when_thinking_blocks_missing() -> None:
    """The ``thinking`` kwarg must not reach litellm.completion when the history
    contains an assistant tool call without thinking_blocks.

    The scenario guards against the Bedrock BadRequestError about missing
    thinking blocks.
    """
    llm = LitellmLLM(
        api_key=None,
        timeout=30,
        model_provider=LlmProviderNames.BEDROCK,
        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
        max_input_tokens=200000,
    )

    # History: user question -> assistant tool call (no thinking_blocks) -> tool result.
    weather_call = ToolCall(
        id="tc_1",
        function=FunctionCall(
            name="get_weather",
            arguments='{"city": "Paris"}',
        ),
    )
    messages: LanguageModelInput = [
        UserMessage(content="What's the weather?"),
        AssistantMessage(content=None, tool_calls=[weather_call]),
        onyx.llm.models.ToolMessage(
            content="22°C sunny",
            tool_call_id="tc_1",
        ),
    ]

    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
    tools = [weather_tool]

    with patch("litellm.completion") as mock_completion:
        # Force the reasoning path regardless of the real model registry.
        with patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True):
            mock_completion.return_value = []
            list(
                llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH)
            )

    kwargs = mock_completion.call_args.kwargs
    assert "thinking" not in kwargs, (
        "thinking param should be dropped when thinking_blocks are missing "
        "from assistant messages with tool_calls"
    )
def test_bedrock_claude_keeps_thinking_when_no_tool_history() -> None:
    """The ``thinking`` kwarg must still reach litellm.completion when the
    history holds no assistant message with tool_calls."""
    llm = LitellmLLM(
        api_key=None,
        timeout=30,
        model_provider=LlmProviderNames.BEDROCK,
        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
        max_input_tokens=200000,
    )

    # A single user turn: no prior tool calls anywhere in the prompt.
    messages: LanguageModelInput = [
        UserMessage(content="What's the weather?"),
    ]

    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
    tools = [weather_tool]

    with patch("litellm.completion") as mock_completion:
        # Force the reasoning path regardless of the real model registry.
        with patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True):
            mock_completion.return_value = []
            list(
                llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH)
            )

    kwargs = mock_completion.call_args.kwargs
    assert "thinking" in kwargs, (
        "thinking param should be preserved when no assistant messages "
        "with tool_calls exist in history"
    )
    assert kwargs["thinking"]["type"] == "enabled"
def test_bifrost_claude_includes_allowed_openai_params() -> None:
llm = LitellmLLM(
api_key="test_key",

View File

@@ -4,23 +4,13 @@ from unittest.mock import MagicMock
import pytest
from fastapi import UploadFile
from onyx.natural_language_processing import utils as nlp_utils
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import count_tokens
from onyx.server.features.projects import projects_file_utils as utils
from onyx.server.settings.models import Settings
class _Tokenizer(BaseTokenizer):
class _Tokenizer:
def encode(self, text: str) -> list[int]:
return [1] * len(text)
def tokenize(self, text: str) -> list[str]:
return list(text)
def decode(self, _tokens: list[int]) -> str:
return ""
class _NonSeekableFile(BytesIO):
def tell(self) -> int:
@@ -39,26 +29,10 @@ def _make_upload_no_size(filename: str, content: bytes) -> UploadFile:
return UploadFile(filename=filename, file=BytesIO(content), size=None)
def _make_settings(upload_size_mb: int = 1, token_threshold_k: int = 100) -> Settings:
    """Build a Settings object with only the two upload limits set explicitly.

    Args:
        upload_size_mb: value for ``user_file_max_upload_size_mb``.
        token_threshold_k: value for ``file_token_count_threshold_k``.
    """
    settings = Settings(
        file_token_count_threshold_k=token_threshold_k,
        user_file_max_upload_size_mb=upload_size_mb,
    )
    return settings
def _patch_common_dependencies(
monkeypatch: pytest.MonkeyPatch,
upload_size_mb: int = 1,
token_threshold_k: int = 100,
) -> None:
def _patch_common_dependencies(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(utils, "fetch_default_llm_model", lambda _db: None)
monkeypatch.setattr(utils, "get_tokenizer", lambda **_kwargs: _Tokenizer())
monkeypatch.setattr(utils, "is_file_password_protected", lambda **_kwargs: False)
monkeypatch.setattr(
utils,
"load_settings",
lambda: _make_settings(upload_size_mb, token_threshold_k),
)
def test_get_upload_size_bytes_falls_back_to_stream_size() -> None:
@@ -102,8 +76,9 @@ def test_is_upload_too_large_logs_warning_when_size_unknown(
def test_categorize_uploaded_files_accepts_size_under_limit(
monkeypatch: pytest.MonkeyPatch,
) -> None:
# upload_size_mb=1 → max_bytes = 1*1024*1024; file size 99 is well under
_patch_common_dependencies(monkeypatch, upload_size_mb=1)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 100)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)
upload = _make_upload("small.png", size=99)
@@ -116,7 +91,9 @@ def test_categorize_uploaded_files_accepts_size_under_limit(
def test_categorize_uploaded_files_uses_seek_fallback_when_upload_size_missing(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_patch_common_dependencies(monkeypatch, upload_size_mb=1)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 100)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)
upload = _make_upload_no_size("small.png", content=b"x" * 99)
@@ -129,11 +106,12 @@ def test_categorize_uploaded_files_uses_seek_fallback_when_upload_size_missing(
def test_categorize_uploaded_files_accepts_size_at_limit(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_patch_common_dependencies(monkeypatch, upload_size_mb=1)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 100)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)
# 1 MB = 1048576 bytes; file at exactly that boundary should be accepted
upload = _make_upload("edge.png", size=1048576)
upload = _make_upload("edge.png", size=100)
result = utils.categorize_uploaded_files([upload], MagicMock())
assert len(result.acceptable) == 1
@@ -143,10 +121,12 @@ def test_categorize_uploaded_files_accepts_size_at_limit(
def test_categorize_uploaded_files_rejects_size_over_limit_with_reason(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_patch_common_dependencies(monkeypatch, upload_size_mb=1)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 100)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)
upload = _make_upload("large.png", size=1048577) # 1 byte over 1 MB
upload = _make_upload("large.png", size=101)
result = utils.categorize_uploaded_files([upload], MagicMock())
assert len(result.acceptable) == 0
@@ -157,11 +137,13 @@ def test_categorize_uploaded_files_rejects_size_over_limit_with_reason(
def test_categorize_uploaded_files_mixed_batch_keeps_valid_and_rejects_oversized(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_patch_common_dependencies(monkeypatch, upload_size_mb=1)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 100)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)
small = _make_upload("small.png", size=50)
large = _make_upload("large.png", size=1048577)
large = _make_upload("large.png", size=101)
result = utils.categorize_uploaded_files([small, large], MagicMock())
@@ -171,12 +153,15 @@ def test_categorize_uploaded_files_mixed_batch_keeps_valid_and_rejects_oversized
assert result.rejected[0].reason == "Exceeds 1 MB file size limit"
def test_categorize_uploaded_files_enforces_size_limit_always(
def test_categorize_uploaded_files_enforces_size_limit_even_when_threshold_is_skipped(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_patch_common_dependencies(monkeypatch, upload_size_mb=1)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "SKIP_USERFILE_THRESHOLD", True)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 100)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
upload = _make_upload("oversized.pdf", size=1048577)
upload = _make_upload("oversized.pdf", size=101)
result = utils.categorize_uploaded_files([upload], MagicMock())
assert len(result.acceptable) == 0
@@ -187,12 +172,14 @@ def test_categorize_uploaded_files_enforces_size_limit_always(
def test_categorize_uploaded_files_checks_size_before_text_extraction(
monkeypatch: pytest.MonkeyPatch,
) -> None:
_patch_common_dependencies(monkeypatch, upload_size_mb=1)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 100)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
extract_mock = MagicMock(return_value="this should not run")
monkeypatch.setattr(utils, "extract_file_text", extract_mock)
oversized_doc = _make_upload("oversized.pdf", size=1048577)
oversized_doc = _make_upload("oversized.pdf", size=101)
result = utils.categorize_uploaded_files([oversized_doc], MagicMock())
extract_mock.assert_not_called()
@@ -201,219 +188,40 @@ def test_categorize_uploaded_files_checks_size_before_text_extraction(
assert result.rejected[0].reason == "Exceeds 1 MB file size limit"
def test_categorize_enforces_size_limit_when_upload_size_mb_is_positive(
def test_categorize_uploaded_files_accepts_python_file(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""A positive upload_size_mb is always enforced."""
_patch_common_dependencies(monkeypatch, upload_size_mb=1)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 10)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 10_000)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
upload = _make_upload("huge.png", size=1048577, content=b"x")
result = utils.categorize_uploaded_files([upload], MagicMock())
assert len(result.acceptable) == 0
assert len(result.rejected) == 1
def test_categorize_enforces_token_limit_when_threshold_k_is_positive(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""A positive token_threshold_k is always enforced."""
_patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=5)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 6000)
upload = _make_upload("big_image.png", size=100)
result = utils.categorize_uploaded_files([upload], MagicMock())
assert len(result.acceptable) == 0
assert len(result.rejected) == 1
def test_categorize_no_token_limit_when_threshold_k_is_zero(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""token_threshold_k=0 means no token limit; high-token files are accepted."""
_patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=0)
py_source = b'def hello():\n print("world")\n'
monkeypatch.setattr(
utils, "estimate_image_tokens_for_upload", lambda _upload: 999_999
utils, "extract_file_text", lambda **_kwargs: py_source.decode()
)
upload = _make_upload("huge_image.png", size=100)
upload = _make_upload("script.py", size=len(py_source), content=py_source)
result = utils.categorize_uploaded_files([upload], MagicMock())
assert len(result.rejected) == 0
assert len(result.acceptable) == 1
assert result.acceptable[0].filename == "script.py"
assert len(result.rejected) == 0
def test_categorize_both_limits_enforced(
def test_categorize_uploaded_files_rejects_binary_file(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Both positive limits are enforced; file exceeding token limit is rejected."""
_patch_common_dependencies(monkeypatch, upload_size_mb=10, token_threshold_k=5)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 6000)
_patch_common_dependencies(monkeypatch)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_BYTES", 10_000)
monkeypatch.setattr(utils, "USER_FILE_MAX_UPLOAD_SIZE_MB", 1)
upload = _make_upload("over_tokens.png", size=100)
monkeypatch.setattr(utils, "extract_file_text", lambda **_kwargs: "")
binary_content = bytes(range(256)) * 4
upload = _make_upload("data.bin", size=len(binary_content), content=binary_content)
result = utils.categorize_uploaded_files([upload], MagicMock())
assert len(result.acceptable) == 0
assert len(result.rejected) == 1
assert result.rejected[0].reason == "Exceeds 5K token limit"
def test_categorize_rejection_reason_contains_dynamic_values(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Rejection reasons reflect the admin-configured limits, not hardcoded values."""
_patch_common_dependencies(monkeypatch, upload_size_mb=42, token_threshold_k=7)
monkeypatch.setattr(utils, "estimate_image_tokens_for_upload", lambda _upload: 8000)
# File within size limit but over token limit
upload = _make_upload("tokens.png", size=100)
result = utils.categorize_uploaded_files([upload], MagicMock())
assert result.rejected[0].reason == "Exceeds 7K token limit"
# File over size limit
_patch_common_dependencies(monkeypatch, upload_size_mb=42, token_threshold_k=7)
oversized = _make_upload("big.png", size=42 * 1024 * 1024 + 1)
result2 = utils.categorize_uploaded_files([oversized], MagicMock())
assert result2.rejected[0].reason == "Exceeds 42 MB file size limit"
# --- count_tokens tests ---
def test_count_tokens_small_text() -> None:
    """For a short string, count_tokens equals the length of one encode() result."""
    tokenizer = _Tokenizer()
    sample = "hello world"
    expected = len(tokenizer.encode(sample))

    assert count_tokens(sample, tokenizer) == expected
def test_count_tokens_chunked_matches_single_call() -> None:
    """count_tokens on a 1000-character string matches a direct encode() of it."""
    tokenizer = _Tokenizer()
    sample = "a" * 1000
    expected = len(tokenizer.encode(sample))

    assert count_tokens(sample, tokenizer) == expected
def test_count_tokens_large_text_is_chunked(monkeypatch: pytest.MonkeyPatch) -> None:
    """Text longer than _ENCODE_CHUNK_SIZE is still counted in full."""
    # Shrink the chunk size so a 250-character string spans several chunks.
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)
    sample = "a" * 250

    # _Tokenizer emits one token per character, so the total must be 250.
    total = count_tokens(sample, _Tokenizer())

    assert total == 250
def test_count_tokens_with_token_limit_exits_early(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Once token_limit is exceeded, count_tokens must stop encoding more chunks."""
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)

    delegate = _Tokenizer()
    encoded_chunks: list[str] = []

    class _CountingTokenizer(BaseTokenizer):
        def encode(self, text: str) -> list[int]:
            # Record each chunk handed to the tokenizer, then defer to the
            # one-token-per-character fake.
            encoded_chunks.append(text)
            return delegate.encode(text)

        def tokenize(self, text: str) -> list[str]:
            return list(text)

        def decode(self, _tokens: list[int]) -> str:
            return ""

    # 500 chars at 100 chars per chunk is 5 chunks; with limit=150 the running
    # total passes the limit after the second chunk.
    sample = "a" * 500
    total = count_tokens(sample, _CountingTokenizer(), token_limit=150)

    assert total == 200  # 2 chunks × 100 tokens each
    assert len(encoded_chunks) == 2, "Should have stopped after 2 chunks"
def test_count_tokens_with_token_limit_not_exceeded(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A token_limit above the real total leaves the full count unchanged."""
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)
    sample = "a" * 250

    total = count_tokens(sample, _Tokenizer(), token_limit=1000)

    assert total == 250
def test_count_tokens_no_limit_encodes_all_chunks(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no token_limit, every chunk contributes to the returned total."""
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)
    sample = "a" * 500

    total = count_tokens(sample, _Tokenizer())

    assert total == 500
# --- early exit via token_limit in categorize tests ---
def test_categorize_early_exits_tokenization_for_large_text(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A text file far over the token threshold is rejected without every chunk
    of its text being encoded."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)
    monkeypatch.setattr(nlp_utils, "_ENCODE_CHUNK_SIZE", 100)

    # 5000 characters -> 5000 tokens with the one-token-per-character fake,
    # i.e. 50 chunks of 100, well above the threshold of 1000 tokens.
    large_text = "x" * 5000
    monkeypatch.setattr(utils, "extract_file_text", lambda **_kwargs: large_text)

    delegate = _Tokenizer()
    encoded_chunks: list[str] = []

    class _CountingTokenizer(BaseTokenizer):
        def encode(self, text: str) -> list[int]:
            # Record each chunk so the number of encode() calls can be checked.
            encoded_chunks.append(text)
            return delegate.encode(text)

        def tokenize(self, text: str) -> list[str]:
            return list(text)

        def decode(self, _tokens: list[int]) -> str:
            return ""

    # Must come after _patch_common_dependencies, which installs its own tokenizer.
    monkeypatch.setattr(utils, "get_tokenizer", lambda **_kwargs: _CountingTokenizer())

    upload = _make_upload("big.txt", size=5000, content=large_text.encode())
    result = utils.categorize_uploaded_files([upload], MagicMock())

    assert len(result.rejected) == 1
    assert "token limit" in result.rejected[0].reason

    # Encoding all 50 chunks would mean no early exit happened.
    encode_call_count = len(encoded_chunks)
    assert (
        encode_call_count < 50
    ), f"Expected early exit but encoded {encode_call_count} chunks out of 50"
def test_categorize_text_under_token_limit_accepted(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A text file below the token threshold is accepted and its exact token
    count is recorded."""
    _patch_common_dependencies(monkeypatch, upload_size_mb=1000, token_threshold_k=1)

    # 500 characters -> 500 tokens, below the threshold of 1000.
    small_text = "x" * 500
    monkeypatch.setattr(utils, "extract_file_text", lambda **_kwargs: small_text)

    upload = _make_upload("ok.txt", size=500, content=small_text.encode())
    result = utils.categorize_uploaded_files([upload], MagicMock())

    accepted_count = len(result.acceptable)
    recorded_tokens = result.acceptable_file_to_token_count["ok.txt"]
    assert accepted_count == 1
    assert recorded_tokens == 500
assert result.rejected[0].filename == "data.bin"
assert "Unsupported file type" in result.rejected[0].reason

View File

@@ -1,23 +1,12 @@
import pytest
from onyx.configs.app_configs import DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.server.settings import store as settings_store
from onyx.server.settings.models import (
DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB,
)
from onyx.server.settings.models import DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
from onyx.server.settings.models import Settings
class _FakeKvStore:
def __init__(self, data: dict | None = None) -> None:
self._data = data
def load(self, _key: str) -> dict:
if self._data is None:
raise KvKeyNotFoundError()
return self._data
raise KvKeyNotFoundError()
class _FakeCache:
@@ -31,140 +20,13 @@ class _FakeCache:
self._vals[key] = value.encode("utf-8")
def test_load_settings_uses_model_defaults_when_no_stored_value(
def test_load_settings_includes_user_file_max_upload_size_mb(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""When no settings are stored (vector DB enabled), load_settings() should
resolve the default token threshold to 200."""
monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore())
monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
monkeypatch.setattr(settings_store, "DISABLE_VECTOR_DB", False)
monkeypatch.setattr(settings_store, "USER_FILE_MAX_UPLOAD_SIZE_MB", 77)
settings = settings_store.load_settings()
assert settings.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
assert (
settings.file_token_count_threshold_k
== DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_VECTOR_DB
)
def test_load_settings_uses_high_token_default_when_vector_db_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With nothing stored and the vector DB disabled, load_settings() falls back
    to the no-vector-DB token threshold default and the default upload size."""
    monkeypatch.setattr(settings_store, "DISABLE_VECTOR_DB", True)
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore())

    loaded = settings_store.load_settings()

    expected_threshold = DEFAULT_FILE_TOKEN_COUNT_THRESHOLD_K_NO_VECTOR_DB
    assert loaded.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
    assert loaded.file_token_count_threshold_k == expected_threshold
def test_load_settings_preserves_explicit_value_when_vector_db_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An explicitly stored token threshold survives load_settings() even when
    the vector DB is disabled; the context-aware default must not replace it."""
    stored = Settings(file_token_count_threshold_k=500).model_dump()
    monkeypatch.setattr(settings_store, "DISABLE_VECTOR_DB", True)
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))

    loaded = settings_store.load_settings()

    assert loaded.file_token_count_threshold_k == 500
def test_load_settings_preserves_zero_token_threshold(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A stored token threshold of 0 ('no limit') is returned unchanged."""
    stored = Settings(file_token_count_threshold_k=0).model_dump()
    monkeypatch.setattr(settings_store, "DISABLE_VECTOR_DB", True)
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))

    loaded = settings_store.load_settings()

    assert loaded.file_token_count_threshold_k == 0
def test_load_settings_resolves_zero_upload_size_to_default(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A stored upload size of 0 is treated as unset and replaced by the default."""
    stored = Settings(user_file_max_upload_size_mb=0).model_dump()
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))

    loaded = settings_store.load_settings()

    assert loaded.user_file_max_upload_size_mb == DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB
def test_load_settings_clamps_upload_size_to_env_max(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A stored upload size above MAX_ALLOWED_UPLOAD_SIZE_MB is clamped down to
    that maximum."""
    stored = Settings(user_file_max_upload_size_mb=500).model_dump()
    monkeypatch.setattr(settings_store, "MAX_ALLOWED_UPLOAD_SIZE_MB", 250)
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))

    loaded = settings_store.load_settings()

    # Stored 500 exceeds the patched ceiling of 250.
    assert loaded.user_file_max_upload_size_mb == 250
def test_load_settings_preserves_upload_size_within_max(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A stored upload size at or below MAX_ALLOWED_UPLOAD_SIZE_MB is returned
    unchanged."""
    stored = Settings(user_file_max_upload_size_mb=150).model_dump()
    monkeypatch.setattr(settings_store, "MAX_ALLOWED_UPLOAD_SIZE_MB", 250)
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))

    loaded = settings_store.load_settings()

    # Stored 150 is under the patched ceiling of 250.
    assert loaded.user_file_max_upload_size_mb == 150
def test_load_settings_zero_upload_size_resolves_to_default(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A stored upload size of 0 resolves to the default, bounded by
    MAX_ALLOWED_UPLOAD_SIZE_MB (both patched to 100 here)."""
    stored = Settings(user_file_max_upload_size_mb=0).model_dump()
    monkeypatch.setattr(settings_store, "DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB", 100)
    monkeypatch.setattr(settings_store, "MAX_ALLOWED_UPLOAD_SIZE_MB", 100)
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore(stored))

    loaded = settings_store.load_settings()

    assert loaded.user_file_max_upload_size_mb == 100
def test_load_settings_default_clamped_to_max(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With nothing stored, the effective upload size is
    min(DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB, MAX_ALLOWED_UPLOAD_SIZE_MB)."""
    # Default (100) deliberately exceeds the ceiling (50).
    monkeypatch.setattr(settings_store, "MAX_ALLOWED_UPLOAD_SIZE_MB", 50)
    monkeypatch.setattr(settings_store, "DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB", 100)
    monkeypatch.setattr(settings_store, "get_cache_backend", lambda: _FakeCache())
    monkeypatch.setattr(settings_store, "get_kv_store", lambda: _FakeKvStore())

    loaded = settings_store.load_settings()

    assert loaded.user_file_max_upload_size_mb == 50
assert settings.user_file_max_upload_size_mb == 77

View File

@@ -82,7 +82,7 @@ class TestChatFileConversion:
ChatLoadedFile(
file_id="file-2",
content=b"csv,data\n1,2",
file_type=ChatFileType.TABULAR,
file_type=ChatFileType.CSV,
filename="data.csv",
content_text="csv,data\n1,2",
token_count=5,

View File

@@ -39,22 +39,6 @@ server {
# Conditionally include MCP location configuration
include /etc/nginx/conf.d/mcp.conf.inc;
location ~ ^/scim(/.*)?$ {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_buffering off;
proxy_redirect off;
proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;
proxy_pass http://api_server;
}
# Match both /api/* and /openapi.json in a single rule
location ~ ^/(api|openapi.json)(/.*)?$ {
# Rewrite /api prefixed matched paths

View File

@@ -39,20 +39,6 @@ server {
# Conditionally include MCP location configuration
include /etc/nginx/conf.d/mcp.conf.inc;
location ~ ^/scim(/.*)?$ {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# don't trust client-supplied X-Forwarded-* headers — use nginx's own values
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_buffering off;
proxy_redirect off;
proxy_pass http://api_server;
}
# Match both /api/* and /openapi.json in a single rule
location ~ ^/(api|openapi.json)(/.*)?$ {
# Rewrite /api prefixed matched paths

View File

@@ -39,23 +39,6 @@ server {
# Conditionally include MCP location configuration
include /etc/nginx/conf.d/mcp.conf.inc;
location ~ ^/scim(/.*)?$ {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# don't trust client-supplied X-Forwarded-* headers — use nginx's own values
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header X-Forwarded-Port $server_port;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_buffering off;
proxy_redirect off;
proxy_connect_timeout ${NGINX_PROXY_CONNECT_TIMEOUT}s;
proxy_send_timeout ${NGINX_PROXY_SEND_TIMEOUT}s;
proxy_read_timeout ${NGINX_PROXY_READ_TIMEOUT}s;
proxy_pass http://api_server;
}
# Match both /api/* and /openapi.json in a single rule
location ~ ^/(api|openapi.json)(/.*)?$ {
# Rewrite /api prefixed matched paths

View File

@@ -66,3 +66,10 @@ DB_READONLY_PASSWORD=password
# Show extra/uncommon connectors
# See https://docs.onyx.app/admins/connectors/overview for a full list of connectors
SHOW_EXTRA_CONNECTORS=False
# User File Upload Configuration
# Skip the token count threshold check (100,000 tokens) for uploaded files
# For self-hosted: set to true to skip for all users
#SKIP_USERFILE_THRESHOLD=false
# For multi-tenant: comma-separated list of tenant IDs to skip threshold
#SKIP_USERFILE_THRESHOLD_TENANT_IDS=

View File

@@ -35,10 +35,6 @@ USER_AUTH_SECRET=""
## Chat Configuration
# HARD_DELETE_CHATS=
# MAX_ALLOWED_UPLOAD_SIZE_MB=250
# Default per-user upload size limit (MB) when no admin value is set.
# Automatically clamped to MAX_ALLOWED_UPLOAD_SIZE_MB at runtime.
# DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB=100
## Base URL for redirects
# WEB_DOMAIN=
@@ -46,6 +42,13 @@ USER_AUTH_SECRET=""
## Enterprise Features, requires a paid plan and licenses
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=false
## User File Upload Configuration
# Skip the token count threshold check (100,000 tokens) for uploaded files
# For self-hosted: set to true to skip for all users
# SKIP_USERFILE_THRESHOLD=false
# For multi-tenant: comma-separated list of tenant IDs to skip threshold
# SKIP_USERFILE_THRESHOLD_TENANT_IDS=
################################################################################
## SERVICES CONFIGURATIONS

View File

@@ -5,7 +5,7 @@ home: https://www.onyx.app/
sources:
- "https://github.com/onyx-dot-app/onyx"
type: application
version: 0.4.37
version: 0.4.36
appVersion: latest
annotations:
category: Productivity

View File

@@ -63,22 +63,6 @@ data:
}
{{- end }}
location ~ ^/scim(/.*)?$ {
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_set_header Host $host;
proxy_http_version 1.1;
proxy_buffering off;
proxy_redirect off;
# timeout settings
proxy_connect_timeout {{ .Values.nginx.timeouts.connect }}s;
proxy_send_timeout {{ .Values.nginx.timeouts.send }}s;
proxy_read_timeout {{ .Values.nginx.timeouts.read }}s;
proxy_pass http://api_server;
}
location ~ ^/(api|openapi\.json)(/.*)?$ {
rewrite ^/api(/.*)$ $1 break;
proxy_set_header X-Real-IP $remote_addr;

View File

@@ -282,7 +282,7 @@ nginx:
# The ingress-nginx subchart doesn't auto-detect our custom ConfigMap changes.
# Workaround: Helm upgrade will restart if the following annotation value changes.
podAnnotations:
onyx.app/nginx-config-version: "3"
onyx.app/nginx-config-version: "2"
# Propagate DOMAIN into nginx so server_name continues to use the same env var
extraEnvs:
@@ -1285,5 +1285,11 @@ configMap:
DOMAIN: "localhost"
# Chat Configs
HARD_DELETE_CHATS: ""
MAX_ALLOWED_UPLOAD_SIZE_MB: ""
DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB: ""
# User File Upload Configuration
# Skip the token count threshold check (100,000 tokens) for uploaded files
# For self-hosted: set to true to skip for all users
SKIP_USERFILE_THRESHOLD: ""
# For multi-tenant: comma-separated list of tenant IDs to skip threshold
SKIP_USERFILE_THRESHOLD_TENANT_IDS: ""
# Maximum user upload file size in MB for chat/projects uploads
USER_FILE_MAX_UPLOAD_SIZE_MB: ""

View File

@@ -28,7 +28,7 @@ Some commands require external tools to be installed and configured:
- **uv** - Required for `backend` commands
- Install from [docs.astral.sh/uv](https://docs.astral.sh/uv/)
- **GitHub CLI** (`gh`) - Required for `run-ci`, `cherry-pick`, and `trace` commands
- **GitHub CLI** (`gh`) - Required for `run-ci` and `cherry-pick` commands
- Install from [cli.github.com](https://cli.github.com/)
- Authenticate with `gh auth login`
@@ -412,62 +412,6 @@ The `compare` subcommand writes a `summary.json` alongside the report with aggre
counts (changed, added, removed, unchanged). The HTML report is only generated when
visual differences are detected.
### `trace` - View Playwright Traces from CI
Download Playwright trace artifacts from a GitHub Actions run and open them
with `playwright show-trace`. Traces are only generated for failing tests
(`retain-on-failure`).
```shell
ods trace [run-id-or-url]
```
The run can be specified as a numeric run ID, a full GitHub Actions URL, or
omitted to find the latest Playwright run for the current branch.
**Flags:**
| Flag | Default | Description |
|------|---------|-------------|
| `--branch`, `-b` | | Find latest run for this branch |
| `--pr` | | Find latest run for this PR number |
| `--project`, `-p` | | Filter to a specific project (`admin`, `exclusive`, `lite`) |
| `--list`, `-l` | `false` | List available traces without opening |
| `--no-open` | `false` | Download traces but don't open them |
When multiple traces are found, an interactive picker lets you select which
traces to open. Use arrow keys or `j`/`k` to navigate, `space` to toggle,
`a` to select all, `n` to deselect all, and `enter` to open. Falls back to a
plain-text prompt when no TTY is available.
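Downloaded artifacts are cached under the system temp directory in
`ods-traces/<run-id>/` (or `ods-traces/<run-id>-<project>/` when `--project` is
set), so repeated invocations for the same run and project filter skip the download.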
**Examples:**
```shell
# Latest run for the current branch
ods trace
# Specific run ID
ods trace 12345678
# Full GitHub Actions URL
ods trace https://github.com/onyx-dot-app/onyx/actions/runs/12345678
# Latest run for a PR
ods trace --pr 9500
# Latest run for a specific branch
ods trace --branch main
# Only download admin project traces
ods trace --project admin
# List traces without opening
ods trace --list
```
### Testing Changes Locally (Dry Run)
Both `run-ci` and `cherry-pick` support `--dry-run` to test without making remote changes:

View File

@@ -55,7 +55,6 @@ func NewRootCommand() *cobra.Command {
cmd.AddCommand(NewWebCommand())
cmd.AddCommand(NewLatestStableTagCommand())
cmd.AddCommand(NewWhoisCommand())
cmd.AddCommand(NewTraceCommand())
return cmd
}

View File

@@ -1,556 +0,0 @@
package cmd
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/onyx-dot-app/onyx/tools/ods/internal/git"
"github.com/onyx-dot-app/onyx/tools/ods/internal/paths"
"github.com/onyx-dot-app/onyx/tools/ods/internal/tui"
)
// playwrightWorkflow is the workflow name passed to `gh run list --workflow`
// when looking up the latest Playwright run for a branch.
const playwrightWorkflow = "Run Playwright Tests"
// TraceOptions holds options for the trace command.
// Each field is bound to a command-line flag in NewTraceCommand.
type TraceOptions struct {
	Branch  string // --branch/-b: find the latest run for this branch
	PR      string // --pr: find the latest run for this PR number
	Project string // --project/-p: only download artifacts for this project
	List    bool   // --list/-l: list available traces without opening them
	NoOpen  bool   // --no-open: download traces but don't open them
}
// traceInfo describes a single trace.zip found in the downloaded artifacts.
// Instances are built by findTraceInfos from each file's location below the
// download directory.
type traceInfo struct {
	Path    string // absolute path to trace.zip
	Project string // project group extracted from artifact dir (e.g. "admin", "admin-shard-1")
	TestDir string // test directory name (human-readable-ish)
}
// NewTraceCommand creates a new trace command.
//
// The command accepts at most one positional argument (a run ID or run URL)
// and hands it, together with the parsed flags, to runTrace.
func NewTraceCommand() *cobra.Command {
	// opts is captured by the Run closure and filled in by the flag bindings below.
	opts := &TraceOptions{}
	cmd := &cobra.Command{
		Use: "trace [run-id-or-url]",
		Short: "Download and view Playwright traces from GitHub Actions",
		Long: `Download Playwright trace artifacts from a GitHub Actions run and open them
with 'playwright show-trace'.
The run can be specified as:
- A GitHub Actions run ID (numeric)
- A full GitHub Actions run URL
- Omitted, to find the latest Playwright run for the current branch
You can also look up the latest run by branch name or PR number.
Examples:
ods trace # latest run for current branch
ods trace 12345678 # specific run ID
ods trace https://github.com/onyx-dot-app/onyx/actions/runs/12345678
ods trace --pr 9500 # latest run for PR #9500
ods trace --branch main # latest run for main branch
ods trace --project admin # only download admin project traces
ods trace --list # list available traces without opening`,
		Args: cobra.MaximumNArgs(1),
		Run: func(cmd *cobra.Command, args []string) {
			runTrace(args, opts)
		},
	}
	// Flag bindings; see TraceOptions for the destination fields.
	cmd.Flags().StringVarP(&opts.Branch, "branch", "b", "", "Find latest run for this branch")
	cmd.Flags().StringVar(&opts.PR, "pr", "", "Find latest run for this PR number")
	cmd.Flags().StringVarP(&opts.Project, "project", "p", "", "Filter to a specific project (admin, exclusive, lite)")
	cmd.Flags().BoolVarP(&opts.List, "list", "l", false, "List available traces without opening")
	cmd.Flags().BoolVar(&opts.NoOpen, "no-open", false, "Download traces but don't open them")
	return cmd
}
// ghRun represents a GitHub Actions workflow run from `gh run list`.
// The JSON tags match the field names requested via
// `--json databaseId,status,conclusion,headBranch,url`.
type ghRun struct {
	DatabaseID int64  `json:"databaseId"` // formatted as a decimal string and used as the run ID
	Status     string `json:"status"`     // logged for information only
	Conclusion string `json:"conclusion"` // logged for information only
	HeadBranch string `json:"headBranch"`
	URL        string `json:"url"` // logged for information only
}
// runTrace is the entry point of the trace command: it resolves the run,
// downloads its Playwright artifacts, and then either lists the traces or
// opens them (via an interactive picker when there is more than one).
// Any failure while resolving, downloading, or scanning is fatal.
func runTrace(args []string, opts *TraceOptions) {
	git.CheckGitHubCLI()

	id, err := resolveRunID(args, opts)
	if err != nil {
		log.Fatalf("Failed to resolve run: %v", err)
	}
	log.Infof("Using run ID: %s", id)

	downloadDir, err := downloadTraceArtifacts(id, opts.Project)
	if err != nil {
		log.Fatalf("Failed to download artifacts: %v", err)
	}

	found, err := findTraceInfos(downloadDir, id)
	if err != nil {
		log.Fatalf("Failed to find traces: %v", err)
	}
	if len(found) == 0 {
		log.Info("No trace files found in the downloaded artifacts.")
		log.Info("Traces are only generated for failing tests (retain-on-failure).")
		return
	}

	projectNames := groupByProject(found)

	switch {
	case opts.List || opts.NoOpen:
		// Listing and download-only modes never launch the viewer.
		printTraceList(found, projectNames)
		fmt.Printf("\nTraces downloaded to: %s\n", downloadDir)
	case len(found) == 1:
		// Nothing to choose between; open the only trace directly.
		openTraces(found)
	default:
		// Keep offering the picker until the user selects nothing.
		for {
			picked := selectTraces(found, projectNames)
			if len(picked) == 0 {
				return
			}
			openTraces(picked)
		}
	}
}
// resolveRunID determines the run ID from the provided arguments and options.
//
// Precedence: explicit positional argument, then --pr, then --branch, and
// finally the currently checked-out branch. An error is returned when the
// current branch cannot be determined or is empty (detached HEAD).
func resolveRunID(args []string, opts *TraceOptions) (string, error) {
	switch {
	case len(args) == 1:
		return parseRunIDFromArg(args[0])
	case opts.PR != "":
		return findLatestRunForPR(opts.PR)
	case opts.Branch != "":
		return findLatestRunForBranch(opts.Branch)
	}

	// No explicit selector given: fall back to the current git branch.
	current, err := git.GetCurrentBranch()
	if err != nil {
		return "", fmt.Errorf("failed to get current branch: %w", err)
	}
	if current == "" {
		return "", fmt.Errorf("detached HEAD; specify a --branch, --pr, or run ID")
	}
	log.Infof("Using current branch: %s", current)
	return findLatestRunForBranch(current)
}
var (
	// runURLPattern captures the numeric run ID from the path of a GitHub
	// Actions run URL.
	runURLPattern = regexp.MustCompile(`/actions/runs/(\d+)`)
	// runIDPattern matches an argument made up solely of digits. It is
	// compiled once instead of on every call.
	runIDPattern = regexp.MustCompile(`^\d+$`)
)

// parseRunIDFromArg extracts a run ID from either a numeric string or a full URL.
//
// A purely numeric argument is returned unchanged. Otherwise the first
// "/actions/runs/<digits>" segment found in the argument supplies the ID.
// An error is returned when neither form matches.
func parseRunIDFromArg(arg string) (string, error) {
	if runIDPattern.MatchString(arg) {
		return arg, nil
	}
	if matches := runURLPattern.FindStringSubmatch(arg); matches != nil {
		return matches[1], nil
	}
	return "", fmt.Errorf("could not parse run ID from %q; expected a numeric ID or GitHub Actions URL", arg)
}
// findLatestRunForBranch finds the most recent Playwright workflow run for a branch.
//
// It asks `gh run list` for a single run of the Playwright workflow on the
// branch and returns that run's database ID as a decimal string. An error is
// returned when gh fails, its output cannot be parsed, or no run exists.
func findLatestRunForBranch(branch string) (string, error) {
	log.Infof("Looking up latest Playwright run for branch: %s", branch)

	ghArgs := []string{
		"run", "list",
		"--workflow", playwrightWorkflow,
		"--branch", branch,
		"--limit", "1",
		"--json", "databaseId,status,conclusion,headBranch,url",
	}
	raw, err := exec.Command("gh", ghArgs...).Output()
	if err != nil {
		return "", ghError(err, "gh run list failed")
	}

	var found []ghRun
	if err := json.Unmarshal(raw, &found); err != nil {
		return "", fmt.Errorf("failed to parse run list: %w", err)
	}
	if len(found) == 0 {
		return "", fmt.Errorf("no Playwright runs found for branch %q", branch)
	}

	latest := found[0]
	log.Infof("Found run: %s (status: %s, conclusion: %s)", latest.URL, latest.Status, latest.Conclusion)
	return strconv.FormatInt(latest.DatabaseID, 10), nil
}
// findLatestRunForPR finds the most recent Playwright workflow run for a PR.
//
// The PR's head branch name is obtained from `gh pr view` and the lookup is
// then delegated to findLatestRunForBranch.
func findLatestRunForPR(prNumber string) (string, error) {
	log.Infof("Looking up branch for PR #%s", prNumber)

	ghArgs := []string{"pr", "view", prNumber, "--json", "headRefName", "--jq", ".headRefName"}
	raw, err := exec.Command("gh", ghArgs...).Output()
	if err != nil {
		return "", ghError(err, "gh pr view failed")
	}

	headBranch := strings.TrimSpace(string(raw))
	if headBranch == "" {
		return "", fmt.Errorf("could not determine branch for PR #%s", prNumber)
	}
	log.Infof("PR #%s is on branch: %s", prNumber, headBranch)
	return findLatestRunForBranch(headBranch)
}
// downloadTraceArtifacts downloads playwright trace artifacts for a run.
// Returns the path to the download directory.
//
// Downloads live under <temp dir>/ods-traces/<runID>, with "-<project>"
// appended when a project filter is given. An existing directory that already
// contains at least one trace.zip is reused; otherwise it is removed and
// downloaded again. A failed download also removes the directory.
func downloadTraceArtifacts(runID string, project string) (string, error) {
	// The project filter decides both the cache directory name and the
	// artifact name pattern handed to gh.
	dirName := runID
	artifactPattern := "playwright-test-results-*"
	if project != "" {
		dirName = runID + "-" + project
		artifactPattern = fmt.Sprintf("playwright-test-results-%s-*", project)
	}
	destDir := filepath.Join(os.TempDir(), "ods-traces", dirName)

	// Reuse a previous download if traces exist
	if st, statErr := os.Stat(destDir); statErr == nil && st.IsDir() {
		if cached, _ := findTraces(destDir); len(cached) > 0 {
			log.Infof("Using cached download at %s", destDir)
			return destDir, nil
		}
		_ = os.RemoveAll(destDir)
	}

	if err := os.MkdirAll(destDir, 0755); err != nil {
		return "", fmt.Errorf("failed to create directory %s: %w", destDir, err)
	}

	ghArgs := []string{"run", "download", runID, "--dir", destDir, "--pattern", artifactPattern}
	log.Infof("Downloading trace artifacts...")
	log.Debugf("Running: gh %s", strings.Join(ghArgs, " "))

	download := exec.Command("gh", ghArgs...)
	download.Stdout = os.Stdout
	download.Stderr = os.Stderr
	if err := download.Run(); err != nil {
		// Don't leave a partial directory behind for a later cache check.
		_ = os.RemoveAll(destDir)
		return "", fmt.Errorf("gh run download failed: %w\nMake sure the run ID is correct and the artifacts haven't expired (30 day retention)", err)
	}
	return destDir, nil
}
// findTraces recursively finds all trace.zip files under a directory.
func findTraces(root string) ([]string, error) {
var traces []string
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() && info.Name() == "trace.zip" {
traces = append(traces, path)
}
return nil
})
return traces, err
}
// findTraceInfos walks the download directory and returns structured trace info.
// Expects: destDir/{artifact-dir}/{test-dir}/trace.zip
//
// The result is ordered by project sort key, then by test directory name.
// Whatever was collected is returned alongside any walk error.
func findTraceInfos(destDir, runID string) ([]traceInfo, error) {
	var infos []traceInfo

	walkErr := filepath.Walk(destDir, func(p string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if fi.IsDir() || fi.Name() != "trace.zip" {
			return nil
		}

		// Derive artifact and test directory names from the first two
		// components of the path relative to destDir; with fewer components
		// fall back to the file's parent directory name.
		relPath, _ := filepath.Rel(destDir, p)
		segments := strings.SplitN(relPath, string(filepath.Separator), 3)
		artifact, test := "", filepath.Base(filepath.Dir(p))
		if len(segments) >= 2 {
			artifact, test = segments[0], segments[1]
		}

		infos = append(infos, traceInfo{
			Path:    p,
			Project: extractProject(artifact, runID),
			TestDir: test,
		})
		return nil
	})

	sort.Slice(infos, func(a, b int) bool {
		keyA, keyB := projectSortKey(infos[a].Project), projectSortKey(infos[b].Project)
		if keyA == keyB {
			return infos[a].TestDir < infos[b].TestDir
		}
		return keyA < keyB
	})
	return infos, walkErr
}
// extractProject derives a project group from an artifact directory name by
// stripping the "playwright-test-results-" prefix and the "-<runID>" suffix.
//
//	"playwright-test-results-admin-12345"         -> "admin"
//	"playwright-test-results-admin-shard-1-12345" -> "admin-shard-1"
//
// If nothing is left after stripping, the original name is returned.
func extractProject(artifactDir, runID string) string {
	withoutPrefix := strings.TrimPrefix(artifactDir, "playwright-test-results-")
	if trimmed := strings.TrimSuffix(withoutPrefix, "-"+runID); trimmed != "" {
		return trimmed
	}
	return artifactDir
}
// projectSortKey returns a sort-friendly key that orders admin < exclusive < lite,
// with every other project after those. The key is a rank prefix followed by
// the project name, so projects within the same rank sort by name.
func projectSortKey(project string) string {
	ranked := []struct {
		prefix string
		rank   string
	}{
		{"admin", "0-"},
		{"exclusive", "1-"},
		{"lite", "2-"},
	}
	for _, r := range ranked {
		if strings.HasPrefix(project, r.prefix) {
			return r.rank + project
		}
	}
	return "3-" + project
}
// groupByProject returns an ordered list of unique project names found in traces.
// Names are ordered by projectSortKey.
func groupByProject(traces []traceInfo) []string {
	known := make(map[string]struct{})
	var names []string
	for _, t := range traces {
		if _, dup := known[t.Project]; dup {
			continue
		}
		known[t.Project] = struct{}{}
		names = append(names, t.Project)
	}
	sort.Slice(names, func(a, b int) bool {
		return projectSortKey(names[a]) < projectSortKey(names[b])
	})
	return names
}
// printTraceList displays traces grouped by project.
// Traces are numbered from 1, continuing across projects in the order the
// projects are given.
func printTraceList(traces []traceInfo, projects []string) {
	fmt.Printf("\nFound %d trace(s) across %d project(s):\n", len(traces), len(projects))

	number := 1
	for _, proj := range projects {
		// Gather this project's test directories first so the count can be
		// printed in the heading before the entries.
		var dirs []string
		for _, t := range traces {
			if t.Project == proj {
				dirs = append(dirs, t.TestDir)
			}
		}
		fmt.Printf("\n %s (%d):\n", proj, len(dirs))
		for _, dir := range dirs {
			fmt.Printf(" [%2d] %s\n", number, dir)
			number++
		}
	}
}
// selectTraces tries the TUI picker first, falling back to a plain-text
// prompt when the picker reports an error (e.g. piped output).
// It returns nil when the user cancels the picker.
func selectTraces(traces []traceInfo, projects []string) []traceInfo {
	// Build picker groups in project order; the picker's flat indices are
	// used below to index directly into traces.
	var groups []tui.PickerGroup
	for _, proj := range projects {
		var labels []string
		for _, t := range traces {
			if t.Project == proj {
				labels = append(labels, t.TestDir)
			}
		}
		groups = append(groups, tui.PickerGroup{Label: proj, Items: labels})
	}

	picked, err := tui.Pick(groups)
	switch {
	case err != nil:
		// Terminal not available — fall back to text prompt
		log.Debugf("TUI picker unavailable: %v", err)
		printTraceList(traces, projects)
		return promptTraceSelection(traces, projects)
	case picked == nil:
		return nil // user cancelled
	}

	chosen := make([]traceInfo, len(picked))
	for pos, traceIdx := range picked {
		chosen[pos] = traces[traceIdx]
	}
	return chosen
}
// promptTraceSelection asks the user which traces to open via plain text.
// Accepts numbers (1,3,5), ranges (1-5), "all", or a project name.
//
// Empty input and "all" select everything. Input that yields no valid index
// also selects everything, after a warning. A read error on stdin is fatal.
func promptTraceSelection(traces []traceInfo, projects []string) []traceInfo {
	fmt.Printf("\nOpen which traces? (e.g. 1,3,5 | 1-5 | all | %s): ", strings.Join(projects, " | "))

	line, err := bufio.NewReader(os.Stdin).ReadString('\n')
	if err != nil {
		log.Fatalf("Failed to read input: %v", err)
	}
	answer := strings.TrimSpace(line)

	if answer == "" || strings.EqualFold(answer, "all") {
		return traces
	}

	// Check if input matches a project name
	for _, name := range projects {
		if !strings.EqualFold(answer, name) {
			continue
		}
		var matching []traceInfo
		for _, t := range traces {
			if t.Project == name {
				matching = append(matching, t)
			}
		}
		return matching
	}

	// Parse as number/range selection
	picks := parseTraceSelection(answer, len(traces))
	if len(picks) == 0 {
		log.Warn("No valid selection; opening all traces")
		return traces
	}
	chosen := make([]traceInfo, len(picks))
	for pos, traceIdx := range picks {
		chosen[pos] = traces[traceIdx]
	}
	return chosen
}
// parseTraceSelection parses a comma-separated list of numbers and ranges into
// 0-based indices. Input is 1-indexed (matches display). Out-of-range values
// are silently ignored, as are parts that are not numbers or ranges.
//
// Indices are returned in the order they first appear, without duplicates.
// Range bounds are clamped to [1, max] before iterating, so a range such as
// "1-2000000000" costs at most max iterations instead of walking the whole
// numeric range.
func parseTraceSelection(input string, max int) []int {
	var result []int
	seen := map[int]bool{}

	// add records the 1-based number n if it is in range and not yet seen.
	add := func(n int) {
		zi := n - 1
		if zi < 0 || zi >= max || seen[zi] {
			return
		}
		seen[zi] = true
		result = append(result, zi)
	}

	for _, part := range strings.Split(input, ",") {
		part = strings.TrimSpace(part)
		if part == "" {
			continue
		}

		// A dash at position 0 is a sign, not a range separator.
		dash := strings.Index(part, "-")
		if dash <= 0 {
			if n, err := strconv.Atoi(part); err == nil {
				add(n)
			}
			continue
		}

		lo, errLo := strconv.Atoi(strings.TrimSpace(part[:dash]))
		hi, errHi := strconv.Atoi(strings.TrimSpace(part[dash+1:]))
		if errLo != nil || errHi != nil {
			continue
		}
		// Clamp to the displayable range; values outside it would be
		// discarded by add anyway.
		if lo < 1 {
			lo = 1
		}
		if hi > max {
			hi = max
		}
		for n := lo; n <= hi; n++ {
			add(n)
		}
	}
	return result
}
// openTraces opens the selected traces with playwright show-trace,
// running npx from the web/ directory to use the project's Playwright version.
//
// A non-zero exit status from the viewer is only logged at debug level; any
// other failure to run it is logged as an error. The function never aborts
// the program.
func openTraces(traces []traceInfo) {
	argv := make([]string, 0, len(traces)+2)
	argv = append(argv, "playwright", "show-trace")
	for _, t := range traces {
		argv = append(argv, t.Path)
	}

	log.Infof("Opening %d trace(s) with playwright show-trace...", len(traces))
	viewer := exec.Command("npx", argv...)

	// Run from web/ to pick up the locally-installed Playwright version
	if repoRoot, err := paths.GitRoot(); err == nil {
		viewer.Dir = filepath.Join(repoRoot, "web")
	}
	viewer.Stdin = os.Stdin
	viewer.Stdout = os.Stdout
	viewer.Stderr = os.Stderr

	runErr := viewer.Run()
	if runErr == nil {
		return
	}

	var exitErr *exec.ExitError
	if errors.As(runErr, &exitErr) {
		// Normal exit (e.g. user closed the window) — just log and return
		// so the picker loop can continue.
		log.Debugf("playwright exited with code %d", exitErr.ExitCode())
		return
	}
	log.Errorf("playwright show-trace failed: %v\nMake sure Playwright is installed (npx playwright install)", runErr)
}
// ghError wraps a gh CLI error with stderr output.
func ghError(err error, msg string) error {
if exitErr, ok := err.(*exec.ExitError); ok {
return fmt.Errorf("%s: %w: %s", msg, err, string(exitErr.Stderr))
}
return fmt.Errorf("%s: %w", msg, err)
}

View File

@@ -3,19 +3,13 @@ module github.com/onyx-dot-app/onyx/tools/ods
go 1.26.0
require (
github.com/gdamore/tcell/v2 v2.13.8
github.com/jmelahman/tag v0.5.2
github.com/sirupsen/logrus v1.9.4
github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.10.2
github.com/spf13/pflag v1.0.10
)
require (
github.com/gdamore/encoding v1.0.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
golang.org/x/sys v0.42.0 // indirect
golang.org/x/term v0.41.0 // indirect
golang.org/x/text v0.35.0 // indirect
golang.org/x/sys v0.39.0 // indirect
)

View File

@@ -1,68 +1,30 @@
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gdamore/encoding v1.0.1 h1:YzKZckdBL6jVt2Gc+5p82qhrGiqMdG/eNs6Wy0u3Uhw=
github.com/gdamore/encoding v1.0.1/go.mod h1:0Z0cMFinngz9kS1QfMjCP8TY7em3bZYeeklsSDPivEo=
github.com/gdamore/tcell/v2 v2.13.8 h1:Mys/Kl5wfC/GcC5Cx4C2BIQH9dbnhnkPgS9/wF3RlfU=
github.com/gdamore/tcell/v2 v2.13.8/go.mod h1:+Wfe208WDdB7INEtCsNrAN6O2m+wsTPk1RAovjaILlo=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jmelahman/tag v0.5.2 h1:g6A/aHehu5tkA31mPoDsXBNr1FigZ9A82Y8WVgb/WsM=
github.com/jmelahman/tag v0.5.2/go.mod h1:qmuqk19B1BKkpcg3kn7l/Eey+UqucLxgOWkteUGiG4Q=
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU=
golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,419 +0,0 @@
package tui
import (
"fmt"
"github.com/gdamore/tcell/v2"
)
// PickerGroup represents a labelled group of selectable items.
// Groups are passed to Pick, which reports selections as flat item indices
// spanning all groups in order.
type PickerGroup struct {
	Label string   // label for the group
	Items []string // the group's selectable items
}
// entry is a single row in the picker (either a group header or an item).
type entry struct {
	label    string // text for the row
	isHeader bool   // true for a group header row, false for an item row
	selected bool   // presumably the item's current toggle state; maintained by helpers outside this view
	groupIdx int    // presumably the index of the owning group; confirm against buildEntries
	flatIdx  int    // index across all items (ignoring headers), -1 for headers
}
// Pick shows a full-screen grouped multi-select picker.
// All items start deselected. Returns the flat indices of selected items
// (0-based, spanning all groups in order). Returns nil if cancelled.
// Returns a non-nil error if the terminal cannot be initialised, in which
// case the caller should fall back to a simpler prompt.
//
// Confirming with nothing selected is ignored and the picker stays open.
func Pick(groups []PickerGroup) ([]int, error) {
	screen, err := tcell.NewScreen()
	if err != nil {
		return nil, err
	}
	if err := screen.Init(); err != nil {
		return nil, err
	}
	defer screen.Fini()

	entries := buildEntries(groups)
	totalItems := countItems(entries)
	cursor := firstSelectableIndex(entries)
	offset := 0

	// lastIdx is the largest valid cursor position; it is 0 for an empty
	// list so the cursor can never be driven negative.
	lastIdx := len(entries) - 1
	if lastIdx < 0 {
		lastIdx = 0
	}

	for {
		w, h := screen.Size()
		selectedCount := countSelected(entries)
		drawPicker(screen, entries, groups, cursor, offset, w, h, selectedCount, totalItems)
		screen.Show()

		// Number of list rows that fit between header and footer. Clamp to
		// at least one row: on a very short terminal the raw value is zero
		// or negative, which would make PageUp/PageDown move the wrong way
		// (or past the end of the list).
		listHeight := h - headerLines - footerLines
		if listHeight < 1 {
			listHeight = 1
		}

		ev := screen.PollEvent()
		switch ev := ev.(type) {
		case *tcell.EventResize:
			screen.Sync()
		case *tcell.EventKey:
			switch action := keyAction(ev); action {
			case actionQuit:
				return nil, nil
			case actionConfirm:
				if countSelected(entries) > 0 {
					return collectSelected(entries), nil
				}
			case actionUp:
				if cursor > 0 {
					cursor--
				}
			case actionDown:
				if cursor < lastIdx {
					cursor++
				}
			case actionTop:
				cursor = 0
			case actionBottom:
				cursor = lastIdx
			case actionPageUp:
				cursor -= listHeight
				if cursor < 0 {
					cursor = 0
				}
			case actionPageDown:
				cursor += listHeight
				if cursor > lastIdx {
					cursor = lastIdx
				}
			case actionToggle:
				toggleAtCursor(entries, cursor)
			case actionAll:
				setAll(entries, true)
			case actionNone:
				setAll(entries, false)
			}

			// Keep the cursor visible
			if cursor < offset {
				offset = cursor
			}
			if cursor >= offset+listHeight {
				offset = cursor - listHeight + 1
			}
		}
	}
}
// --- actions ----------------------------------------------------------------
// action is the picker command that a key press is translated into by
// keyAction and then executed by Pick's event loop.
type action int
// The picker commands. actionNoop is the zero value and is what keyAction
// returns for keys that have no binding.
const (
actionNoop action = iota
// actionQuit cancels the picker; Pick returns nil, nil.
actionQuit
// actionConfirm returns the current selection if at least one item is selected.
actionConfirm
// actionUp / actionDown move the cursor by one row.
actionUp
actionDown
// actionTop / actionBottom jump the cursor to the first / last row.
actionTop
actionBottom
// actionPageUp / actionPageDown move the cursor by one list height.
actionPageUp
actionPageDown
// actionToggle flips the row under the cursor (a header toggles its whole group).
actionToggle
// actionAll / actionNone select / deselect every item.
actionAll
actionNone
)
// keyAction maps a key event to the picker action bound to it.
// Special keys (arrows, Home/End, PgUp/PgDn, Enter, Esc, Ctrl-C) and a small
// set of printable characters are recognised; anything else is actionNoop.
func keyAction(ev *tcell.EventKey) action {
	result := actionNoop

	key := ev.Key()
	if key != tcell.KeyRune {
		// Non-printable keys.
		switch key {
		case tcell.KeyEscape, tcell.KeyCtrlC:
			result = actionQuit
		case tcell.KeyEnter:
			result = actionConfirm
		case tcell.KeyUp:
			result = actionUp
		case tcell.KeyDown:
			result = actionDown
		case tcell.KeyHome:
			result = actionTop
		case tcell.KeyEnd:
			result = actionBottom
		case tcell.KeyPgUp:
			result = actionPageUp
		case tcell.KeyPgDn:
			result = actionPageDown
		}
		return result
	}

	// Printable characters, including vi-style movement keys.
	switch ev.Rune() {
	case 'q':
		result = actionQuit
	case ' ':
		result = actionToggle
	case 'j':
		result = actionDown
	case 'k':
		result = actionUp
	case 'g':
		result = actionTop
	case 'G':
		result = actionBottom
	case 'a':
		result = actionAll
	case 'n':
		result = actionNone
	}
	return result
}
// --- data helpers ------------------------------------------------------------
// buildEntries flattens the groups into the picker's row list: one header row
// per group followed by one item row per group item. Item rows receive
// consecutive flat indices starting at 0; header rows get flatIdx -1.
// The result is nil when groups is empty.
func buildEntries(groups []PickerGroup) []entry {
	var rows []entry
	nextFlat := 0

	for groupIdx := range groups {
		group := groups[groupIdx]

		header := entry{
			label:    group.Label,
			isHeader: true,
			groupIdx: groupIdx,
			flatIdx:  -1,
		}
		rows = append(rows, header)

		for itemIdx := range group.Items {
			rows = append(rows, entry{
				label:    group.Items[itemIdx],
				isHeader: false,
				selected: false,
				groupIdx: groupIdx,
				flatIdx:  nextFlat,
			})
			nextFlat++
		}
	}

	return rows
}
// firstSelectableIndex returns the position of the first non-header row,
// or 0 when there is none (including when entries is empty).
func firstSelectableIndex(entries []entry) int {
	for idx := 0; idx < len(entries); idx++ {
		if entries[idx].isHeader {
			continue
		}
		return idx
	}
	return 0
}
// countItems returns how many rows are items, i.e. every row that is not a
// group header.
func countItems(entries []entry) int {
	headers := 0
	for idx := range entries {
		if entries[idx].isHeader {
			headers++
		}
	}
	return len(entries) - headers
}
// countSelected returns the number of item rows whose checkbox is set.
func countSelected(entries []entry) int {
	total := 0
	for idx := range entries {
		row := &entries[idx]
		if row.isHeader || !row.selected {
			continue
		}
		total++
	}
	return total
}
// collectSelected returns the flat indices of all selected item rows, in row
// order. The result is nil when nothing is selected.
func collectSelected(entries []entry) []int {
	var picked []int
	for idx := 0; idx < len(entries); idx++ {
		if row := entries[idx]; row.selected && !row.isHeader {
			picked = append(picked, row.flatIdx)
		}
	}
	return picked
}
// toggleAtCursor flips the selection of the row at cursor, modifying entries
// in place. On an item row only that item is flipped. On a header row the
// whole group is affected: if every item in the group is already selected the
// group is deselected, otherwise every item in it is selected.
// An out-of-range cursor is ignored.
func toggleAtCursor(entries []entry, cursor int) {
	if cursor < 0 || cursor >= len(entries) {
		return
	}

	target := &entries[cursor]
	if !target.isHeader {
		target.selected = !target.selected
		return
	}

	group := target.groupIdx
	inGroup := func(row entry) bool {
		return !row.isHeader && row.groupIdx == group
	}

	// Select the group unless all of its items are already selected.
	selectGroup := false
	for _, row := range entries {
		if inGroup(row) && !row.selected {
			selectGroup = true
			break
		}
	}

	for idx := range entries {
		if inGroup(entries[idx]) {
			entries[idx].selected = selectGroup
		}
	}
}
// setAll sets the checkbox of every item row to selected, in place.
// Header rows are left untouched.
func setAll(entries []entry, selected bool) {
	for idx := 0; idx < len(entries); idx++ {
		row := &entries[idx]
		if row.isHeader {
			continue
		}
		row.selected = selected
	}
}
// --- drawing ----------------------------------------------------------------
// Screen rows reserved above and below the scrolling list. Pick and
// drawPicker subtract both from the screen height to get the list height,
// and drawPicker starts the list at row headerLines.
const (
headerLines = 2 // title + blank line
footerLines = 2 // blank line + keybinds
)
// Styles used when drawing the picker. The *Cur variants are applied to the
// row currently under the cursor.
var (
// styleDefault is the scrollbar thumb style; the track uses its dimmed form.
styleDefault = tcell.StyleDefault
// styleTitle is used for the title line at the top of the screen.
styleTitle = tcell.StyleDefault.Bold(true)
// styleGroup / styleGroupCur are used for group header rows.
styleGroup = tcell.StyleDefault.Bold(true).Foreground(tcell.ColorTeal)
styleGroupCur = tcell.StyleDefault.Bold(true).Foreground(tcell.ColorTeal).Reverse(true)
// styleCheck / styleUncheck are used for the "[x]" / "[ ]" checkbox text.
styleCheck = tcell.StyleDefault.Foreground(tcell.ColorGreen).Bold(true)
styleUncheck = tcell.StyleDefault.Dim(true)
// styleItem / styleItemCur are used for an item row's prefix and label.
styleItem = tcell.StyleDefault
styleItemCur = tcell.StyleDefault.Bold(true).Underline(true)
// Checkbox styles for the item row under the cursor.
styleCheckCur = tcell.StyleDefault.Foreground(tcell.ColorGreen).Bold(true).Underline(true)
styleUncheckCur = tcell.StyleDefault.Dim(true).Underline(true)
// styleFooter is used for the key-binding help line on the last row.
styleFooter = tcell.StyleDefault.Dim(true)
)
// drawPicker repaints the whole picker: the title line with the selection
// count, the visible window of rows starting at offset, a scrollbar in the
// last column when the rows do not all fit, and the key-binding footer on the
// last screen row. It does not call screen.Show; the caller does.
func drawPicker(
	screen tcell.Screen,
	entries []entry,
	groups []PickerGroup,
	cursor, offset, w, h, selectedCount, totalItems int,
) {
	screen.Clear()

	// Title line.
	drawLine(
		screen, 0, 0, w,
		fmt.Sprintf(" Select traces to open (%d/%d selected)", selectedCount, totalItems),
		styleTitle,
	)

	// Number of list rows; always at least one, even on a tiny terminal.
	visible := h - headerLines - footerLines
	if visible < 1 {
		visible = 1
	}

	// Draw the window [offset, offset+visible), cut off at the end of the list.
	end := offset + visible
	if end > len(entries) {
		end = len(entries)
	}
	for ei := offset; ei < end; ei++ {
		renderEntry(screen, entries, groups, ei, cursor, w, headerLines+(ei-offset))
	}

	// The scrollbar is only shown when some rows are off-screen.
	if len(entries) > visible {
		drawScrollbar(screen, w-1, headerLines, visible, offset, len(entries))
	}

	// Key-binding help on the last row.
	drawLine(
		screen, 0, h-1, w,
		" ↑/↓ move space toggle a all n none enter open q/esc quit",
		styleFooter,
	)
}
// renderEntry draws row ei of entries on screen line y, clipped to width w.
// A header row shows its label with a "(selected/total)" count for its group
// and fills the whole line. An item row shows a prefix, a "[ ]"/"[x]"
// checkbox and the label. The row at cursor uses the highlighted styles.
// The groups parameter is not used by this function.
func renderEntry(screen tcell.Screen, entries []entry, groups []PickerGroup, ei, cursor, w, y int) {
	row := entries[ei]
	focused := ei == cursor

	if !row.isHeader {
		// Item row: start from the plain, unselected look and override per state.
		lead, box := " ", "[ ]"
		boxStyle, textStyle := styleUncheck, styleItem
		switch {
		case focused && row.selected:
			lead, box = " > ", "[x]"
			boxStyle, textStyle = styleCheckCur, styleItemCur
		case focused:
			lead = " > "
			boxStyle, textStyle = styleUncheckCur, styleItemCur
		case row.selected:
			box = "[x]"
			boxStyle = styleCheck
		}

		col := drawStr(screen, 0, y, w, lead, textStyle)
		col = drawStr(screen, col, y, w, box, boxStyle)
		drawStr(screen, col, y, w, " "+row.label, textStyle)
		return
	}

	// Header row: tally this group's items and how many of them are selected.
	memberCount, memberSelected := 0, 0
	for idx := range entries {
		member := entries[idx]
		if member.isHeader || member.groupIdx != row.groupIdx {
			continue
		}
		memberCount++
		if member.selected {
			memberSelected++
		}
	}

	headerStyle := styleGroup
	if focused {
		headerStyle = styleGroupCur
	}
	drawLine(screen, 0, y, w, fmt.Sprintf(" %s (%d/%d)", row.label, memberSelected, memberCount), headerStyle)
}
// drawScrollbar draws a vertical scrollbar in column x covering height rows
// starting at row top. The thumb length is proportional to height/total
// (at least one row) and its position to offset/total. Nothing is drawn when
// all rows fit (total <= height) or height is below one.
func drawScrollbar(screen tcell.Screen, x, top, height, offset, total int) {
	if height < 1 || total <= height {
		return
	}

	thumbLen := height * height / total
	if thumbLen < 1 {
		thumbLen = 1
	}
	thumbStart := top + offset*height/total
	thumbEnd := thumbStart + thumbLen

	trackStyle := styleDefault.Dim(true)
	for row := 0; row < height; row++ {
		y := top + row
		if thumbStart <= y && y < thumbEnd {
			screen.SetContent(x, y, '┃', nil, styleDefault)
		} else {
			screen.SetContent(x, y, '│', nil, trackStyle)
		}
	}
}
// drawLine writes s on row y starting at column startX and then pads the
// remainder of the row, up to width w, with spaces in the same style.
func drawLine(screen tcell.Screen, startX, y, w int, s string, style tcell.Style) {
	col := drawStr(screen, startX, y, w, s, style)
	// Blank out whatever is left of the line.
	for col < w {
		screen.SetContent(col, y, ' ', nil, style)
		col++
	}
}
// drawStr writes s on row y starting at column x, one cell per rune, and
// stops before column maxX. It returns the column just after the last rune
// written (x itself if nothing was written).
func drawStr(screen tcell.Screen, x, y, maxX int, s string, style tcell.Style) int {
	col := x
	for _, r := range s {
		if col >= maxX {
			return col
		}
		screen.SetContent(col, y, r, nil, style)
		col++
	}
	return col
}

24
web/package-lock.json generated
View File

@@ -7901,9 +7901,7 @@
}
},
"node_modules/anymatch/node_modules/picomatch": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
"version": "2.3.1",
"license": "MIT",
"engines": {
"node": ">=8.6"
@@ -10703,9 +10701,7 @@
"license": "MIT"
},
"node_modules/handlebars": {
"version": "4.7.9",
"resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.9.tgz",
"integrity": "sha512-4E71E0rpOaQuJR2A3xDZ+GM1HyWYv1clR58tC8emQNeQe3RH7MAzSbat+V0wG78LQBo6m6bzSG/L4pBuCsgnUQ==",
"version": "4.7.8",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -12559,9 +12555,7 @@
}
},
"node_modules/jest-util/node_modules/picomatch": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
"version": "2.3.1",
"dev": true,
"license": "MIT",
"engines": {
@@ -13887,9 +13881,7 @@
}
},
"node_modules/micromatch/node_modules/picomatch": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
"version": "2.3.1",
"license": "MIT",
"engines": {
"node": ">=8.6"
@@ -15009,9 +15001,7 @@
"license": "ISC"
},
"node_modules/picomatch": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"version": "4.0.3",
"license": "MIT",
"engines": {
"node": ">=12"
@@ -15899,9 +15889,7 @@
}
},
"node_modules/readdirp/node_modules/picomatch": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
"version": "2.3.1",
"license": "MIT",
"engines": {
"node": ">=8.6"

View File

@@ -2,7 +2,6 @@
import { useState, useMemo, useEffect } from "react";
import useSWR from "swr";
import Text from "@/refresh-components/texts/Text";
import { Select } from "@/refresh-components/cards";
import { useCreateModal } from "@/refresh-components/contexts/ModalContext";
import { toast } from "@/hooks/useToast";
@@ -24,8 +23,9 @@ import { ProviderIcon } from "@/app/admin/configuration/llm/ProviderIcon";
import Message from "@/refresh-components/messages/Message";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import InputSelect from "@/refresh-components/inputs/InputSelect";
import { Button } from "@opal/components";
import { Button, Text } from "@opal/components";
import { SvgSlash, SvgUnplug } from "@opal/icons";
import { markdown } from "@opal/utils";
const NO_DEFAULT_VALUE = "__none__";
@@ -201,10 +201,10 @@ export default function ImageGenerationContent() {
<div className="flex flex-col gap-6">
{/* Section Header */}
<div className="flex flex-col gap-0.5">
<Text mainContentEmphasis text05>
<Text font="main-content-emphasis" color="text-05">
Image Generation Model
</Text>
<Text secondaryBody text03>
<Text font="secondary-body" color="text-03">
Select a model to generate images in chat.
</Text>
</div>
@@ -223,7 +223,7 @@ export default function ImageGenerationContent() {
{/* Provider Groups */}
{IMAGE_PROVIDER_GROUPS.map((group) => (
<div key={group.name} className="flex flex-col gap-2">
<Text secondaryBody text03>
<Text font="secondary-body" color="text-03">
{group.name}
</Text>
<div className="flex flex-col gap-2">
@@ -277,12 +277,13 @@ export default function ImageGenerationContent() {
{needsReplacement ? (
hasReplacements ? (
<Section alignItems="start">
<Text as="p" text03>
<b>{disconnectProvider.title}</b> is currently the default
image generation model. Session history will be preserved.
<Text as="p" color="text-03">
{markdown(
`**${disconnectProvider.title}** is currently the default image generation model. Session history will be preserved.`
)}
</Text>
<Section alignItems="start" gap={0.25}>
<Text as="p" text04>
<Text as="p" color="text-04">
Set New Default
</Text>
<InputSelect
@@ -329,22 +330,24 @@ export default function ImageGenerationContent() {
</Section>
) : (
<>
<Text as="p" text03>
<b>{disconnectProvider.title}</b> is currently the default
image generation model.
<Text as="p" color="text-03">
{markdown(
`**${disconnectProvider.title}** is currently the default image generation model.`
)}
</Text>
<Text as="p" text03>
<Text as="p" color="text-03">
Connect another provider to continue using image generation.
</Text>
</>
)
) : (
<>
<Text as="p" text03>
<b>{disconnectProvider.title}</b> models will no longer be used
to generate images.
<Text as="p" color="text-03">
{markdown(
`**${disconnectProvider.title}** models will no longer be used to generate images.`
)}
</Text>
<Text as="p" text03>
<Text as="p" color="text-03">
Session history will be preserved.
</Text>
</>

View File

@@ -15,7 +15,7 @@ import { Callout } from "@/components/ui/callout";
import { cn } from "@/lib/utils";
import { toast } from "@/hooks/useToast";
import { SvgGlobe, SvgOnyxLogo, SvgSlash, SvgUnplug } from "@opal/icons";
import { Button as OpalButton } from "@opal/components";
import { Button } from "@opal/components";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { WebProviderSetupModal } from "@/app/admin/configuration/web-search/WebProviderSetupModal";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
@@ -151,7 +151,7 @@ function WebSearchDisconnectModal({
description="This will remove the stored credentials for this provider."
onClose={onClose}
submit={
<OpalButton
<Button
variant="danger"
onClick={onDisconnect}
disabled={
@@ -159,7 +159,7 @@ function WebSearchDisconnectModal({
}
>
Disconnect
</OpalButton>
</Button>
}
>
{needsReplacement ? (

View File

@@ -75,14 +75,14 @@ export enum ChatFileType {
IMAGE = "image",
DOCUMENT = "document",
PLAIN_TEXT = "plain_text",
TABULAR = "tabular",
CSV = "csv",
USER_KNOWLEDGE = "user_knowledge",
}
export const isTextFile = (fileType: ChatFileType) =>
[
ChatFileType.PLAIN_TEXT,
ChatFileType.TABULAR,
ChatFileType.CSV,
ChatFileType.USER_KNOWLEDGE,
ChatFileType.DOCUMENT,
].includes(fileType);

View File

@@ -42,7 +42,7 @@ export default function FileDisplay({ files }: FileDisplayProps) {
file.type === ChatFileType.DOCUMENT
);
const imageFiles = files.filter((file) => file.type === ChatFileType.IMAGE);
const csvFiles = files.filter((file) => file.type === ChatFileType.TABULAR);
const csvFiles = files.filter((file) => file.type === ChatFileType.CSV);
const presentingDocument: MinimalOnyxDocument = {
document_id: previewingFile?.id ?? "",

View File

@@ -37,7 +37,6 @@ export interface Settings {
// User Knowledge settings
user_knowledge_enabled?: boolean;
user_file_max_upload_size_mb?: number | null;
file_token_count_threshold_k?: number | null;
// Connector settings
show_extra_connectors?: boolean;
@@ -69,12 +68,6 @@ export interface Settings {
// Application version from the ONYX_VERSION env var on the server.
version?: string | null;
// Hard ceiling for user_file_max_upload_size_mb, derived from env var.
max_allowed_upload_size_mb?: number;
// Factory defaults for the restore button.
default_user_file_max_upload_size_mb?: number;
default_file_token_count_threshold_k?: number;
}
export enum NotificationType {

View File

@@ -85,6 +85,8 @@ function buildFileKey(file: File): string {
return `${file.size}|${namePrefix}`;
}
const DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB = 50;
interface ProjectsContextType {
projects: Project[];
recentFiles: ProjectFile[];
@@ -339,20 +341,21 @@ export function ProjectsProvider({ children }: ProjectsProviderProps) {
onFailure?: (failedTempIds: string[]) => void
): Promise<ProjectFile[]> => {
const rawMax = settingsContext?.settings?.user_file_max_upload_size_mb;
const maxUploadSizeMb =
rawMax && rawMax > 0 ? rawMax : DEFAULT_USER_FILE_MAX_UPLOAD_SIZE_MB;
const maxUploadSizeBytes = maxUploadSizeMb * 1024 * 1024;
const oversizedFiles =
rawMax && rawMax > 0
? files.filter((file) => file.size > rawMax * 1024 * 1024)
: [];
const validFiles =
rawMax && rawMax > 0
? files.filter((file) => file.size <= rawMax * 1024 * 1024)
: files;
const oversizedFiles = files.filter(
(file) => file.size > maxUploadSizeBytes
);
const validFiles = files.filter(
(file) => file.size <= maxUploadSizeBytes
);
if (oversizedFiles.length > 0) {
const skippedNames = oversizedFiles.map((file) => file.name).join(", ");
toast.warning(
`Skipped ${oversizedFiles.length} oversized file(s) (>${rawMax} MB): ${skippedNames}`
`Skipped ${oversizedFiles.length} oversized file(s) (>${maxUploadSizeMb} MB): ${skippedNames}`
);
}

View File

@@ -142,6 +142,7 @@ function PopoverContent({
collisionPadding={8}
className={cn(
"bg-background-neutral-00 p-1 z-popover rounded-12 border shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
"flex flex-col",
"max-h-[var(--radix-popover-content-available-height)]",
"overflow-hidden",
widthClasses[width]
@@ -226,7 +227,7 @@ export function PopoverMenu({
});
return (
<Section alignItems="stretch">
<Section alignItems="stretch" height="auto" className="flex-1 min-h-0">
<ShadowDiv
scrollContainerRef={scrollContainerRef}
className="flex flex-col gap-1 max-h-[20rem] w-full"

View File

@@ -105,7 +105,7 @@ export default function ShadowDiv({
}, [containerRef, checkScroll]);
return (
<div className="relative min-h-0">
<div className="relative min-h-0 flex flex-col">
<div
ref={containerRef}
className={cn("overflow-y-auto", className)}

View File

@@ -25,7 +25,6 @@ import {
SvgFold,
SvgExternalLink,
SvgAlertCircle,
SvgRefreshCw,
} from "@opal/icons";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
import { Content } from "@opal/layouts";
@@ -55,7 +54,6 @@ import * as ExpandableCard from "@/layouts/expandable-card-layouts";
import * as ActionsLayouts from "@/layouts/actions-layouts";
import { getActionIcon } from "@/lib/tools/mcpUtils";
import { Disabled } from "@opal/core";
import IconButton from "@/refresh-components/buttons/IconButton";
import InputTypeIn from "@/refresh-components/inputs/InputTypeIn";
import useFilter from "@/hooks/useFilter";
import { MCPServer } from "@/lib/tools/interfaces";
@@ -83,10 +81,6 @@ interface ChatPreferencesFormValues {
maximum_chat_retention_days: string;
anonymous_user_enabled: boolean;
disable_default_assistant: boolean;
// File limits
user_file_max_upload_size_mb: string;
file_token_count_threshold_k: string;
}
interface MCPServerCardTool {
@@ -191,173 +185,6 @@ function MCPServerCard({
);
}
type FileLimitFieldName =
| "user_file_max_upload_size_mb"
| "file_token_count_threshold_k";
interface NumericLimitFieldProps {
name: FileLimitFieldName;
defaultValue: string;
saveSettings: (updates: Partial<Settings>) => Promise<void>;
maxValue?: number;
allowZero?: boolean;
}
function NumericLimitField({
name,
defaultValue,
saveSettings,
maxValue,
allowZero = false,
}: NumericLimitFieldProps) {
const { values, setFieldValue } =
useFormikContext<ChatPreferencesFormValues>();
const initialValue = useRef(values[name]);
const restoringRef = useRef(false);
const value = values[name];
const parsed = parseInt(value, 10);
const isOverMax =
maxValue !== undefined && !isNaN(parsed) && parsed > maxValue;
const handleRestore = () => {
restoringRef.current = true;
initialValue.current = defaultValue;
void setFieldValue(name, defaultValue);
void saveSettings({ [name]: parseInt(defaultValue, 10) });
};
const handleBlur = () => {
// The restore button triggers a blur — skip since handleRestore already saved.
if (restoringRef.current) {
restoringRef.current = false;
return;
}
const parsed = parseInt(value, 10);
const isValid = !isNaN(parsed) && (allowZero ? parsed >= 0 : parsed > 0);
// Revert invalid input (empty, NaN, negative).
if (!isValid) {
if (allowZero) {
// Empty/invalid means "no limit" — persist 0 and clear the field.
void setFieldValue(name, "");
void saveSettings({ [name]: 0 });
initialValue.current = "";
} else {
void setFieldValue(name, initialValue.current);
}
return;
}
// Block save when the value exceeds the hard ceiling.
if (maxValue !== undefined && parsed > maxValue) {
return;
}
// For allowZero fields, 0 means "no limit" — clear the display
// so the "No limit" placeholder is visible, but still persist 0.
if (allowZero && parsed === 0) {
void setFieldValue(name, "");
if (initialValue.current !== "") {
void saveSettings({ [name]: 0 });
initialValue.current = "";
}
return;
}
const normalizedDisplay = String(parsed);
// Update the display to the canonical form (e.g. strip leading zeros).
if (value !== normalizedDisplay) {
void setFieldValue(name, normalizedDisplay);
}
// Persist only when the value actually changed.
if (normalizedDisplay !== initialValue.current) {
void saveSettings({ [name]: parsed });
initialValue.current = normalizedDisplay;
}
};
return (
<div className="group w-full">
<InputTypeInField
name={name}
inputMode="numeric"
showClearButton={false}
pattern="[0-9]*"
placeholder={allowZero ? "No limit" : `Default: ${defaultValue}`}
variant={isOverMax ? "error" : undefined}
rightSection={
(value || "") !== defaultValue ? (
<div className="opacity-0 group-hover:opacity-100 group-focus-within:opacity-100 transition-opacity">
<IconButton
icon={SvgRefreshCw}
tooltip="Restore default"
internal
type="button"
onClick={handleRestore}
/>
</div>
) : undefined
}
onBlur={handleBlur}
/>
</div>
);
}
interface FileSizeLimitFieldsProps {
saveSettings: (updates: Partial<Settings>) => Promise<void>;
defaultUploadSizeMb: string;
defaultTokenThresholdK: string;
maxAllowedUploadSizeMb?: number;
}
function FileSizeLimitFields({
saveSettings,
defaultUploadSizeMb,
defaultTokenThresholdK,
maxAllowedUploadSizeMb,
}: FileSizeLimitFieldsProps) {
return (
<div className="flex gap-4 w-full items-start">
<div className="flex-1">
<InputLayouts.Vertical
title="File Size Limit (MB)"
subDescription={
maxAllowedUploadSizeMb
? `Max: ${maxAllowedUploadSizeMb} MB`
: undefined
}
nonInteractive
>
<NumericLimitField
name="user_file_max_upload_size_mb"
defaultValue={defaultUploadSizeMb}
saveSettings={saveSettings}
maxValue={maxAllowedUploadSizeMb}
/>
</InputLayouts.Vertical>
</div>
<div className="flex-1">
<InputLayouts.Vertical
title="File Token Limit (thousand tokens)"
nonInteractive
>
<NumericLimitField
name="file_token_count_threshold_k"
defaultValue={defaultTokenThresholdK}
saveSettings={saveSettings}
allowZero
/>
</InputLayouts.Vertical>
</div>
</div>
);
}
/**
* Inner form component that uses useFormikContext to access values
* and create save handlers for settings fields.
@@ -374,7 +201,6 @@ function ChatPreferencesForm() {
// Tools availability
const { tools: availableTools } = useAvailableTools();
const vectorDbEnabled = useVectorDbEnabled();
const searchTool = availableTools.find(
(t) => t.in_code_tool_id === SEARCH_TOOL_ID
);
@@ -897,28 +723,6 @@ function ChatPreferencesForm() {
</InputLayouts.Horizontal>
</Card>
<Card>
<InputLayouts.Vertical
title="File Attachment Size Limit"
description="Files attached in chats and projects must fit within both limits to be accepted. Larger files increase latency, memory usage, and token costs."
>
<FileSizeLimitFields
saveSettings={saveSettings}
defaultUploadSizeMb={
settings?.settings.default_user_file_max_upload_size_mb?.toString() ??
"100"
}
defaultTokenThresholdK={
settings?.settings.default_file_token_count_threshold_k?.toString() ??
"200"
}
maxAllowedUploadSizeMb={
settings?.settings.max_allowed_upload_size_mb
}
/>
</InputLayouts.Vertical>
</Card>
<Card>
<InputLayouts.Horizontal
title="Allow Anonymous Users"
@@ -1058,21 +862,6 @@ export default function ChatPreferencesPage() {
anonymous_user_enabled: settings.settings.anonymous_user_enabled ?? false,
disable_default_assistant:
settings.settings.disable_default_assistant ?? false,
// File limits — for upload size: 0/null means "use default";
// for token threshold: null means "use default", 0 means "no limit".
user_file_max_upload_size_mb:
(settings.settings.user_file_max_upload_size_mb ?? 0) <= 0
? settings.settings.default_user_file_max_upload_size_mb?.toString() ??
"100"
: settings.settings.user_file_max_upload_size_mb!.toString(),
file_token_count_threshold_k:
settings.settings.file_token_count_threshold_k == null
? settings.settings.default_file_token_count_threshold_k?.toString() ??
"200"
: settings.settings.file_token_count_threshold_k === 0
? ""
: settings.settings.file_token_count_threshold_k.toString(),
};
return (

View File

@@ -7,7 +7,6 @@ import {
IconProps,
OpenAIIcon,
} from "@/components/icons/icons";
import Text from "@/refresh-components/texts/Text";
import { Select } from "@/refresh-components/cards";
import Message from "@/refresh-components/messages/Message";
import * as SettingsLayouts from "@/layouts/settings-layouts";
@@ -26,7 +25,8 @@ import { toast } from "@/hooks/useToast";
import { Callout } from "@/components/ui/callout";
import { Content } from "@opal/layouts";
import { SvgMicrophone, SvgSlash, SvgUnplug } from "@opal/icons";
import { Button as OpalButton } from "@opal/components";
import { Button, Text } from "@opal/components";
import { markdown } from "@opal/utils";
import ConfirmationModalLayout from "@/refresh-components/layouts/ConfirmationModalLayout";
import { Section } from "@/layouts/general-layouts";
import { ADMIN_ROUTES } from "@/lib/admin-routes";
@@ -205,7 +205,7 @@ function VoiceDisconnectModal({
description="Voice models"
onClose={onClose}
submit={
<OpalButton
<Button
variant="danger"
onClick={onDisconnect}
disabled={
@@ -213,19 +213,19 @@ function VoiceDisconnectModal({
}
>
Disconnect
</OpalButton>
</Button>
}
>
{needsReplacement ? (
hasReplacements ? (
<Section alignItems="start">
<Text as="p" text03>
<b>{disconnectTarget.providerLabel}</b> models will no longer be
used for speech-to-text or text-to-speech, and it will no longer
be your default. Session history will be preserved.
<Text as="p" color="text-03">
{markdown(
`**${disconnectTarget.providerLabel}** models will no longer be used for speech-to-text or text-to-speech, and it will no longer be your default. Session history will be preserved.`
)}
</Text>
<Section alignItems="start" gap={0.25}>
<Text as="p" text04>
<Text as="p" color="text-04">
Set New Default
</Text>
<InputSelect
@@ -256,23 +256,24 @@ function VoiceDisconnectModal({
</Section>
) : (
<>
<Text as="p" text03>
<b>{disconnectTarget.providerLabel}</b> models will no longer be
used for speech-to-text or text-to-speech, and it will no longer
be your default.
<Text as="p" color="text-03">
{markdown(
`**${disconnectTarget.providerLabel}** models will no longer be used for speech-to-text or text-to-speech, and it will no longer be your default.`
)}
</Text>
<Text as="p" text03>
<Text as="p" color="text-03">
Connect another provider to continue using voice.
</Text>
</>
)
) : (
<>
<Text as="p" text03>
<b>{disconnectTarget.providerLabel}</b> models will no longer be
available for voice.
<Text as="p" color="text-03">
{markdown(
`**${disconnectTarget.providerLabel}** models will no longer be available for voice.`
)}
</Text>
<Text as="p" text03>
<Text as="p" color="text-03">
Session history will be preserved.
</Text>
</>
@@ -536,7 +537,7 @@ export default function VoiceConfigurationPage() {
<Callout type="danger" title="Failed to load voice settings">
{message}
{detail && (
<Text as="p" mainContentBody text03>
<Text as="p" font="main-content-body" color="text-03">
{detail}
</Text>
)}
@@ -626,7 +627,7 @@ export default function VoiceConfigurationPage() {
{TTS_PROVIDER_GROUPS.map((group) => (
<div key={group.providerType} className="flex flex-col gap-2">
<Text secondaryBody text03>
<Text font="secondary-body" color="text-03">
{group.providerLabel}
</Text>
<div className="flex flex-col gap-2">

View File

@@ -4,6 +4,14 @@ import { expectScreenshot } from "@tests/e2e/utils/visualRegression";
test.use({ storageState: "admin_auth.json" });
/** Maps each settings slug to the header title shown on that page. */
const SLUG_TO_HEADER: Record<string, string> = {
general: "Profile",
"chat-preferences": "Chats",
"accounts-access": "Accounts",
connectors: "Connectors",
};
for (const theme of THEMES) {
test.describe(`Settings pages (${theme} mode)`, () => {
test.beforeEach(async ({ page }) => {
@@ -11,21 +19,33 @@ for (const theme of THEMES) {
});
test("should screenshot each settings tab", async ({ page }) => {
await page.goto("/app/settings");
await page.waitForLoadState("networkidle");
await page.goto("/app/settings/general");
await page
.getByTestId("settings-left-tab-navigation")
.waitFor({ state: "visible" });
const nav = page.getByTestId("settings-left-tab-navigation");
const tabs = nav.locator("a");
await expect(tabs.first()).toBeVisible({ timeout: 10_000 });
const count = await tabs.count();
expect(count).toBeGreaterThan(0);
for (let i = 0; i < count; i++) {
const tab = tabs.nth(i);
const href = await tab.getAttribute("href");
const slug = href ? href.replace("/app/settings/", "") : `tab-${i}`;
await tab.click();
await page.waitForLoadState("networkidle");
const expectedHeader = SLUG_TO_HEADER[slug];
if (expectedHeader) {
await expect(
page
.locator(".opal-content-md-header")
.filter({ hasText: expectedHeader })
).toBeVisible({ timeout: 10_000 });
} else {
await page.waitForLoadState("networkidle");
}
await expectScreenshot(page, {
name: `settings-${theme}-${slug}`,