mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-03-03 14:45:46 +00:00
Compare commits
13 Commits
v3.0.0-clo
...
tokenizer
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2d9ccd8bc9 | ||
|
|
24ac8b37d3 | ||
|
|
be8b108ae4 | ||
|
|
f380a75df3 | ||
|
|
21ec93663b | ||
|
|
d789c74024 | ||
|
|
fe014776f7 | ||
|
|
700ca0e0fc | ||
|
|
a84f8238ec | ||
|
|
4fc802e19d | ||
|
|
6cfd49439a | ||
|
|
71a1faa47e | ||
|
|
1a65217baf |
@@ -31,6 +31,7 @@ from ee.onyx.server.query_and_chat.query_backend import (
|
||||
from ee.onyx.server.query_and_chat.search_backend import router as search_router
|
||||
from ee.onyx.server.query_history.api import router as query_history_router
|
||||
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
|
||||
from ee.onyx.server.scim.api import register_scim_exception_handlers
|
||||
from ee.onyx.server.scim.api import scim_router
|
||||
from ee.onyx.server.seeding import seed_db
|
||||
from ee.onyx.server.tenants.api import router as tenants_router
|
||||
@@ -167,6 +168,7 @@ def get_application() -> FastAPI:
|
||||
# they use their own SCIM bearer token auth).
|
||||
# Not behind APP_API_PREFIX because IdPs expect /scim/v2/... directly.
|
||||
application.include_router(scim_router)
|
||||
register_scim_exception_handlers(application)
|
||||
|
||||
# Ensure all routes have auth enabled or are explicitly marked as public
|
||||
check_ee_router_auth(application)
|
||||
|
||||
@@ -15,7 +15,9 @@ from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Depends
|
||||
from fastapi import FastAPI
|
||||
from fastapi import Query
|
||||
from fastapi import Request
|
||||
from fastapi import Response
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi_users.password import PasswordHelper
|
||||
@@ -24,6 +26,7 @@ from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ee.onyx.db.scim import ScimDAL
|
||||
from ee.onyx.server.scim.auth import ScimAuthError
|
||||
from ee.onyx.server.scim.auth import verify_scim_token
|
||||
from ee.onyx.server.scim.filtering import parse_scim_filter
|
||||
from ee.onyx.server.scim.models import SCIM_LIST_RESPONSE_SCHEMA
|
||||
@@ -77,6 +80,22 @@ scim_router = APIRouter(prefix="/scim/v2", tags=["SCIM"])
|
||||
_pw_helper = PasswordHelper()
|
||||
|
||||
|
||||
def register_scim_exception_handlers(app: FastAPI) -> None:
|
||||
"""Register SCIM-specific exception handlers on the FastAPI app.
|
||||
|
||||
Call this after ``app.include_router(scim_router)`` so that auth
|
||||
failures from ``verify_scim_token`` return RFC 7644 §3.12 error
|
||||
envelopes (with ``schemas`` and ``status`` fields) instead of
|
||||
FastAPI's default ``{"detail": "..."}`` format.
|
||||
"""
|
||||
|
||||
@app.exception_handler(ScimAuthError)
|
||||
async def _handle_scim_auth_error(
|
||||
_request: Request, exc: ScimAuthError
|
||||
) -> ScimJSONResponse:
|
||||
return _scim_error_response(exc.status_code, exc.detail)
|
||||
|
||||
|
||||
def _get_provider(
|
||||
_token: ScimToken = Depends(verify_scim_token),
|
||||
) -> ScimProvider:
|
||||
@@ -404,12 +423,6 @@ def create_user(
|
||||
|
||||
email = user_resource.userName.strip()
|
||||
|
||||
# externalId is how the IdP correlates this user on subsequent requests.
|
||||
# Without it, the IdP can't find the user and will try to re-create,
|
||||
# hitting a 409 conflict — so we require it up front.
|
||||
if not user_resource.externalId:
|
||||
return _scim_error_response(400, "externalId is required")
|
||||
|
||||
# Enforce seat limit
|
||||
seat_error = _check_seat_availability(dal)
|
||||
if seat_error:
|
||||
@@ -436,16 +449,19 @@ def create_user(
|
||||
dal.rollback()
|
||||
return _scim_error_response(409, f"User with email {email} already exists")
|
||||
|
||||
# Create SCIM mapping (externalId is validated above, always present)
|
||||
# Create SCIM mapping when externalId is provided — this is how the IdP
|
||||
# correlates this user on subsequent requests. Per RFC 7643, externalId
|
||||
# is optional and assigned by the provisioning client.
|
||||
external_id = user_resource.externalId
|
||||
scim_username = user_resource.userName.strip()
|
||||
fields = _fields_from_resource(user_resource)
|
||||
dal.create_user_mapping(
|
||||
external_id=external_id,
|
||||
user_id=user.id,
|
||||
scim_username=scim_username,
|
||||
fields=fields,
|
||||
)
|
||||
if external_id:
|
||||
dal.create_user_mapping(
|
||||
external_id=external_id,
|
||||
user_id=user.id,
|
||||
scim_username=scim_username,
|
||||
fields=fields,
|
||||
)
|
||||
|
||||
dal.commit()
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@ import hashlib
|
||||
import secrets
|
||||
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from fastapi import Request
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -28,6 +27,21 @@ from onyx.auth.utils import get_hashed_bearer_token_from_request
|
||||
from onyx.db.engine.sql_engine import get_session
|
||||
from onyx.db.models import ScimToken
|
||||
|
||||
|
||||
class ScimAuthError(Exception):
|
||||
"""Raised when SCIM bearer token authentication fails.
|
||||
|
||||
Unlike HTTPException, this carries the status and detail so the SCIM
|
||||
exception handler can wrap them in an RFC 7644 §3.12 error envelope
|
||||
with ``schemas`` and ``status`` fields.
|
||||
"""
|
||||
|
||||
def __init__(self, status_code: int, detail: str) -> None:
|
||||
self.status_code = status_code
|
||||
self.detail = detail
|
||||
super().__init__(detail)
|
||||
|
||||
|
||||
SCIM_TOKEN_PREFIX = "onyx_scim_"
|
||||
SCIM_TOKEN_LENGTH = 48
|
||||
|
||||
@@ -82,23 +96,14 @@ def verify_scim_token(
|
||||
"""
|
||||
hashed = _get_hashed_scim_token_from_request(request)
|
||||
if not hashed:
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail="Missing or invalid SCIM bearer token",
|
||||
)
|
||||
raise ScimAuthError(401, "Missing or invalid SCIM bearer token")
|
||||
|
||||
token = dal.get_token_by_hash(hashed)
|
||||
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail="Invalid SCIM bearer token",
|
||||
)
|
||||
raise ScimAuthError(401, "Invalid SCIM bearer token")
|
||||
|
||||
if not token.is_active:
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail="SCIM token has been revoked",
|
||||
)
|
||||
raise ScimAuthError(401, "SCIM token has been revoked")
|
||||
|
||||
return token
|
||||
|
||||
@@ -153,26 +153,28 @@ class ScimProvider(ABC):
|
||||
self,
|
||||
user: User,
|
||||
fields: ScimMappingFields,
|
||||
) -> ScimName | None:
|
||||
) -> ScimName:
|
||||
"""Build SCIM name components for the response.
|
||||
|
||||
Round-trips stored ``given_name``/``family_name`` when available (so
|
||||
the IdP gets back what it sent). Falls back to splitting
|
||||
``personal_name`` for users provisioned before we stored components.
|
||||
Always returns a ScimName — Okta's spec tests expect ``name``
|
||||
(with ``givenName``/``familyName``) on every user resource.
|
||||
Providers may override for custom behavior.
|
||||
"""
|
||||
if fields.given_name is not None or fields.family_name is not None:
|
||||
return ScimName(
|
||||
givenName=fields.given_name,
|
||||
familyName=fields.family_name,
|
||||
formatted=user.personal_name,
|
||||
givenName=fields.given_name or "",
|
||||
familyName=fields.family_name or "",
|
||||
formatted=user.personal_name or "",
|
||||
)
|
||||
if not user.personal_name:
|
||||
return None
|
||||
return ScimName(givenName="", familyName="", formatted="")
|
||||
parts = user.personal_name.split(" ", 1)
|
||||
return ScimName(
|
||||
givenName=parts[0],
|
||||
familyName=parts[1] if len(parts) > 1 else None,
|
||||
familyName=parts[1] if len(parts) > 1 else "",
|
||||
formatted=user.personal_name,
|
||||
)
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ def create_user_files(
|
||||
) -> CategorizedFilesResult:
|
||||
|
||||
# Categorize the files
|
||||
categorized_files = categorize_uploaded_files(files)
|
||||
categorized_files = categorize_uploaded_files(files, db_session)
|
||||
# NOTE: At the moment, zip metadata is not used for user files.
|
||||
# Should revisit to decide whether this should be a feature.
|
||||
upload_response = upload_files(categorized_files.acceptable, FileOrigin.USER_FILE)
|
||||
|
||||
@@ -67,6 +67,18 @@ Status checked against LiteLLM v1.81.6-nightly (2026-02-02):
|
||||
STATUS: STILL NEEDED - litellm_core_utils/litellm_logging.py lines 3185-3199 set
|
||||
usage as a dict with chat completion format instead of keeping it as
|
||||
ResponseAPIUsage. Our patch creates a deep copy before modification.
|
||||
|
||||
7. Responses API metadata=None TypeError (_patch_responses_metadata_none):
|
||||
- LiteLLM's @client decorator wrapper in utils.py uses kwargs.get("metadata", {})
|
||||
to check for router calls, but when metadata is explicitly None (key exists with
|
||||
value None), the default {} is not used
|
||||
- This causes "argument of type 'NoneType' is not iterable" TypeError which swallows
|
||||
the real exception (e.g. AuthenticationError for wrong API key)
|
||||
- Surfaces as: APIConnectionError: OpenAIException - argument of type 'NoneType' is
|
||||
not iterable
|
||||
STATUS: STILL NEEDED - litellm/utils.py wrapper function (line 1721) does not guard
|
||||
against metadata being explicitly None. Triggered when Responses API bridge
|
||||
passes **litellm_params containing metadata=None.
|
||||
"""
|
||||
|
||||
import time
|
||||
@@ -725,6 +737,44 @@ def _patch_logging_assembled_streaming_response() -> None:
|
||||
LiteLLMLoggingObj._get_assembled_streaming_response = _patched_get_assembled_streaming_response # type: ignore[method-assign]
|
||||
|
||||
|
||||
def _patch_responses_metadata_none() -> None:
|
||||
"""
|
||||
Patches litellm.responses to normalize metadata=None to metadata={} in kwargs.
|
||||
|
||||
LiteLLM's @client decorator wrapper in utils.py (line 1721) does:
|
||||
_is_litellm_router_call = "model_group" in kwargs.get("metadata", {})
|
||||
When metadata is explicitly None in kwargs, kwargs.get("metadata", {}) returns
|
||||
None (the key exists, so the default is not used), causing:
|
||||
TypeError: argument of type 'NoneType' is not iterable
|
||||
|
||||
This swallows the real exception (e.g. AuthenticationError) and surfaces as:
|
||||
APIConnectionError: OpenAIException - argument of type 'NoneType' is not iterable
|
||||
|
||||
This happens when the Responses API bridge calls litellm.responses() with
|
||||
**litellm_params which may contain metadata=None.
|
||||
|
||||
STATUS: STILL NEEDED - litellm/utils.py wrapper function uses kwargs.get("metadata", {})
|
||||
which does not guard against metadata being explicitly None. Same pattern exists
|
||||
on line 1407 for async path.
|
||||
"""
|
||||
import litellm as _litellm
|
||||
from functools import wraps
|
||||
|
||||
original_responses = _litellm.responses
|
||||
|
||||
if getattr(original_responses, "_metadata_patched", False):
|
||||
return
|
||||
|
||||
@wraps(original_responses)
|
||||
def _patched_responses(*args: Any, **kwargs: Any) -> Any:
|
||||
if kwargs.get("metadata") is None:
|
||||
kwargs["metadata"] = {}
|
||||
return original_responses(*args, **kwargs)
|
||||
|
||||
_patched_responses._metadata_patched = True # type: ignore[attr-defined]
|
||||
_litellm.responses = _patched_responses
|
||||
|
||||
|
||||
def apply_monkey_patches() -> None:
|
||||
"""
|
||||
Apply all necessary monkey patches to LiteLLM for compatibility.
|
||||
@@ -736,6 +786,7 @@ def apply_monkey_patches() -> None:
|
||||
- Patching AzureOpenAIResponsesAPIConfig.should_fake_stream to enable native streaming
|
||||
- Patching ResponsesAPIResponse.model_construct to fix usage format in all code paths
|
||||
- Patching LiteLLMLoggingObj._get_assembled_streaming_response to avoid mutating original response
|
||||
- Patching litellm.responses to fix metadata=None causing TypeError in error handling
|
||||
"""
|
||||
_patch_ollama_chunk_parser()
|
||||
_patch_openai_responses_parallel_tool_calls()
|
||||
@@ -743,3 +794,4 @@ def apply_monkey_patches() -> None:
|
||||
_patch_azure_responses_should_fake_stream()
|
||||
_patch_responses_api_usage_format()
|
||||
_patch_logging_assembled_streaming_response()
|
||||
_patch_responses_metadata_none()
|
||||
|
||||
@@ -7,13 +7,14 @@ from PIL import UnidentifiedImageError
|
||||
from pydantic import BaseModel
|
||||
from pydantic import ConfigDict
|
||||
from pydantic import Field
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import FILE_TOKEN_COUNT_THRESHOLD
|
||||
from onyx.db.llm import fetch_default_llm_model
|
||||
from onyx.file_processing.extract_file_text import extract_file_text
|
||||
from onyx.file_processing.extract_file_text import get_file_ext
|
||||
from onyx.file_processing.file_types import OnyxFileExtensions
|
||||
from onyx.file_processing.password_validation import is_file_password_protected
|
||||
from onyx.llm.factory import get_default_llm
|
||||
from onyx.natural_language_processing.utils import get_tokenizer
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
@@ -116,7 +117,9 @@ def estimate_image_tokens_for_upload(
|
||||
pass
|
||||
|
||||
|
||||
def categorize_uploaded_files(files: list[UploadFile]) -> CategorizedFiles:
|
||||
def categorize_uploaded_files(
|
||||
files: list[UploadFile], db_session: Session
|
||||
) -> CategorizedFiles:
|
||||
"""
|
||||
Categorize uploaded files based on text extractability and tokenized length.
|
||||
|
||||
@@ -128,11 +131,11 @@ def categorize_uploaded_files(files: list[UploadFile]) -> CategorizedFiles:
|
||||
"""
|
||||
|
||||
results = CategorizedFiles()
|
||||
llm = get_default_llm()
|
||||
default_model = fetch_default_llm_model(db_session)
|
||||
|
||||
tokenizer = get_tokenizer(
|
||||
model_name=llm.config.model_name, provider_type=llm.config.model_provider
|
||||
)
|
||||
model_name = default_model.name if default_model else None
|
||||
provider_type = default_model.llm_provider.provider if default_model else None
|
||||
tokenizer = get_tokenizer(model_name=model_name, provider_type=provider_type)
|
||||
|
||||
# Check if threshold checks should be skipped
|
||||
skip_threshold = False
|
||||
|
||||
@@ -809,7 +809,7 @@ pypandoc-binary==1.16.2
|
||||
# via onyx
|
||||
pyparsing==3.2.5
|
||||
# via httplib2
|
||||
pypdf==6.7.4
|
||||
pypdf==6.7.5
|
||||
# via
|
||||
# onyx
|
||||
# unstructured-client
|
||||
|
||||
@@ -389,19 +389,22 @@ def test_delete_user(scim_token: str, idp_style: str) -> None:
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_create_user_missing_external_id(scim_token: str) -> None:
|
||||
"""POST /Users without externalId returns 400."""
|
||||
def test_create_user_missing_external_id(scim_token: str, idp_style: str) -> None:
|
||||
"""POST /Users without externalId succeeds (RFC 7643: externalId is optional)."""
|
||||
email = f"scim_no_extid_{idp_style}@example.com"
|
||||
resp = ScimClient.post(
|
||||
"/Users",
|
||||
scim_token,
|
||||
json={
|
||||
"schemas": [SCIM_USER_SCHEMA],
|
||||
"userName": "scim_no_extid@example.com",
|
||||
"userName": email,
|
||||
"active": True,
|
||||
},
|
||||
)
|
||||
assert resp.status_code == 400
|
||||
assert "externalId" in resp.json()["detail"]
|
||||
assert resp.status_code == 201
|
||||
body = resp.json()
|
||||
assert body["userName"] == email
|
||||
assert body.get("externalId") is None
|
||||
|
||||
|
||||
def test_create_user_duplicate_email(scim_token: str, idp_style: str) -> None:
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
from ee.onyx.server.scim.auth import _hash_scim_token
|
||||
from ee.onyx.server.scim.auth import generate_scim_token
|
||||
from ee.onyx.server.scim.auth import SCIM_TOKEN_PREFIX
|
||||
from ee.onyx.server.scim.auth import ScimAuthError
|
||||
from ee.onyx.server.scim.auth import verify_scim_token
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ class TestVerifyScimToken:
|
||||
def test_missing_header_raises_401(self) -> None:
|
||||
request = self._make_request(None)
|
||||
dal = self._make_dal()
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
with pytest.raises(ScimAuthError) as exc_info:
|
||||
verify_scim_token(request, dal)
|
||||
assert exc_info.value.status_code == 401
|
||||
assert "Missing" in str(exc_info.value.detail)
|
||||
@@ -68,7 +68,7 @@ class TestVerifyScimToken:
|
||||
def test_wrong_prefix_raises_401(self) -> None:
|
||||
request = self._make_request("Bearer on_some_api_key")
|
||||
dal = self._make_dal()
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
with pytest.raises(ScimAuthError) as exc_info:
|
||||
verify_scim_token(request, dal)
|
||||
assert exc_info.value.status_code == 401
|
||||
|
||||
@@ -76,7 +76,7 @@ class TestVerifyScimToken:
|
||||
raw, _, _ = generate_scim_token()
|
||||
request = self._make_request(f"Bearer {raw}")
|
||||
dal = self._make_dal(token=None)
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
with pytest.raises(ScimAuthError) as exc_info:
|
||||
verify_scim_token(request, dal)
|
||||
assert exc_info.value.status_code == 401
|
||||
assert "Invalid" in str(exc_info.value.detail)
|
||||
@@ -87,7 +87,7 @@ class TestVerifyScimToken:
|
||||
mock_token = MagicMock()
|
||||
mock_token.is_active = False
|
||||
dal = self._make_dal(token=mock_token)
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
with pytest.raises(ScimAuthError) as exc_info:
|
||||
verify_scim_token(request, dal)
|
||||
assert exc_info.value.status_code == 401
|
||||
assert "revoked" in str(exc_info.value.detail)
|
||||
|
||||
@@ -109,7 +109,7 @@ class TestOktaProvider:
|
||||
result = provider.build_user_resource(user, None)
|
||||
|
||||
assert result.name == ScimName(
|
||||
givenName="Madonna", familyName=None, formatted="Madonna"
|
||||
givenName="Madonna", familyName="", formatted="Madonna"
|
||||
)
|
||||
|
||||
def test_build_user_resource_no_name(self) -> None:
|
||||
@@ -117,7 +117,7 @@ class TestOktaProvider:
|
||||
user = _make_mock_user(personal_name=None)
|
||||
result = provider.build_user_resource(user, None)
|
||||
|
||||
assert result.name is None
|
||||
assert result.name == ScimName(givenName="", familyName="", formatted="")
|
||||
assert result.displayName is None
|
||||
|
||||
def test_build_user_resource_scim_username_preserves_case(self) -> None:
|
||||
|
||||
@@ -214,13 +214,16 @@ class TestCreateUser:
|
||||
mock_dal.add_user.assert_called_once()
|
||||
mock_dal.commit.assert_called_once()
|
||||
|
||||
def test_missing_external_id_returns_400(
|
||||
@patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
|
||||
def test_missing_external_id_creates_user_without_mapping(
|
||||
self,
|
||||
mock_seats: MagicMock, # noqa: ARG002
|
||||
mock_db_session: MagicMock,
|
||||
mock_token: MagicMock,
|
||||
mock_dal: MagicMock, # noqa: ARG002
|
||||
mock_dal: MagicMock,
|
||||
provider: ScimProvider,
|
||||
) -> None:
|
||||
mock_dal.get_user_by_email.return_value = None
|
||||
resource = make_scim_user(externalId=None)
|
||||
|
||||
result = create_user(
|
||||
@@ -230,7 +233,11 @@ class TestCreateUser:
|
||||
db_session=mock_db_session,
|
||||
)
|
||||
|
||||
assert_scim_error(result, 400)
|
||||
parsed = parse_scim_user(result, status=201)
|
||||
assert parsed.userName is not None
|
||||
mock_dal.add_user.assert_called_once()
|
||||
mock_dal.create_user_mapping.assert_not_called()
|
||||
mock_dal.commit.assert_called_once()
|
||||
|
||||
@patch("ee.onyx.server.scim.api._check_seat_availability", return_value=None)
|
||||
def test_duplicate_email_returns_409(
|
||||
|
||||
@@ -126,7 +126,9 @@ Resources:
|
||||
- Effect: Allow
|
||||
Action:
|
||||
- secretsmanager:GetSecretValue
|
||||
Resource: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password-*
|
||||
Resource:
|
||||
- !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password-*
|
||||
- !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret-*
|
||||
|
||||
Outputs:
|
||||
OutputEcsCluster:
|
||||
|
||||
@@ -167,10 +167,12 @@ Resources:
|
||||
- ImportedNamespace: !ImportValue
|
||||
Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
|
||||
- Name: AUTH_TYPE
|
||||
Value: disabled
|
||||
Value: basic
|
||||
Secrets:
|
||||
- Name: POSTGRES_PASSWORD
|
||||
ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password
|
||||
- Name: USER_AUTH_SECRET
|
||||
ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret
|
||||
VolumesFrom: []
|
||||
SystemControls: []
|
||||
|
||||
|
||||
@@ -166,9 +166,11 @@ Resources:
|
||||
- ImportedNamespace: !ImportValue
|
||||
Fn::Sub: "${Environment}-onyx-cluster-OnyxNamespaceName"
|
||||
- Name: AUTH_TYPE
|
||||
Value: disabled
|
||||
Value: basic
|
||||
Secrets:
|
||||
- Name: POSTGRES_PASSWORD
|
||||
ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/postgres/user/password
|
||||
- Name: USER_AUTH_SECRET
|
||||
ValueFrom: !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${Environment}/onyx/user-auth-secret
|
||||
VolumesFrom: []
|
||||
SystemControls: []
|
||||
|
||||
@@ -19,6 +19,6 @@ dependencies:
|
||||
version: 5.4.0
|
||||
- name: code-interpreter
|
||||
repository: https://onyx-dot-app.github.io/python-sandbox/
|
||||
version: 0.3.0
|
||||
digest: sha256:cf8f01906d46034962c6ce894770621ee183ac761e6942951118aeb48540eddd
|
||||
generated: "2026-02-24T10:59:38.78318-08:00"
|
||||
version: 0.3.1
|
||||
digest: sha256:4965b6ea3674c37163832a2192cd3bc8004f2228729fca170af0b9f457e8f987
|
||||
generated: "2026-03-02T15:29:39.632344-08:00"
|
||||
|
||||
@@ -45,6 +45,6 @@ dependencies:
|
||||
repository: https://charts.min.io/
|
||||
condition: minio.enabled
|
||||
- name: code-interpreter
|
||||
version: 0.3.0
|
||||
version: 0.3.1
|
||||
repository: https://onyx-dot-app.github.io/python-sandbox/
|
||||
condition: codeInterpreter.enabled
|
||||
|
||||
@@ -92,7 +92,7 @@ backend = [
|
||||
"python-gitlab==5.6.0",
|
||||
"python-pptx==0.6.23",
|
||||
"pypandoc_binary==1.16.2",
|
||||
"pypdf==6.7.4",
|
||||
"pypdf==6.7.5",
|
||||
"pytest-mock==3.12.0",
|
||||
"pytest-playwright==0.7.0",
|
||||
"python-docx==1.1.2",
|
||||
|
||||
8
uv.lock
generated
8
uv.lock
generated
@@ -4678,7 +4678,7 @@ requires-dist = [
|
||||
{ name = "pygithub", marker = "extra == 'backend'", specifier = "==2.5.0" },
|
||||
{ name = "pympler", marker = "extra == 'backend'", specifier = "==1.1" },
|
||||
{ name = "pypandoc-binary", marker = "extra == 'backend'", specifier = "==1.16.2" },
|
||||
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.7.4" },
|
||||
{ name = "pypdf", marker = "extra == 'backend'", specifier = "==6.7.5" },
|
||||
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.5" },
|
||||
{ name = "pytest-alembic", marker = "extra == 'dev'", specifier = "==0.12.1" },
|
||||
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
|
||||
@@ -5925,11 +5925,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "pypdf"
|
||||
version = "6.7.4"
|
||||
version = "6.7.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/09/dc/f52deef12797ad58b88e4663f097a343f53b9361338aef6573f135ac302f/pypdf-6.7.4.tar.gz", hash = "sha256:9edd1cd47938bb35ec87795f61225fd58a07cfaf0c5699018ae1a47d6f8ab0e3", size = 5304821, upload-time = "2026-02-27T10:44:39.395Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f6/52/37cc0aa9e9d1bf7729a737a0d83f8b3f851c8eb137373d9f71eafb0a3405/pypdf-6.7.5.tar.gz", hash = "sha256:40bb2e2e872078655f12b9b89e2f900888bb505e88a82150b64f9f34fa25651d", size = 5304278, upload-time = "2026-03-02T09:05:21.464Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/be/cded021305f5c81b47265b8c5292b99388615a4391c21ff00fd538d34a56/pypdf-6.7.4-py3-none-any.whl", hash = "sha256:527d6da23274a6c70a9cb59d1986d93946ba8e36a6bc17f3f7cce86331492dda", size = 331496, upload-time = "2026-02-27T10:44:37.527Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/05/89/336673efd0a88956562658aba4f0bbef7cb92a6fbcbcaf94926dbc82b408/pypdf-6.7.5-py3-none-any.whl", hash = "sha256:07ba7f1d6e6d9aa2a17f5452e320a84718d4ce863367f7ede2fd72280349ab13", size = 331421, upload-time = "2026-03-02T09:05:19.722Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
20
web/lib/opal/src/icons/bookmark.tsx
Normal file
20
web/lib/opal/src/icons/bookmark.tsx
Normal file
@@ -0,0 +1,20 @@
|
||||
import type { IconProps } from "@opal/types";
|
||||
const SvgBookmark = ({ size, ...props }: IconProps) => (
|
||||
<svg
|
||||
width={size}
|
||||
height={size}
|
||||
viewBox="0 0 16 16"
|
||||
fill="none"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
stroke="currentColor"
|
||||
{...props}
|
||||
>
|
||||
<path
|
||||
d="M12.6667 14L7.99999 10.6667L3.33333 14V3.33333C3.33333 2.97971 3.4738 2.64057 3.72385 2.39052C3.9739 2.14048 4.31304 2 4.66666 2H11.3333C11.6869 2 12.0261 2.14048 12.2761 2.39052C12.5262 2.64057 12.6667 2.97971 12.6667 3.33333V14Z"
|
||||
strokeWidth={1.5}
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
</svg>
|
||||
);
|
||||
export default SvgBookmark;
|
||||
@@ -25,6 +25,7 @@ export { default as SvgBarChartSmall } from "@opal/icons/bar-chart-small";
|
||||
export { default as SvgBell } from "@opal/icons/bell";
|
||||
export { default as SvgBlocks } from "@opal/icons/blocks";
|
||||
export { default as SvgBookOpen } from "@opal/icons/book-open";
|
||||
export { default as SvgBookmark } from "@opal/icons/bookmark";
|
||||
export { default as SvgBooksLineSmall } from "@opal/icons/books-line-small";
|
||||
export { default as SvgBooksStackSmall } from "@opal/icons/books-stack-small";
|
||||
export { default as SvgBracketCurly } from "@opal/icons/bracket-curly";
|
||||
|
||||
@@ -78,6 +78,16 @@ const nextConfig = {
|
||||
},
|
||||
async rewrites() {
|
||||
return [
|
||||
{
|
||||
source: "/ph_ingest/static/:path*",
|
||||
destination: "https://us-assets.i.posthog.com/static/:path*",
|
||||
},
|
||||
{
|
||||
source: "/ph_ingest/:path*",
|
||||
destination: `${
|
||||
process.env.NEXT_PUBLIC_POSTHOG_HOST || "https://us.i.posthog.com"
|
||||
}/:path*`,
|
||||
},
|
||||
{
|
||||
source: "/api/docs/:path*", // catch /api/docs and /api/docs/...
|
||||
destination: `${
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useState } from "react";
|
||||
import { FiDownload } from "react-icons/fi";
|
||||
import { memo, useState } from "react";
|
||||
import { SvgDownload } from "@opal/icons";
|
||||
import { ImageShape } from "@/app/app/services/streamingModels";
|
||||
import { FullImageModal } from "@/app/app/components/files/images/FullImageModal";
|
||||
import { buildImgUrl } from "@/app/app/components/files/images/utils";
|
||||
@@ -24,17 +24,22 @@ const SHAPE_CLASSES: Record<ImageShape, { container: string; image: string }> =
|
||||
},
|
||||
};
|
||||
|
||||
// Used to stop image flashing as images are loaded and response continues
|
||||
const loadedImages = new Set<string>();
|
||||
|
||||
interface InMessageImageProps {
|
||||
fileId: string;
|
||||
fileName?: string;
|
||||
shape?: ImageShape;
|
||||
}
|
||||
|
||||
export function InMessageImage({
|
||||
export const InMessageImage = memo(function InMessageImage({
|
||||
fileId,
|
||||
fileName,
|
||||
shape = DEFAULT_SHAPE,
|
||||
}: InMessageImageProps) {
|
||||
const [fullImageShowing, setFullImageShowing] = useState(false);
|
||||
const [imageLoaded, setImageLoaded] = useState(false);
|
||||
const [imageLoaded, setImageLoaded] = useState(loadedImages.has(fileId));
|
||||
|
||||
const normalizedShape = SHAPE_CLASSES[shape] ? shape : DEFAULT_SHAPE;
|
||||
const { container: shapeContainerClasses, image: shapeImageClasses } =
|
||||
@@ -45,11 +50,15 @@ export function InMessageImage({
|
||||
|
||||
try {
|
||||
const response = await fetch(buildImgUrl(fileId));
|
||||
if (!response.ok) {
|
||||
console.error("Failed to download image:", response.status);
|
||||
return;
|
||||
}
|
||||
const blob = await response.blob();
|
||||
const url = window.URL.createObjectURL(blob);
|
||||
const a = document.createElement("a");
|
||||
a.href = url;
|
||||
a.download = `image-${fileId}.png`; // You can adjust the filename/extension as needed
|
||||
a.download = fileName || `image-${fileId}.png`;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
window.URL.revokeObjectURL(url);
|
||||
@@ -76,7 +85,10 @@ export function InMessageImage({
|
||||
width={1200}
|
||||
height={1200}
|
||||
alt="Chat Message Image"
|
||||
onLoad={() => setImageLoaded(true)}
|
||||
onLoad={() => {
|
||||
loadedImages.add(fileId);
|
||||
setImageLoaded(true);
|
||||
}}
|
||||
className={cn(
|
||||
"object-contain object-left overflow-hidden rounded-lg w-full h-full transition-opacity duration-300 cursor-pointer",
|
||||
shapeImageClasses,
|
||||
@@ -94,7 +106,7 @@ export function InMessageImage({
|
||||
)}
|
||||
>
|
||||
<Button
|
||||
icon={FiDownload}
|
||||
icon={SvgDownload}
|
||||
tooltip="Download"
|
||||
onClick={handleDownload}
|
||||
/>
|
||||
@@ -102,4 +114,4 @@ export function InMessageImage({
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1,3 +1,21 @@
|
||||
const CHAT_FILE_URL_REGEX = /\/api\/chat\/file\/([^/?#]+)/;
|
||||
const IMAGE_EXTENSIONS = /\.(png|jpe?g|gif|webp|svg|bmp|ico|tiff?)$/i;
|
||||
|
||||
export function buildImgUrl(fileId: string) {
|
||||
return `/api/chat/file/${fileId}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* If `href` points to a chat file and `linkText` ends with an image extension,
|
||||
* returns the file ID. Otherwise returns null.
|
||||
*/
|
||||
export function extractChatImageFileId(
|
||||
href: string | undefined,
|
||||
linkText: string
|
||||
): string | null {
|
||||
if (!href) return null;
|
||||
const match = CHAT_FILE_URL_REGEX.exec(href);
|
||||
if (!match?.[1]) return null;
|
||||
if (!IMAGE_EXTENSIONS.test(linkText)) return null;
|
||||
return match[1];
|
||||
}
|
||||
|
||||
@@ -195,7 +195,7 @@ const HumanMessage = React.memo(function HumanMessage({
|
||||
<div className="md:max-w-[37.5rem] flex basis-[100%] md:basis-auto justify-end md:order-1">
|
||||
<div
|
||||
className={
|
||||
"max-w-[30rem] md:max-w-[37.5rem] whitespace-break-spaces rounded-t-16 rounded-bl-16 bg-background-tint-02 py-2 px-3"
|
||||
"max-w-[30rem] md:max-w-[37.5rem] whitespace-break-spaces break-anywhere rounded-t-16 rounded-bl-16 bg-background-tint-02 py-2 px-3"
|
||||
}
|
||||
onCopy={(e) => {
|
||||
const selection = window.getSelection();
|
||||
|
||||
@@ -14,6 +14,8 @@ import {
|
||||
import { extractCodeText, preprocessLaTeX } from "@/app/app/message/codeUtils";
|
||||
import { CodeBlock } from "@/app/app/message/CodeBlock";
|
||||
import { transformLinkUri, cn } from "@/lib/utils";
|
||||
import { InMessageImage } from "@/app/app/components/files/images/InMessageImage";
|
||||
import { extractChatImageFileId } from "@/app/app/components/files/images/utils";
|
||||
|
||||
/**
|
||||
* Processes content for markdown rendering by handling code blocks and LaTeX
|
||||
@@ -58,17 +60,31 @@ export const useMarkdownComponents = (
|
||||
);
|
||||
|
||||
const anchorCallback = useCallback(
|
||||
(props: any) => (
|
||||
<MemoizedAnchor
|
||||
updatePresentingDocument={state?.setPresentingDocument || (() => {})}
|
||||
docs={state?.docs || []}
|
||||
userFiles={state?.userFiles || []}
|
||||
citations={state?.citations}
|
||||
href={props.href}
|
||||
>
|
||||
{props.children}
|
||||
</MemoizedAnchor>
|
||||
),
|
||||
(props: any) => {
|
||||
const imageFileId = extractChatImageFileId(
|
||||
props.href,
|
||||
String(props.children ?? "")
|
||||
);
|
||||
if (imageFileId) {
|
||||
return (
|
||||
<InMessageImage
|
||||
fileId={imageFileId}
|
||||
fileName={String(props.children ?? "")}
|
||||
/>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<MemoizedAnchor
|
||||
updatePresentingDocument={state?.setPresentingDocument || (() => {})}
|
||||
docs={state?.docs || []}
|
||||
userFiles={state?.userFiles || []}
|
||||
citations={state?.citations}
|
||||
href={props.href}
|
||||
>
|
||||
{props.children}
|
||||
</MemoizedAnchor>
|
||||
);
|
||||
},
|
||||
[
|
||||
state?.docs,
|
||||
state?.userFiles,
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
:root {
|
||||
--app-page-main-content-width: 52.5rem;
|
||||
--block-width-form-input-min: 10rem;
|
||||
|
||||
--container-sm: 42rem;
|
||||
--container-sm-md: 47rem;
|
||||
--container-md: 54.5rem;
|
||||
--container-lg: 62rem;
|
||||
--container-full: 100%;
|
||||
}
|
||||
|
||||
@@ -3,9 +3,7 @@ import posthog from "posthog-js";
|
||||
import { PostHogProvider } from "posthog-js/react";
|
||||
import { useEffect } from "react";
|
||||
|
||||
const isPostHogEnabled = !!(
|
||||
process.env.NEXT_PUBLIC_POSTHOG_KEY && process.env.NEXT_PUBLIC_POSTHOG_HOST
|
||||
);
|
||||
const isPostHogEnabled = !!process.env.NEXT_PUBLIC_POSTHOG_KEY;
|
||||
|
||||
type PHProviderProps = { children: React.ReactNode };
|
||||
|
||||
@@ -13,7 +11,9 @@ export function PHProvider({ children }: PHProviderProps) {
|
||||
useEffect(() => {
|
||||
if (isPostHogEnabled) {
|
||||
posthog.init(process.env.NEXT_PUBLIC_POSTHOG_KEY!, {
|
||||
api_host: process.env.NEXT_PUBLIC_POSTHOG_HOST!,
|
||||
api_host: "/ph_ingest",
|
||||
ui_host:
|
||||
process.env.NEXT_PUBLIC_POSTHOG_HOST || "https://us.posthog.com",
|
||||
person_profiles: "identified_only",
|
||||
capture_pageview: false,
|
||||
session_recording: {
|
||||
|
||||
@@ -43,8 +43,11 @@ import { Content } from "@opal/layouts";
|
||||
import Spacer from "@/refresh-components/Spacer";
|
||||
|
||||
const widthClasses = {
|
||||
md: "w-[min(50rem,100%)]",
|
||||
lg: "w-[min(60rem,100%)]",
|
||||
sm: "w-[min(var(--container-sm),100%)]",
|
||||
"sm-md": "w-[min(var(--container-sm-md),100%)]",
|
||||
md: "w-[min(var(--container-md),100%)]",
|
||||
lg: "w-[min(var(--container-lg),100%)]",
|
||||
full: "w-[var(--container-full)]",
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -57,18 +60,19 @@ const widthClasses = {
|
||||
* - Full height container with centered content
|
||||
* - Automatic overflow-y scrolling
|
||||
* - Contains the scroll container ID that Settings.Header uses for shadow detection
|
||||
* - Configurable width: "md" (50rem max) or "full" (full width with 4rem padding)
|
||||
* - Configurable width via CSS variables defined in sizes.css:
|
||||
* "sm" (672px), "sm-md" (752px), "md" (872px, default), "lg" (992px), "full" (100%)
|
||||
*
|
||||
* @example
|
||||
* ```tsx
|
||||
* // Default medium width (50rem max)
|
||||
* // Default medium width (872px max)
|
||||
* <SettingsLayouts.Root>
|
||||
* <SettingsLayouts.Header {...} />
|
||||
* <SettingsLayouts.Body>...</SettingsLayouts.Body>
|
||||
* </SettingsLayouts.Root>
|
||||
*
|
||||
* // Full width with padding
|
||||
* <SettingsLayouts.Root width="full">
|
||||
* // Large width (992px max)
|
||||
* <SettingsLayouts.Root width="lg">
|
||||
* <SettingsLayouts.Header {...} />
|
||||
* <SettingsLayouts.Body>...</SettingsLayouts.Body>
|
||||
* </SettingsLayouts.Root>
|
||||
|
||||
@@ -472,7 +472,7 @@ function ChatPreferencesForm() {
|
||||
|
||||
<Section
|
||||
flexDirection="row"
|
||||
justifyContent="start"
|
||||
justifyContent="between"
|
||||
alignItems="center"
|
||||
gap={0.25}
|
||||
>
|
||||
@@ -480,22 +480,29 @@ function ChatPreferencesForm() {
|
||||
<EmptyMessage title="No connectors set up" />
|
||||
) : (
|
||||
<>
|
||||
{uniqueSources.slice(0, 3).map((source) => {
|
||||
const meta = getSourceMetadata(source);
|
||||
return (
|
||||
<Card
|
||||
key={source}
|
||||
padding={0.75}
|
||||
className="w-[10rem]"
|
||||
>
|
||||
<Content
|
||||
icon={meta.icon}
|
||||
title={meta.displayName}
|
||||
sizePreset="main-ui"
|
||||
/>
|
||||
</Card>
|
||||
);
|
||||
})}
|
||||
<Section
|
||||
flexDirection="row"
|
||||
justifyContent="start"
|
||||
alignItems="center"
|
||||
gap={0.25}
|
||||
>
|
||||
{uniqueSources.slice(0, 3).map((source) => {
|
||||
const meta = getSourceMetadata(source);
|
||||
return (
|
||||
<Card
|
||||
key={source}
|
||||
padding={0.75}
|
||||
className="w-[10rem]"
|
||||
>
|
||||
<Content
|
||||
icon={meta.icon}
|
||||
title={meta.displayName}
|
||||
sizePreset="main-ui"
|
||||
/>
|
||||
</Card>
|
||||
);
|
||||
})}
|
||||
</Section>
|
||||
|
||||
<Button
|
||||
href="/admin/indexing/status"
|
||||
|
||||
@@ -46,7 +46,7 @@ export const csvVariant: PreviewVariant = {
|
||||
return (
|
||||
<Section justifyContent="start" alignItems="start" padding={1}>
|
||||
<Table>
|
||||
<TableHeader className="sticky top-0 z-sticky">
|
||||
<TableHeader className="sticky top-0 z-sticky bg-background-tint-01">
|
||||
<TableRow noHover>
|
||||
{headers.map((h: string, i: number) => (
|
||||
<TableHead key={i}>
|
||||
@@ -64,7 +64,7 @@ export const csvVariant: PreviewVariant = {
|
||||
<TableCell
|
||||
key={cIdx}
|
||||
className={cn(
|
||||
cIdx === 0 && "sticky left-0",
|
||||
cIdx === 0 && "sticky left-0 bg-background-tint-01",
|
||||
"py-4 px-4 whitespace-normal break-words"
|
||||
)}
|
||||
>
|
||||
|
||||
@@ -370,5 +370,12 @@ module.exports = {
|
||||
plugin(({ addVariant }) => {
|
||||
addVariant("focus-within-nonactive", "&:focus-within:not(:active)");
|
||||
}),
|
||||
plugin(({ addUtilities }) => {
|
||||
addUtilities({
|
||||
".break-anywhere": {
|
||||
"overflow-wrap": "anywhere",
|
||||
},
|
||||
});
|
||||
}),
|
||||
],
|
||||
};
|
||||
|
||||
@@ -6,6 +6,9 @@ import { expectElementScreenshot } from "@tests/e2e/utils/visualRegression";
|
||||
|
||||
const SHORT_USER_MESSAGE = "What is Onyx?";
|
||||
|
||||
const LONG_WORD_USER_MESSAGE =
|
||||
"Please look into this issue: __________________________________________ and also this token: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA and this URL: https://example.com/a/very/long/path/that/keeps/going/and/going/and/going/without/any/breaks/whatsoever/to/test/overflow";
|
||||
|
||||
const LONG_USER_MESSAGE = `I've been evaluating several enterprise search and AI platforms for our organization, and I have a number of detailed questions about Onyx that I'd like to understand before we make a decision.
|
||||
|
||||
First, can you explain how Onyx handles document indexing across multiple data sources? We currently use Confluence, Google Drive, Slack, and GitHub, and we need to ensure that all of these can be indexed simultaneously without performance degradation.
|
||||
@@ -369,6 +372,36 @@ for (const theme of THEMES) {
|
||||
);
|
||||
});
|
||||
|
||||
test("user message with very long words wraps without overflowing", async ({
|
||||
page,
|
||||
}) => {
|
||||
await openChat(page);
|
||||
await mockChatEndpoint(page, SHORT_AI_RESPONSE);
|
||||
|
||||
await sendMessage(page, LONG_WORD_USER_MESSAGE);
|
||||
|
||||
const userMessage = page.locator("#onyx-human-message").first();
|
||||
await expect(userMessage).toContainText("__________");
|
||||
|
||||
await screenshotChatContainer(
|
||||
page,
|
||||
`chat-long-word-user-message-${theme}`
|
||||
);
|
||||
|
||||
// Assert the message bubble does not overflow horizontally.
|
||||
const overflows = await userMessage.evaluate((el) => {
|
||||
const bubble = el.querySelector<HTMLElement>(
|
||||
".whitespace-break-spaces"
|
||||
);
|
||||
if (!bubble)
|
||||
throw new Error(
|
||||
"Expected human message bubble (.whitespace-break-spaces) to exist"
|
||||
);
|
||||
return bubble.scrollWidth > bubble.offsetWidth;
|
||||
});
|
||||
expect(overflows).toBe(false);
|
||||
});
|
||||
|
||||
test("long user message with long AI response renders correctly", async ({
|
||||
page,
|
||||
}) => {
|
||||
|
||||
Reference in New Issue
Block a user