chore: Remove end-of-lifed backend routes (#8453)

Yuhong Sun
2026-02-13 17:57:06 -08:00
committed by GitHub
parent d9feaf43a7
commit 89d2759021
26 changed files with 142 additions and 933 deletions
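For external callers, the removed `/api/chat/send-message` route's replacement is the existing `/api/chat/send-chat-message` route (the test scripts further down in this diff are updated accordingly). A minimal client-side sketch, assuming a reachable Onyx deployment, an existing chat session ID, and an API key; the exact request and response fields should be checked against `SendMessageRequest` and `ChatFullResponse`:

```python
import requests

ONYX_URL = "http://localhost:8080"  # assumption: your deployment's base URL
API_KEY = "replace-me"  # assumption: an API key with chat access

def send_chat_message(chat_session_id: str, message: str) -> str:
    """POST to the retained /send-chat-message route instead of the removed /send-message."""
    resp = requests.post(
        f"{ONYX_URL}/api/chat/send-chat-message",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={
            "chat_session_id": chat_session_id,
            "message": message,
        },
    )
    resp.raise_for_status()
    # ChatFullResponse is the declared response model; `answer` is one of its core fields.
    return resp.json()["answer"]
```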

View File

@@ -27,6 +27,8 @@ class SearchFlowClassificationResponse(BaseModel):
is_search_flow: bool
# NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
class SendSearchQueryRequest(BaseModel):
search_query: str
filters: BaseFilters | None = None

View File

@@ -67,6 +67,8 @@ def search_flow_classification(
return SearchFlowClassificationResponse(is_search_flow=is_search_flow)
# NOTE: This endpoint is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
@router.post(
"/send-search-message",
response_model=None,

View File

@@ -3,34 +3,26 @@ from collections.abc import Callable
from typing import cast
from uuid import UUID
from fastapi import HTTPException
from fastapi.datastructures import Headers
from sqlalchemy.orm import Session
from onyx.auth.users import is_user_admin
from onyx.chat.models import ChatHistoryResult
from onyx.chat.models import ChatLoadedFile
from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.models import ToolCallSimple
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.configs.constants import MessageType
from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.chat import create_chat_session
from onyx.db.chat import get_chat_messages_by_session
from onyx.db.chat import get_or_create_root_message
from onyx.db.kg_config import get_kg_config_settings
from onyx.db.kg_config import is_kg_config_settings_enabled_valid
from onyx.db.llm import fetch_existing_doc_sets
from onyx.db.llm import fetch_existing_tools
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import Persona
from onyx.db.models import SearchDoc as DbSearchDoc
from onyx.db.models import Tool
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.db.projects import check_project_ownership
from onyx.file_processing.extract_file_text import extract_file_text
@@ -47,9 +39,6 @@ from onyx.prompts.tool_prompts import TOOL_CALL_FAILURE_PROMPT
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_implementations.custom.custom_tool import (
build_custom_tools_from_openapi_schema_and_headers,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from onyx.utils.timing import log_function_time
@@ -278,70 +267,6 @@ def extract_headers(
return extracted_headers
def create_temporary_persona(
persona_config: PersonaOverrideConfig, db_session: Session, user: User
) -> Persona:
if not is_user_admin(user):
raise HTTPException(
status_code=403,
detail="User is not authorized to create a persona in one shot queries",
)
"""Create a temporary Persona object from the provided configuration."""
persona = Persona(
name=persona_config.name,
description=persona_config.description,
num_chunks=persona_config.num_chunks,
llm_relevance_filter=persona_config.llm_relevance_filter,
llm_filter_extraction=persona_config.llm_filter_extraction,
recency_bias=RecencyBiasSetting.BASE_DECAY,
llm_model_provider_override=persona_config.llm_model_provider_override,
llm_model_version_override=persona_config.llm_model_version_override,
)
if persona_config.prompts:
# Use the first prompt from the override config for embedded prompt fields
first_prompt = persona_config.prompts[0]
persona.system_prompt = first_prompt.system_prompt
persona.task_prompt = first_prompt.task_prompt
persona.datetime_aware = first_prompt.datetime_aware
persona.tools = []
if persona_config.custom_tools_openapi:
from onyx.chat.emitter import get_default_emitter
for schema in persona_config.custom_tools_openapi:
tools = cast(
list[Tool],
build_custom_tools_from_openapi_schema_and_headers(
tool_id=0, # dummy tool id
openapi_schema=schema,
emitter=get_default_emitter(),
),
)
persona.tools.extend(tools)
if persona_config.tools:
tool_ids = [tool.id for tool in persona_config.tools]
persona.tools.extend(
fetch_existing_tools(db_session=db_session, tool_ids=tool_ids)
)
if persona_config.tool_ids:
persona.tools.extend(
fetch_existing_tools(
db_session=db_session, tool_ids=persona_config.tool_ids
)
)
fetched_docs = fetch_existing_doc_sets(
db_session=db_session, doc_ids=persona_config.document_set_ids
)
persona.document_sets = fetched_docs
return persona
def process_kg_commands(
message: str, persona_name: str, tenant_id: str, db_session: Session # noqa: ARG001
) -> None:

View File

@@ -1,17 +1,13 @@
from collections.abc import Callable
from collections.abc import Iterator
from enum import Enum
from typing import Any
from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
from onyx.configs.constants import MessageType
from onyx.context.search.enums import SearchType
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import FileDescriptor
from onyx.file_store.models import InMemoryChatFile
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import Packet
@@ -20,54 +16,6 @@ from onyx.tools.models import ToolCallKickoff
from onyx.tools.tool_implementations.custom.base_tool_types import ToolResultType
class StreamStopReason(Enum):
CONTEXT_LENGTH = "context_length"
CANCELLED = "cancelled"
FINISHED = "finished"
class StreamType(Enum):
SUB_QUESTIONS = "sub_questions"
SUB_ANSWER = "sub_answer"
MAIN_ANSWER = "main_answer"
class StreamStopInfo(BaseModel):
stop_reason: StreamStopReason
stream_type: StreamType = StreamType.MAIN_ANSWER
def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore
data = super().model_dump(mode="json", *args, **kwargs) # type: ignore
data["stop_reason"] = self.stop_reason.name
return data
class UserKnowledgeFilePacket(BaseModel):
user_files: list[FileDescriptor]
class RelevanceAnalysis(BaseModel):
relevant: bool
content: str | None = None
class DocumentRelevance(BaseModel):
"""Contains all relevance information for a given search"""
relevance_summaries: dict[str, RelevanceAnalysis]
class OnyxAnswerPiece(BaseModel):
# A small piece of a complete answer. Used for streaming back answers.
answer_piece: str | None # if None, specifies the end of an Answer
class MessageResponseIDInfo(BaseModel):
user_message_id: int | None
reserved_assistant_message_id: int
class StreamingError(BaseModel):
error: str
stack_trace: str | None = None
@@ -78,23 +26,11 @@ class StreamingError(BaseModel):
details: dict | None = None # Additional context (tool name, model name, etc.)
class OnyxAnswer(BaseModel):
answer: str | None
class FileChatDisplay(BaseModel):
file_ids: list[str]
class CustomToolResponse(BaseModel):
response: ToolResultType
tool_name: str
class ToolConfig(BaseModel):
id: int
class ProjectSearchConfig(BaseModel):
"""Configuration for search tool availability in project context."""
@@ -102,83 +38,15 @@ class ProjectSearchConfig(BaseModel):
disable_forced_tool: bool
class PromptOverrideConfig(BaseModel):
name: str
description: str = ""
system_prompt: str
task_prompt: str = ""
datetime_aware: bool = True
include_citations: bool = True
class PersonaOverrideConfig(BaseModel):
name: str
description: str
search_type: SearchType = SearchType.SEMANTIC
num_chunks: float | None = None
llm_relevance_filter: bool = False
llm_filter_extraction: bool = False
llm_model_provider_override: str | None = None
llm_model_version_override: str | None = None
prompts: list[PromptOverrideConfig] = Field(default_factory=list)
# Note: prompt_ids removed - prompts are now embedded in personas
document_set_ids: list[int] = Field(default_factory=list)
tools: list[ToolConfig] = Field(default_factory=list)
tool_ids: list[int] = Field(default_factory=list)
custom_tools_openapi: list[dict[str, Any]] = Field(default_factory=list)
AnswerQuestionPossibleReturn = (
OnyxAnswerPiece
| CitationInfo
| FileChatDisplay
| CustomToolResponse
| StreamingError
| StreamStopInfo
)
class CreateChatSessionID(BaseModel):
chat_session_id: UUID
AnswerQuestionStreamReturn = Iterator[AnswerQuestionPossibleReturn]
AnswerStreamPart = Packet | MessageResponseIDInfo | StreamingError | CreateChatSessionID
class LLMMetricsContainer(BaseModel):
prompt_tokens: int
response_tokens: int
StreamProcessor = Callable[[Iterator[str]], AnswerQuestionStreamReturn]
AnswerStreamPart = (
Packet
| StreamStopInfo
| MessageResponseIDInfo
| StreamingError
| UserKnowledgeFilePacket
| CreateChatSessionID
)
AnswerStream = Iterator[AnswerStreamPart]
class ChatBasicResponse(BaseModel):
# This is built piece by piece, any of these can be None as the flow could break
answer: str
answer_citationless: str
top_documents: list[SearchDoc]
error_msg: str | None
message_id: int
citation_info: list[CitationInfo]
class ToolCallResponse(BaseModel):
"""Tool call with full details for non-streaming response."""
@@ -191,8 +59,23 @@ class ToolCallResponse(BaseModel):
pre_reasoning: str | None = None
class ChatBasicResponse(BaseModel):
# This is built piece by piece, any of these can be None as the flow could break
answer: str
answer_citationless: str
top_documents: list[SearchDoc]
error_msg: str | None
message_id: int
citation_info: list[CitationInfo]
class ChatFullResponse(BaseModel):
"""Complete non-streaming response with all available data."""
"""Complete non-streaming response with all available data.
NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
"""
# Core response fields
answer: str

View File

@@ -37,7 +37,6 @@ from onyx.chat.models import ChatMessageSimple
from onyx.chat.models import CreateChatSessionID
from onyx.chat.models import ExtractedProjectFiles
from onyx.chat.models import FileToolMetadata
from onyx.chat.models import MessageResponseIDInfo
from onyx.chat.models import ProjectFileMetadata
from onyx.chat.models import ProjectSearchConfig
from onyx.chat.models import StreamingError
@@ -81,8 +80,7 @@ from onyx.llm.utils import litellm_exception_to_error_msg
from onyx.onyxbot.slack.models import SlackContext
from onyx.redis.redis_pool import get_redis_client
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.models import OptionalSearchSetting
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
@@ -1027,68 +1025,6 @@ def llm_loop_completion_handle(
)
def stream_chat_message_objects(
new_msg_req: CreateChatMessageRequest,
user: User,
db_session: Session,
# if specified, uses the last user message and does not create a new user message based
# on the `new_msg_req.message`. Currently, requires a state where the last message is a
litellm_additional_headers: dict[str, str] | None = None,
custom_tool_additional_headers: dict[str, str] | None = None,
bypass_acl: bool = False,
# Additional context that should be included in the chat history, for example:
# Slack threads where the conversation cannot be represented by a chain of User/Assistant
# messages. Both of the below are used for Slack
# NOTE: is not stored in the database, only passed in to the LLM as context
additional_context: str | None = None,
# Slack context for federated Slack search
slack_context: SlackContext | None = None,
) -> AnswerStream:
forced_tool_id = (
new_msg_req.forced_tool_ids[0] if new_msg_req.forced_tool_ids else None
)
if (
new_msg_req.retrieval_options
and new_msg_req.retrieval_options.run_search == OptionalSearchSetting.ALWAYS
):
all_tools = get_tools(db_session)
search_tool_id = next(
(tool.id for tool in all_tools if tool.in_code_tool_id == SEARCH_TOOL_ID),
None,
)
forced_tool_id = search_tool_id
translated_new_msg_req = SendMessageRequest(
message=new_msg_req.message,
llm_override=new_msg_req.llm_override,
mock_llm_response=new_msg_req.mock_llm_response,
allowed_tool_ids=new_msg_req.allowed_tool_ids,
forced_tool_id=forced_tool_id,
file_descriptors=new_msg_req.file_descriptors,
internal_search_filters=(
new_msg_req.retrieval_options.filters
if new_msg_req.retrieval_options
else None
),
deep_research=new_msg_req.deep_research,
parent_message_id=new_msg_req.parent_message_id,
chat_session_id=new_msg_req.chat_session_id,
origin=new_msg_req.origin,
include_citations=new_msg_req.include_citations,
)
return handle_stream_message_objects(
new_msg_req=translated_new_msg_req,
user=user,
db_session=db_session,
litellm_additional_headers=litellm_additional_headers,
custom_tool_additional_headers=custom_tool_additional_headers,
bypass_acl=bypass_acl,
additional_context=additional_context,
slack_context=slack_context,
)
def remove_answer_citations(answer: str) -> str:
pattern = r"\s*\[\[\d+\]\]\(http[s]?://[^\s]+\)"
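With the `stream_chat_message_objects` wrapper removed, callers build a `SendMessageRequest` and invoke `handle_stream_message_objects` directly; a rough sketch assembled from the call sites this commit updates (the eval runner and the Anthropic provider test), with session setup and optional fields elided:

```python
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.process_message import gather_stream_full
from onyx.chat.process_message import handle_stream_message_objects
from onyx.server.query_and_chat.models import SendMessageRequest

def run_chat_turn(chat_session_id, user, db_session):
    # Build the request directly; CreateChatMessageRequest/RetrievalDetails no longer exist.
    request = SendMessageRequest(
        message="hello",
        chat_session_id=chat_session_id,
    )
    # State container that gather_stream_full reads alongside the packet stream.
    state_container = ChatStateContainer()
    packets = handle_stream_message_objects(
        new_msg_req=request,
        user=user,
        db_session=db_session,
        external_state_container=state_container,
    )
    # Drain the stream into a single full (non-streaming) response.
    return gather_stream_full(packets, state_container)
```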

View File

@@ -6,7 +6,6 @@ from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
from pydantic import field_validator
from onyx.configs.constants import DocumentSource
from onyx.db.models import SearchSettings
@@ -97,21 +96,6 @@ class IndexFilters(BaseFilters, UserFileFilters, AssistantKnowledgeFilters):
tenant_id: str | None = None
class ChunkContext(BaseModel):
# If not specified (None), picked up from Persona settings if there is space
# if specified (even if 0), it always uses the specified number of chunks above and below
chunks_above: int | None = None
chunks_below: int | None = None
full_doc: bool = False
@field_validator("chunks_above", "chunks_below")
@classmethod
def check_non_negative(cls, value: int, field: Any) -> int:
if value is not None and value < 0:
raise ValueError(f"{field.name} must be non-negative")
return value
class BasicChunkRequest(BaseModel):
query: str

View File

@@ -19,7 +19,6 @@ from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
from onyx.chat.models import DocumentRelevance
from onyx.configs.chat_configs import HARD_DELETE_CHATS
from onyx.configs.constants import MessageType
from onyx.context.search.models import InferenceSection
@@ -672,27 +671,6 @@ def set_as_latest_chat_message(
db_session.commit()
def update_search_docs_table_with_relevance(
db_session: Session,
reference_db_search_docs: list[DBSearchDoc],
relevance_summary: DocumentRelevance,
) -> None:
for search_doc in reference_db_search_docs:
relevance_data = relevance_summary.relevance_summaries.get(
search_doc.document_id
)
if relevance_data is not None:
db_session.execute(
update(DBSearchDoc)
.where(DBSearchDoc.id == search_doc.id)
.values(
is_relevant=relevance_data.relevant,
relevance_explanation=relevance_data.content,
)
)
db_session.commit()
def _sanitize_for_postgres(value: str) -> str:
"""Remove NUL (0x00) characters from strings as PostgreSQL doesn't allow them."""
sanitized = value.replace("\x00", "")

View File

@@ -4,23 +4,21 @@ from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from pydantic import BaseModel
from sqlalchemy import Engine
from sqlalchemy import event
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.session import SessionTransaction
from onyx.chat.models import MessageResponseIDInfo
from onyx.chat.chat_state import ChatStateContainer
from onyx.chat.models import StreamingError
from onyx.chat.models import ChatFullResponse
from onyx.chat.process_message import AnswerStream
from onyx.chat.process_message import gather_stream_full
from onyx.chat.process_message import handle_stream_message_objects
from onyx.chat.process_message import remove_answer_citations
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.constants import DEFAULT_PERSONA_ID
from onyx.db.chat import create_chat_session
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.db.users import get_user_by_email
from onyx.evals.models import ChatFullEvalResult
from onyx.evals.models import EvalationAck
from onyx.evals.models import EvalConfigurationOptions
from onyx.evals.models import EvalMessage
@@ -33,18 +31,7 @@ from onyx.evals.provider import get_provider
from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.server.query_and_chat.models import RetrievalDetails
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import CitationInfo
from onyx.server.query_and_chat.streaming_models import CustomToolStart
from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
from onyx.server.query_and_chat.streaming_models import OpenUrlStart
from onyx.server.query_and_chat.streaming_models import Packet
from onyx.server.query_and_chat.streaming_models import PythonToolStart
from onyx.server.query_and_chat.streaming_models import SearchToolStart
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
@@ -87,193 +74,29 @@ def isolated_ephemeral_session_factory(
conn.close()
def _chat_full_response_to_eval_result(
full: ChatFullResponse,
stream_start_time: float,
) -> ChatFullEvalResult:
"""Map ChatFullResponse from gather_stream_full to eval result components."""
tools_called = [tc.tool_name for tc in full.tool_calls]
tool_call_details: list[dict[str, Any]] = [
{"tool_name": tc.tool_name, "tool_arguments": tc.tool_arguments}
for tc in full.tool_calls
]
stream_end_time = time.time()
total_ms = (stream_end_time - stream_start_time) * 1000
timings = EvalTimings(
total_ms=total_ms,
llm_first_token_ms=None,
tool_execution_ms={},
stream_processing_ms=total_ms,
)
return ChatFullEvalResult(
answer=full.answer,
tools_called=tools_called,
tool_call_details=tool_call_details,
citations=full.citation_info,
timings=timings,
)
class GatherStreamResult(BaseModel):
"""Result of gathering a stream with tool call information."""
answer: str
answer_citationless: str
tools_called: list[str]
tool_call_details: list[dict[str, Any]]
message_id: int
error_msg: str | None = None
citations: list[CitationInfo] = []
timings: EvalTimings | None = None
def gather_stream_with_tools(packets: AnswerStream) -> GatherStreamResult:
"""
Gather streaming packets and extract both answer content and tool call information.
Returns a GatherStreamResult containing the answer and all tools that were called.
"""
stream_start_time = time.time()
answer: str | None = None
citations: list[CitationInfo] = []
error_msg: str | None = None
message_id: int | None = None
tools_called: list[str] = []
tool_call_details: list[dict[str, Any]] = []
# Timing tracking
first_token_time: float | None = None
tool_start_times: dict[str, float] = {} # tool_name -> start time
tool_execution_ms: dict[str, float] = {} # tool_name -> duration in ms
current_tool: str | None = None
def _finalize_tool_timing(tool_name: str) -> None:
"""Record the duration for a tool that just finished."""
if tool_name in tool_start_times:
duration_ms = (time.time() - tool_start_times[tool_name]) * 1000
tool_execution_ms[tool_name] = duration_ms
for packet in packets:
if isinstance(packet, Packet):
obj = packet.obj
# Handle answer content
if isinstance(obj, AgentResponseStart):
# When answer starts, finalize any in-progress tool
if current_tool:
_finalize_tool_timing(current_tool)
current_tool = None
elif isinstance(obj, AgentResponseDelta):
if answer is None:
answer = ""
first_token_time = time.time()
if obj.content:
answer += obj.content
elif isinstance(obj, CitationInfo):
citations.append(obj)
# Track tool calls with timing
elif isinstance(obj, SearchToolStart):
# Finalize any previous tool
if current_tool:
_finalize_tool_timing(current_tool)
tool_name = "WebSearchTool" if obj.is_internet_search else "SearchTool"
current_tool = tool_name
tool_start_times[tool_name] = time.time()
tools_called.append(tool_name)
tool_call_details.append(
{
"tool_name": tool_name,
"tool_type": "search",
"is_internet_search": obj.is_internet_search,
}
)
elif isinstance(obj, ImageGenerationToolStart):
if current_tool:
_finalize_tool_timing(current_tool)
tool_name = "ImageGenerationTool"
current_tool = tool_name
tool_start_times[tool_name] = time.time()
tools_called.append(tool_name)
tool_call_details.append(
{
"tool_name": tool_name,
"tool_type": "image_generation",
}
)
elif isinstance(obj, PythonToolStart):
if current_tool:
_finalize_tool_timing(current_tool)
tool_name = "PythonTool"
current_tool = tool_name
tool_start_times[tool_name] = time.time()
tools_called.append(tool_name)
tool_call_details.append(
{
"tool_name": tool_name,
"tool_type": "python",
"code": obj.code,
}
)
elif isinstance(obj, OpenUrlStart):
if current_tool:
_finalize_tool_timing(current_tool)
tool_name = "OpenURLTool"
current_tool = tool_name
tool_start_times[tool_name] = time.time()
tools_called.append(tool_name)
tool_call_details.append(
{
"tool_name": tool_name,
"tool_type": "open_url",
}
)
elif isinstance(obj, CustomToolStart):
if current_tool:
_finalize_tool_timing(current_tool)
tool_name = obj.tool_name
current_tool = tool_name
tool_start_times[tool_name] = time.time()
tools_called.append(tool_name)
tool_call_details.append(
{
"tool_name": tool_name,
"tool_type": "custom",
}
)
elif isinstance(packet, StreamingError):
logger.warning(f"Streaming error during eval: {packet.error}")
error_msg = packet.error
elif isinstance(packet, MessageResponseIDInfo):
message_id = packet.reserved_assistant_message_id
# Finalize any remaining tool timing
if current_tool:
_finalize_tool_timing(current_tool)
stream_end_time = time.time()
if message_id is None:
# If we got a streaming error, include it in the exception
if error_msg:
raise ValueError(f"Message ID is required. Stream error: {error_msg}")
raise ValueError(
f"Message ID is required. No MessageResponseIDInfo received. "
f"Tools called: {tools_called}"
)
# Allow empty answers for tool-only turns (e.g., in multi-turn evals)
# Some turns may only execute tools without generating a text response
if answer is None:
logger.warning(
"No answer content generated. Tools called: %s. "
"This may be expected for tool-only turns.",
tools_called,
)
answer = ""
# Calculate timings
total_ms = (stream_end_time - stream_start_time) * 1000
first_token_ms = (
(first_token_time - stream_start_time) * 1000 if first_token_time else None
)
stream_processing_ms = (stream_end_time - stream_start_time) * 1000
timings = EvalTimings(
total_ms=total_ms,
llm_first_token_ms=first_token_ms,
tool_execution_ms=tool_execution_ms,
stream_processing_ms=stream_processing_ms,
)
return GatherStreamResult(
answer=answer,
answer_citationless=remove_answer_citations(answer),
tools_called=tools_called,
tool_call_details=tool_call_details,
message_id=message_id,
error_msg=error_msg,
citations=citations,
timings=timings,
)
@@ -413,14 +236,17 @@ def _get_answer_with_tools(
),
)
stream_start_time = time.time()
state_container = ChatStateContainer()
packets = handle_stream_message_objects(
new_msg_req=request,
user=user,
db_session=db_session,
external_state_container=state_container,
)
full = gather_stream_full(packets, state_container)
result = _chat_full_response_to_eval_result(full, stream_start_time)
# Gather stream with tool call tracking
result = gather_stream_with_tools(packets)
# Evaluate tool assertions
assertion_passed, assertion_details = evaluate_tool_assertions(
@@ -551,30 +377,30 @@ def _get_multi_turn_answer_with_tools(
),
)
# Create request for this turn
# Create request for this turn using SendMessageRequest (same API as handle_stream_message_objects)
# Use AUTO_PLACE_AFTER_LATEST_MESSAGE to chain messages
forced_tool_id = forced_tool_ids[0] if forced_tool_ids else None
request = CreateChatMessageRequest(
request = SendMessageRequest(
chat_session_id=chat_session_id,
parent_message_id=AUTO_PLACE_AFTER_LATEST_MESSAGE,
message=msg.message,
file_descriptors=[],
search_doc_ids=None,
retrieval_options=RetrievalDetails(),
llm_override=llm_override,
persona_override_config=full_configuration.persona_override_config,
skip_gen_ai_answer_generation=False,
allowed_tool_ids=full_configuration.allowed_tool_ids,
forced_tool_ids=forced_tool_ids or None,
forced_tool_id=forced_tool_id,
)
# Stream and gather results for this turn
# Stream and gather results for this turn via handle_stream_message_objects + gather_stream_full
stream_start_time = time.time()
state_container = ChatStateContainer()
packets = stream_chat_message_objects(
packets = handle_stream_message_objects(
new_msg_req=request,
user=user,
db_session=db_session,
external_state_container=state_container,
)
full = gather_stream_full(packets, state_container)
result = gather_stream_with_tools(packets)
result = _chat_full_response_to_eval_result(full, stream_start_time)
# Evaluate tool assertions for this turn
assertion_passed, assertion_details = evaluate_tool_assertions(

View File

@@ -7,9 +7,6 @@ from pydantic import BaseModel
from pydantic import Field
from sqlalchemy.orm import Session
from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.models import PromptOverrideConfig
from onyx.chat.models import ToolConfig
from onyx.db.tools import get_builtin_tool
from onyx.llm.override_models import LLMOverride
from onyx.server.query_and_chat.streaming_models import CitationInfo
@@ -34,6 +31,16 @@ class EvalTimings(BaseModel):
stream_processing_ms: float | None = None # Time to process the stream
class ChatFullEvalResult(BaseModel):
"""Raw eval components from ChatFullResponse (before tool assertions)."""
answer: str
tools_called: list[str]
tool_call_details: list[dict[str, Any]]
citations: list[CitationInfo]
timings: EvalTimings
class EvalToolResult(BaseModel):
"""Result of a single eval with tool call information."""
@@ -72,8 +79,6 @@ class MultiTurnEvalResult(BaseModel):
class EvalConfiguration(BaseModel):
builtin_tool_types: list[str] = Field(default_factory=list)
persona_override_config: PersonaOverrideConfig | None = None
llm: LLMOverride = Field(default_factory=LLMOverride)
search_permissions_email: str
allowed_tool_ids: list[int]
@@ -81,7 +86,6 @@ class EvalConfiguration(BaseModel):
class EvalConfigurationOptions(BaseModel):
builtin_tool_types: list[str] = list(BUILT_IN_TOOL_MAP.keys())
persona_override_config: PersonaOverrideConfig | None = None
llm: LLMOverride = LLMOverride(
model_provider=None,
model_version="gpt-4o",
@@ -96,26 +100,7 @@ class EvalConfigurationOptions(BaseModel):
experiment_name: str | None = None
def get_configuration(self, db_session: Session) -> EvalConfiguration:
persona_override_config = self.persona_override_config or PersonaOverrideConfig(
name="Eval",
description="A persona for evaluation",
tools=[
ToolConfig(id=get_builtin_tool(db_session, BUILT_IN_TOOL_MAP[tool]).id)
for tool in self.builtin_tool_types
],
prompts=[
PromptOverrideConfig(
name="Default",
description="Default prompt for evaluation",
system_prompt="You are a helpful assistant.",
task_prompt="",
datetime_aware=True,
)
],
)
return EvalConfiguration(
persona_override_config=persona_override_config,
llm=self.llm,
search_permissions_email=self.search_permissions_email,
allowed_tool_ids=[

View File

@@ -2,7 +2,6 @@ from collections.abc import Callable
from typing import Any
from onyx.auth.schemas import UserRole
from onyx.chat.models import PersonaOverrideConfig
from onyx.configs.model_configs import GEN_AI_TEMPERATURE
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.enums import LLMModelFlowType
@@ -77,7 +76,7 @@ def _build_model_kwargs(
def get_llm_for_persona(
persona: Persona | PersonaOverrideConfig | None,
persona: Persona | None,
user: User,
llm_override: LLMOverride | None = None,
additional_headers: dict[str, str] | None = None,
@@ -102,20 +101,16 @@ def get_llm_for_persona(
if not provider_model:
raise ValueError("No LLM provider found")
# Only check access control for database Persona entities, not PersonaOverrideConfig
# PersonaOverrideConfig is used for temporary overrides and doesn't have access restrictions
persona_model = persona if isinstance(persona, Persona) else None
# Fetch user group IDs for access control check
user_group_ids = fetch_user_group_ids(db_session, user)
if not can_user_access_llm_provider(
provider_model, user_group_ids, persona_model, user.role == UserRole.ADMIN
provider_model, user_group_ids, persona, user.role == UserRole.ADMIN
):
logger.warning(
"User %s with persona %s cannot access provider %s. Falling back to default provider.",
user.id,
getattr(persona_model, "id", None),
persona.id,
provider_model.name,
)
return get_default_llm(

View File

@@ -85,7 +85,7 @@ def send_message(
Enforces rate limiting before executing the agent (via dependency).
Returns a Server-Sent Events (SSE) stream with the agent's response.
Follows the same pattern as /chat/send-message for consistency.
Follows the same pattern as /chat/send-chat-message for consistency.
"""
def stream_generator() -> Generator[str, None, None]:

View File

@@ -30,7 +30,6 @@ from onyx.chat.models import ChatFullResponse
from onyx.chat.models import CreateChatSessionID
from onyx.chat.process_message import gather_stream_full
from onyx.chat.process_message import handle_stream_message_objects
from onyx.chat.process_message import stream_chat_message_objects
from onyx.chat.prompt_utils import get_default_base_system_prompt
from onyx.chat.stop_signal_checker import set_fence
from onyx.configs.app_configs import WEB_DOMAIN
@@ -40,8 +39,6 @@ from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS
from onyx.db.chat import add_chats_to_session_from_slack_thread
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import delete_all_chat_sessions_for_user
from onyx.db.chat import delete_chat_session
from onyx.db.chat import duplicate_chat_session_for_user_from_slack
@@ -49,7 +46,6 @@ from onyx.db.chat import get_chat_message
from onyx.db.chat import get_chat_messages_by_session
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_chat_sessions_by_user
from onyx.db.chat import get_or_create_root_message
from onyx.db.chat import set_as_latest_chat_message
from onyx.db.chat import translate_db_message_to_chat_message_detail
from onyx.db.chat import update_chat_session
@@ -71,7 +67,6 @@ from onyx.llm.constants import LlmProviderNames
from onyx.llm.factory import get_default_llm
from onyx.llm.factory import get_llm_for_persona
from onyx.llm.factory import get_llm_token_counter
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.redis.redis_pool import get_redis_client
from onyx.secondary_llm_flows.chat_session_naming import generate_chat_session_name
from onyx.server.api_key_usage import check_api_key_usage
@@ -86,10 +81,7 @@ from onyx.server.query_and_chat.models import ChatSessionGroup
from onyx.server.query_and_chat.models import ChatSessionsResponse
from onyx.server.query_and_chat.models import ChatSessionSummary
from onyx.server.query_and_chat.models import ChatSessionUpdateRequest
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.server.query_and_chat.models import LLMOverride
from onyx.server.query_and_chat.models import MessageOrigin
from onyx.server.query_and_chat.models import PromptOverride
from onyx.server.query_and_chat.models import RenameChatSessionResponse
from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.models import UpdateChatSessionTemperatureRequest
@@ -503,71 +495,8 @@ def delete_chat_session_by_id(
raise HTTPException(status_code=400, detail=str(e))
# WARNING: this endpoint is deprecated and will be removed soon. Use the new send-chat-message endpoint instead.
@router.post("/send-message")
def handle_new_chat_message(
chat_message_req: CreateChatMessageRequest,
request: Request,
user: User = Depends(current_chat_accessible_user),
_rate_limit_check: None = Depends(check_token_rate_limits),
_api_key_usage_check: None = Depends(check_api_key_usage),
) -> StreamingResponse:
"""
This endpoint is both used for all the following purposes:
- Sending a new message in the session
- Regenerating a message in the session (just send the same one again)
- Editing a message (similar to regenerating but sending a different message)
- Kicking off a seeded chat session (set `use_existing_user_message`)
Assumes that previous messages have been set as the latest to minimize overhead.
Args:
chat_message_req (CreateChatMessageRequest): Details about the new chat message.
request (Request): The current HTTP request context.
user (User): The current user, obtained via dependency injection.
_ (None): Rate limit check is run if user/group/global rate limits are enabled.
Returns:
StreamingResponse: Streams the response to the new chat message.
"""
tenant_id = get_current_tenant_id()
logger.debug(f"Received new chat message: {chat_message_req.message}")
if not chat_message_req.message and not chat_message_req.use_existing_user_message:
raise HTTPException(status_code=400, detail="Empty chat message is invalid")
mt_cloud_telemetry(
tenant_id=tenant_id,
distinct_id=tenant_id if user.is_anonymous else user.email,
event=MilestoneRecordType.RAN_QUERY,
)
def stream_generator() -> Generator[str, None, None]:
try:
with get_session_with_current_tenant() as db_session:
for obj in stream_chat_message_objects(
new_msg_req=chat_message_req,
user=user,
db_session=db_session,
litellm_additional_headers=extract_headers(
request.headers, LITELLM_PASS_THROUGH_HEADERS
),
custom_tool_additional_headers=get_custom_tool_additional_request_headers(
request.headers
),
):
yield get_json_line(obj.model_dump())
except Exception as e:
logger.exception("Error in chat message streaming")
yield json.dumps({"error": str(e)})
finally:
logger.debug("Stream generator finished")
return StreamingResponse(stream_generator(), media_type="text/event-stream")
# NOTE: This endpoint is extremely central to the application, any changes to it should be reviewed and approved by an experienced
# team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
@router.post(
"/send-chat-message",
response_model=ChatFullResponse,
@@ -815,77 +744,6 @@ def get_available_context_tokens_for_session(
"""Endpoints for chat seeding""" """Endpoints for chat seeding"""
class ChatSeedRequest(BaseModel):
# standard chat session stuff
persona_id: int
# overrides / seeding
llm_override: LLMOverride | None = None
prompt_override: PromptOverride | None = None
description: str | None = None
message: str | None = None
# TODO: support this
# initial_message_retrieval_options: RetrievalDetails | None = None
class ChatSeedResponse(BaseModel):
redirect_url: str
@router.post("/seed-chat-session", tags=PUBLIC_API_TAGS)
def seed_chat(
chat_seed_request: ChatSeedRequest,
# NOTE: This endpoint is designed for programmatic access (API keys, external services)
# rather than authenticated user sessions. The user parameter is used for access control
# but the created chat session is "unassigned" (user_id=None) until a user visits the web UI.
# This allows external systems to pre-seed chat sessions that users can then access.
user: User = Depends(current_chat_accessible_user),
db_session: Session = Depends(get_session),
) -> ChatSeedResponse:
try:
new_chat_session = create_chat_session(
db_session=db_session,
description=chat_seed_request.description or "",
user_id=None, # this chat session is "unassigned" until a user visits the web UI
persona_id=chat_seed_request.persona_id,
llm_override=chat_seed_request.llm_override,
prompt_override=chat_seed_request.prompt_override,
)
except Exception as e:
logger.exception(e)
raise HTTPException(status_code=400, detail="Invalid Persona provided.")
if chat_seed_request.message is not None:
root_message = get_or_create_root_message(
chat_session_id=new_chat_session.id, db_session=db_session
)
llm = get_llm_for_persona(
persona=new_chat_session.persona,
user=user,
)
tokenizer = get_tokenizer(
model_name=llm.config.model_name,
provider_type=llm.config.model_provider,
)
token_count = len(tokenizer.encode(chat_seed_request.message))
create_new_chat_message(
chat_session_id=new_chat_session.id,
parent_message=root_message,
message=chat_seed_request.message,
token_count=token_count,
message_type=MessageType.USER,
db_session=db_session,
)
return ChatSeedResponse(
redirect_url=f"{WEB_DOMAIN}/chat?chatId={new_chat_session.id}&seeded=true"
)
class SeedChatFromSlackRequest(BaseModel):
chat_session_id: UUID

View File

@@ -1,18 +1,15 @@
from datetime import datetime
from enum import Enum
from typing import Any
from typing import TYPE_CHECKING
from uuid import UUID
from pydantic import BaseModel
from pydantic import model_validator
from onyx.chat.models import PersonaOverrideConfig
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import MessageType
from onyx.configs.constants import SessionType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkContext
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.context.search.models import Tag
@@ -20,7 +17,6 @@ from onyx.db.enums import ChatSessionSharedStatus
from onyx.db.models import ChatSession
from onyx.file_store.models import FileDescriptor
from onyx.llm.override_models import LLMOverride
from onyx.llm.override_models import PromptOverride
from onyx.server.query_and_chat.streaming_models import Packet
@@ -40,8 +36,9 @@ class MessageOrigin(str, Enum):
UNSET = "unset" UNSET = "unset"
if TYPE_CHECKING: class MessageResponseIDInfo(BaseModel):
pass user_message_id: int | None
reserved_assistant_message_id: int
class SourceTag(Tag): class SourceTag(Tag):
@@ -83,6 +80,8 @@ class ChatFeedbackRequest(BaseModel):
return self
# NOTE: This model is used for the core flow of the Onyx application, any changes to it should be reviewed and approved by an
# experienced team member. It is very important to 1. avoid bloat and 2. that this remains backwards compatible across versions.
class SendMessageRequest(BaseModel):
message: str
@@ -141,115 +140,6 @@ class SendMessageRequest(BaseModel):
return self
class OptionalSearchSetting(str, Enum):
ALWAYS = "always"
NEVER = "never"
# Determine whether to run search based on history and latest query
AUTO = "auto"
class RetrievalDetails(ChunkContext):
# Use LLM to determine whether to do a retrieval or only rely on existing history
# If the Persona is configured to not run search (0 chunks), this is bypassed
# If no Prompt is configured, the only search results are shown, this is bypassed
run_search: OptionalSearchSetting = OptionalSearchSetting.AUTO
# Is this a real-time/streaming call or a question where Onyx can take more time?
# Used to determine reranking flow
real_time: bool = True
# The following have defaults in the Persona settings which can be overridden via
# the query, if None, then use Persona settings
filters: BaseFilters | None = None
enable_auto_detect_filters: bool | None = None
# if None, no offset / limit
offset: int | None = None
limit: int | None = None
# If this is set, only the highest matching chunk (or merged chunks) is returned
dedupe_docs: bool = False
class CreateChatMessageRequest(ChunkContext):
"""Before creating messages, be sure to create a chat_session and get an id"""
chat_session_id: UUID
# This is the primary-key (unique identifier) for the previous message of the tree
parent_message_id: int | None
# New message contents
message: str
# Files that we should attach to this message
file_descriptors: list[FileDescriptor] = []
# Prompts are embedded in personas, so no separate prompt_id needed
# If search_doc_ids provided, it should use those docs explicitly
search_doc_ids: list[int] | None
retrieval_options: RetrievalDetails | None
# allows the caller to specify the exact search query they want to use
# will disable Query Rewording if specified
query_override: str | None = None
# enables additional handling to ensure that we regenerate with a given user message ID
regenerate: bool | None = None
# allows the caller to override the Persona / Prompt
# these do not persist in the chat thread details
llm_override: LLMOverride | None = None
# Test-only override for deterministic LiteLLM mock responses.
mock_llm_response: str | None = None
prompt_override: PromptOverride | None = None
# Allows the caller to override the temperature for the chat session
# this does persist in the chat thread details
temperature_override: float | None = None
# allow user to specify an alternate assistant
alternate_assistant_id: int | None = None
# This takes the priority over the prompt_override
# This won't be a type that's passed in directly from the API
persona_override_config: PersonaOverrideConfig | None = None
# used for seeded chats to kick off the generation of an AI answer
use_existing_user_message: bool = False
# used for "OpenAI Assistants API"
existing_assistant_message_id: int | None = None
# forces the LLM to return a structured response, see
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
skip_gen_ai_answer_generation: bool = False
# List of allowed tool IDs to restrict tool usage. If not provided, all tools available to the persona will be used.
allowed_tool_ids: list[int] | None = None
# List of tool IDs we MUST use.
# TODO: make this a single one since unclear how to force this for multiple at a time.
forced_tool_ids: list[int] | None = None
deep_research: bool = False
# When True (default), enables citation generation with markers and CitationInfo packets
# When False, disables citations: removes markers like [1], [2] and skips CitationInfo packets
include_citations: bool = True
# Origin of the message for telemetry tracking
origin: MessageOrigin = MessageOrigin.UNKNOWN
@model_validator(mode="after")
def check_search_doc_ids_or_retrieval_options(self) -> "CreateChatMessageRequest":
if self.search_doc_ids is None and self.retrieval_options is None:
raise ValueError(
"Either search_doc_ids or retrieval_options must be provided, but not both or neither."
)
return self
def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
data = super().model_dump(*args, **kwargs)
data["chat_session_id"] = str(data["chat_session_id"])
return data
class ChatMessageIdentifier(BaseModel):
message_id: int
@@ -365,13 +255,3 @@ class ChatSearchResponse(BaseModel):
groups: list[ChatSessionGroup]
has_more: bool
next_page: int | None = None
class ChatSearchRequest(BaseModel):
query: str | None = None
page: int = 1
page_size: int = 10
class CreateChatResponse(BaseModel):
chat_session_id: str

View File

@@ -17,11 +17,12 @@ disallow_untyped_defs = true
warn_unused_ignores = true
enable_error_code = ["possibly-undefined"]
strict_equality = true
# Patterns match paths whether mypy is run from backend/ (CI) or repo root (e.g. VS Code extension with target ./backend)
exclude = [
"^generated/.*",
"^\\.venv/",
"^onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx/",
"^onyx/server/features/build/sandbox/kubernetes/docker/templates/venv/",
"(?:^|/)generated/",
"(?:^|/)\\.venv/",
"(?:^|/)onyx/server/features/build/sandbox/kubernetes/docker/skills/",
"(?:^|/)onyx/server/features/build/sandbox/kubernetes/docker/templates/",
]
[[tool.mypy.overrides]]
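To illustrate the comment above (a sketch, assuming mypy applies `exclude` patterns with `re.search` semantics against forward-slash relative paths), the `(?:^|/)` prefix matches the excluded directories whether the path is rooted at `backend/` or at the repo root:

```python
import re

old_pattern = r"^generated/.*"      # only matches when mypy runs from backend/
new_pattern = r"(?:^|/)generated/"  # matches from backend/ or from the repo root

for path in ["generated/client.py", "backend/generated/client.py"]:
    print(path, bool(re.search(old_pattern, path)), bool(re.search(new_pattern, path)))
# generated/client.py True True
# backend/generated/client.py False True
```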

View File

@@ -23,7 +23,7 @@ def create_new_chat_session(onyx_url: str, api_key: str | None) -> int:
def process_question(onyx_url: str, question: str, api_key: str | None) -> None:
message_endpoint = onyx_url + "/api/chat/send-message"
message_endpoint = onyx_url + "/api/chat/send-chat-message"
chat_session_id = create_new_chat_session(onyx_url, api_key)

View File

@@ -88,7 +88,7 @@ class ChatLoadTester:
token_count = 0
async with session.post(
f"{self.base_url}/chat/send-message",
f"{self.base_url}/chat/send-chat-message",
headers=self.headers,
json={
"chat_session_id": chat_session_id,

View File

@@ -4,8 +4,8 @@ from typing import cast
from onyx.chat.models import AnswerStreamPart
from onyx.chat.models import CreateChatSessionID
-from onyx.chat.models import MessageResponseIDInfo
from onyx.context.search.models import SearchDoc
+from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments

View File

@@ -6,9 +6,8 @@ from uuid import uuid4
from sqlalchemy.orm import Session
from onyx.chat.models import AnswerStreamPart
-from onyx.chat.models import MessageResponseIDInfo
from onyx.chat.models import StreamingError
-from onyx.chat.process_message import stream_chat_message_objects
+from onyx.chat.process_message import handle_stream_message_objects
from onyx.db.chat import create_chat_session
from onyx.db.enums import LLMModelFlowType
from onyx.db.llm import fetch_existing_llm_providers
@@ -18,8 +17,8 @@ from onyx.db.llm import upsert_llm_provider
from onyx.llm.constants import LlmProviderNames
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
-from onyx.server.query_and_chat.models import CreateChatMessageRequest
-from onyx.server.query_and_chat.models import RetrievalDetails
+from onyx.server.query_and_chat.models import MessageResponseIDInfo
+from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import Packet
@@ -70,17 +69,13 @@ def test_answer_with_only_anthropic_provider(
        persona_id=0,
    )
-    chat_request = CreateChatMessageRequest(
-        chat_session_id=chat_session.id,
-        parent_message_id=None,
+    chat_request = SendMessageRequest(
        message="hello",
-        file_descriptors=[],
-        search_doc_ids=None,
-        retrieval_options=RetrievalDetails(),
+        chat_session_id=chat_session.id,
    )
    response_stream: list[AnswerStreamPart] = []
-    for packet in stream_chat_message_objects(
+    for packet in handle_stream_message_objects(
        new_msg_req=chat_request,
        user=test_user,
        db_session=db_session,

View File

@@ -4,14 +4,13 @@ from datetime import datetime
from sqlalchemy.orm import Session
from onyx.chat.models import AnswerStreamPart
-from onyx.chat.models import MessageResponseIDInfo
from onyx.chat.models import StreamingError
-from onyx.chat.process_message import stream_chat_message_objects
+from onyx.chat.process_message import handle_stream_message_objects
from onyx.db.chat import create_chat_session
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
-from onyx.server.query_and_chat.models import CreateChatMessageRequest
-from onyx.server.query_and_chat.models import RetrievalDetails
+from onyx.server.query_and_chat.models import MessageResponseIDInfo
+from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
from tests.external_dependency_unit.conftest import create_test_user
@@ -42,18 +41,12 @@ def test_stream_chat_current_date_response(
        persona_id=default_persona.id,
    )
-    chat_request = CreateChatMessageRequest(
-        chat_session_id=chat_session.id,
-        parent_message_id=None,
+    chat_request = SendMessageRequest(
        message="Please respond only with the current date in the format 'Weekday Month DD, YYYY'.",
-        file_descriptors=[],
-        prompt_override=None,
-        search_doc_ids=None,
-        retrieval_options=RetrievalDetails(),
-        query_override=None,
+        chat_session_id=chat_session.id,
    )
-    gen = stream_chat_message_objects(
+    gen = handle_stream_message_objects(
        new_msg_req=chat_request,
        user=test_user,
        db_session=db_session,

View File

@@ -7,8 +7,8 @@ import pytest
from sqlalchemy.orm import Session
from onyx.chat.models import CreateChatSessionID
-from onyx.chat.models import MessageResponseIDInfo
from onyx.configs.constants import DocumentSource
+from onyx.server.query_and_chat.models import MessageResponseIDInfo
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
from onyx.server.query_and_chat.streaming_models import GeneratedImage
from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal

View File

@@ -6,15 +6,14 @@ import pytest
from sqlalchemy.orm import Session
from onyx.chat.models import AnswerStreamPart
-from onyx.chat.models import MessageResponseIDInfo
from onyx.chat.models import StreamingError
-from onyx.chat.process_message import stream_chat_message_objects
+from onyx.chat.process_message import handle_stream_message_objects
from onyx.db.chat import create_chat_session
from onyx.db.models import RecencyBiasSetting
from onyx.db.models import User
from onyx.db.persona import upsert_persona
-from onyx.server.query_and_chat.models import CreateChatMessageRequest
-from onyx.server.query_and_chat.models import RetrievalDetails
+from onyx.server.query_and_chat.models import MessageResponseIDInfo
+from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
from onyx.server.query_and_chat.streaming_models import Packet
from tests.external_dependency_unit.answer.conftest import ensure_default_llm_provider
@@ -100,18 +99,12 @@ def test_stream_chat_message_objects_without_web_search(
        persona_id=test_persona.id,
    )
    # Create the chat message request with a query that attempts to force web search
-    chat_request = CreateChatMessageRequest(
-        chat_session_id=chat_session.id,
-        parent_message_id=None,
+    chat_request = SendMessageRequest(
        message="run a web search for 'Onyx'",
-        file_descriptors=[],
-        prompt_override=None,
-        search_doc_ids=None,
-        retrieval_options=RetrievalDetails(),
-        query_override=None,
+        chat_session_id=chat_session.id,
    )
-    # Call stream_chat_message_objects
+    # Call handle_stream_message_objects
-    response_generator = stream_chat_message_objects(
+    response_generator = handle_stream_message_objects(
        new_msg_req=chat_request,
        user=test_user,
        db_session=db_session,

View File

@@ -8,7 +8,6 @@ import pytest
from fastapi_users.password import PasswordHelper
from sqlalchemy.orm import Session
-from onyx.chat.models import MessageResponseIDInfo
from onyx.db.llm import fetch_existing_llm_provider
from onyx.db.llm import remove_llm_provider
from onyx.db.llm import update_default_provider
@@ -21,6 +20,7 @@ from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.server.query_and_chat.chat_backend import create_new_chat_session
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
+from onyx.server.query_and_chat.models import MessageResponseIDInfo
from tests.external_dependency_unit.answer.stream_test_assertions import (
    assert_answer_stream_part_correct,
)

View File

@@ -29,7 +29,7 @@ def test_create_chat_session_and_send_messages() -> None:
    # Send first message
    first_message = "Hello, this is a test message."
    send_message_response = requests.post(
-        f"{base_url}/chat/send-message",
+        f"{base_url}/chat/send-chat-message",
        json={
            "chat_session_id": chat_session_id,
            "message": first_message,
@@ -43,7 +43,7 @@ def test_create_chat_session_and_send_messages() -> None:
    # Send second message
    second_message = "Can you provide more information?"
    send_message_response = requests.post(
-        f"{base_url}/chat/send-message",
+        f"{base_url}/chat/send-chat-message",
        json={
            "chat_session_id": chat_session_id,
            "message": second_message,

View File

@@ -12,10 +12,9 @@ from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.file_store.models import FileDescriptor
from onyx.llm.override_models import LLMOverride
-from onyx.llm.override_models import PromptOverride
+from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
from onyx.server.query_and_chat.models import ChatSessionCreationRequest
-from onyx.server.query_and_chat.models import CreateChatMessageRequest
-from onyx.server.query_and_chat.models import RetrievalDetails
+from onyx.server.query_and_chat.models import SendMessageRequest
from onyx.server.query_and_chat.streaming_models import StreamingType
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import GENERAL_HEADERS
@@ -104,37 +103,27 @@ class ChatSessionManager:
        parent_message_id: int | None = None,
        user_performing_action: DATestUser | None = None,
        file_descriptors: list[FileDescriptor] | None = None,
-        search_doc_ids: list[int] | None = None,
-        retrieval_options: RetrievalDetails | None = None,
-        query_override: str | None = None,
-        regenerate: bool | None = None,
-        llm_override: LLMOverride | None = None,
-        prompt_override: PromptOverride | None = None,
-        alternate_assistant_id: int | None = None,
-        use_existing_user_message: bool = False,
        allowed_tool_ids: list[int] | None = None,
        forced_tool_ids: list[int] | None = None,
        chat_session: DATestChatSession | None = None,
        mock_llm_response: str | None = None,
        deep_research: bool = False,
+        llm_override: LLMOverride | None = None,
    ) -> StreamedResponse:
-        chat_message_req = CreateChatMessageRequest(
-            chat_session_id=chat_session_id,
-            parent_message_id=parent_message_id,
+        chat_message_req = SendMessageRequest(
            message=message,
+            chat_session_id=chat_session_id,
+            parent_message_id=(
+                parent_message_id
+                if parent_message_id is not None
+                else AUTO_PLACE_AFTER_LATEST_MESSAGE
+            ),
            file_descriptors=file_descriptors or [],
-            search_doc_ids=search_doc_ids or [],
-            retrieval_options=retrieval_options,
-            query_override=query_override,
-            regenerate=regenerate,
-            llm_override=llm_override,
-            mock_llm_response=mock_llm_response,
-            prompt_override=prompt_override,
-            alternate_assistant_id=alternate_assistant_id,
-            use_existing_user_message=use_existing_user_message,
            allowed_tool_ids=allowed_tool_ids,
-            forced_tool_ids=forced_tool_ids,
+            forced_tool_id=forced_tool_ids[0] if forced_tool_ids else None,
+            mock_llm_response=mock_llm_response,
            deep_research=deep_research,
+            llm_override=llm_override,
        )
        headers = (
@@ -145,8 +134,8 @@ class ChatSessionManager:
        cookies = user_performing_action.cookies if user_performing_action else None
        response = requests.post(
-            f"{API_SERVER_URL}/chat/send-message",
+            f"{API_SERVER_URL}/chat/send-chat-message",
-            json=chat_message_req.model_dump(),
+            json=chat_message_req.model_dump(mode="json"),
            headers=headers,
            stream=True,
            cookies=cookies,
@@ -182,17 +171,11 @@ class ChatSessionManager:
        parent_message_id: int | None = None,
        user_performing_action: DATestUser | None = None,
        file_descriptors: list[FileDescriptor] | None = None,
-        search_doc_ids: list[int] | None = None,
-        query_override: str | None = None,
-        regenerate: bool | None = None,
-        llm_override: LLMOverride | None = None,
-        prompt_override: PromptOverride | None = None,
-        alternate_assistant_id: int | None = None,
-        use_existing_user_message: bool = False,
        allowed_tool_ids: list[int] | None = None,
        forced_tool_ids: list[int] | None = None,
        mock_llm_response: str | None = None,
        deep_research: bool = False,
+        llm_override: LLMOverride | None = None,
    ) -> None:
        """
        Send a message and simulate client disconnect before stream completes.
@@ -204,33 +187,25 @@ class ChatSessionManager:
            chat_session_id: The chat session ID
            message: The message to send
            disconnect_after_packets: Disconnect after receiving this many packets.
-                If None, disconnect_after_type must be specified.
-            disconnect_after_type: Disconnect after receiving a packet of this type
-                (e.g., "message_start", "search_tool_start"). If None,
-                disconnect_after_packets must be specified.
            ... (other standard message parameters)

        Returns:
-            StreamedResponse containing data received before disconnect,
-            with is_disconnected=True flag set.
+            None. Caller can verify server-side cleanup via get_chat_history etc.
        """
-        chat_message_req = CreateChatMessageRequest(
-            chat_session_id=chat_session_id,
-            parent_message_id=parent_message_id,
+        chat_message_req = SendMessageRequest(
            message=message,
+            chat_session_id=chat_session_id,
+            parent_message_id=(
+                parent_message_id
+                if parent_message_id is not None
+                else AUTO_PLACE_AFTER_LATEST_MESSAGE
+            ),
            file_descriptors=file_descriptors or [],
-            search_doc_ids=search_doc_ids or [],
-            retrieval_options=RetrievalDetails(),  # This will be deprecated soon anyway
-            query_override=query_override,
-            regenerate=regenerate,
-            llm_override=llm_override,
-            mock_llm_response=mock_llm_response,
-            prompt_override=prompt_override,
-            alternate_assistant_id=alternate_assistant_id,
-            use_existing_user_message=use_existing_user_message,
            allowed_tool_ids=allowed_tool_ids,
-            forced_tool_ids=forced_tool_ids,
+            forced_tool_id=forced_tool_ids[0] if forced_tool_ids else None,
+            mock_llm_response=mock_llm_response,
            deep_research=deep_research,
+            llm_override=llm_override,
        )
        headers = (
@@ -243,8 +218,8 @@ class ChatSessionManager:
        packets_received = 0
        with requests.post(
-            f"{API_SERVER_URL}/chat/send-message",
+            f"{API_SERVER_URL}/chat/send-chat-message",
-            json=chat_message_req.model_dump(),
+            json=chat_message_req.model_dump(mode="json"),
            headers=headers,
            stream=True,
            cookies=cookies,
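Taken together, the test helper now targets the renamed route and relies on Pydantic's JSON mode rather than the custom model_dump override that the removed request model carried. A standalone sketch of the equivalent call; the base URL and session ID are placeholders, not values from this repo.

# Hedged sketch only: mirrors what the updated helper does, outside the test
# harness. model_dump(mode="json") renders the UUID chat_session_id as a string,
# which is why the old str() override on the request model is no longer needed.
from uuid import UUID

import requests

from onyx.server.query_and_chat.models import SendMessageRequest

chat_session_id = UUID("00000000-0000-0000-0000-000000000000")  # placeholder
req = SendMessageRequest(message="hello", chat_session_id=chat_session_id)

with requests.post(
    "http://localhost:8080/api/chat/send-chat-message",  # assumed local API base
    json=req.model_dump(mode="json"),
    stream=True,
) as response:
    for line in response.iter_lines():
        if line:
            print(line.decode())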

View File

@@ -1,7 +1,5 @@
from onyx.configs import app_configs
from onyx.configs.constants import DocumentSource
-from onyx.server.query_and_chat.models import OptionalSearchSetting
-from onyx.server.query_and_chat.models import RetrievalDetails
from onyx.tools.constants import SEARCH_TOOL_ID
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
@@ -172,7 +170,7 @@ def test_run_search_always_maps_to_forced_search_tool(admin_user: DATestUser) ->
        chat_session_id=chat_session.id,
        message="always run search",
        user_performing_action=admin_user,
-        retrieval_options=RetrievalDetails(run_search=OptionalSearchSetting.ALWAYS),
+        forced_tool_ids=[search_tool_id],
        mock_llm_response='{"name":"internal_search","arguments":{"queries":["gamma"]}}',
    )
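With RetrievalDetails(run_search=ALWAYS) gone, this test forces retrieval by naming the search tool directly. A hedged sketch of the same idea against the new request model; the numeric tool ID and session ID are placeholders, since the real test resolves them at runtime.

# Illustrative only: forcing the internal search tool via forced_tool_id, the
# single-ID field the new request model exposes (see the helper change above).
from uuid import UUID

from onyx.server.query_and_chat.models import SendMessageRequest

search_tool_id = 1  # placeholder; the test looks up the real tool ID
request = SendMessageRequest(
    message="always run search",
    chat_session_id=UUID("00000000-0000-0000-0000-000000000000"),  # placeholder
    forced_tool_id=search_tool_id,
)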

View File

@@ -196,7 +196,7 @@ members = ["backend", "tools/ods"]
[tool.basedpyright]
include = ["backend"]
-exclude = ["backend/generated"]
+exclude = ["backend/generated", "backend/onyx/server/features/build/sandbox/kubernetes/docker/skills/pptx", "backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/venv"]
typeCheckingMode = "off"

[tool.ruff]