Compare commits


4 Commits

Author    SHA1        Message                          Date
pablonyx  3ce02ccc01  k                                2025-02-16 12:35:52 -08:00
pablonyx  d78c8e2e05  k                                2025-02-16 12:35:18 -08:00
pablonyx  501ad93153  remove playwright for now        2025-02-16 12:33:53 -08:00
pablonyx  f4686440ae  temporarily silence playwright   2025-02-16 12:32:44 -08:00
54 changed files with 271 additions and 777 deletions

View File

@@ -28,11 +28,11 @@ RUN apt-get update && \
curl \
zip \
ca-certificates \
libgnutls30 \
libblkid1 \
libmount1 \
libsmartcols1 \
libuuid1 \
libgnutls30=3.7.9-2+deb12u3 \
libblkid1=2.38.1-5+deb12u1 \
libmount1=2.38.1-5+deb12u1 \
libsmartcols1=2.38.1-5+deb12u1 \
libuuid1=2.38.1-5+deb12u1 \
libxmlsec1-dev \
pkg-config \
gcc \

View File

@@ -5,14 +5,14 @@ from langgraph.graph import StateGraph
from onyx.agents.agent_search.basic.states import BasicInput
from onyx.agents.agent_search.basic.states import BasicOutput
from onyx.agents.agent_search.basic.states import BasicState
from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
prepare_tool_input,
)
from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -33,13 +33,13 @@ def basic_graph_builder() -> StateGraph:
)
graph.add_node(
node="choose_tool",
action=choose_tool,
node="llm_tool_choice",
action=llm_tool_choice,
)
graph.add_node(
node="call_tool",
action=call_tool,
node="tool_call",
action=tool_call,
)
graph.add_node(
@@ -51,12 +51,12 @@ def basic_graph_builder() -> StateGraph:
graph.add_edge(start_key=START, end_key="prepare_tool_input")
graph.add_edge(start_key="prepare_tool_input", end_key="choose_tool")
graph.add_edge(start_key="prepare_tool_input", end_key="llm_tool_choice")
graph.add_conditional_edges("choose_tool", should_continue, ["call_tool", END])
graph.add_conditional_edges("llm_tool_choice", should_continue, ["tool_call", END])
graph.add_edge(
start_key="call_tool",
start_key="tool_call",
end_key="basic_use_tool_response",
)
@@ -73,7 +73,7 @@ def should_continue(state: BasicState) -> str:
# If there are no tool calls, basic graph already streamed the answer
END
if state.tool_choice is None
else "call_tool"
else "tool_call"
)
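A note on the rename pattern in this file: LangGraph node keys are plain strings, so a rename like the choose_tool/llm_tool_choice and call_tool/tool_call swap above must be applied in lockstep to every add_node, add_edge, add_conditional_edges, and routing-function return value. A minimal runnable sketch of that coupling, assuming only that langgraph is installed (the state and node actions are stand-ins, not the Onyx ones):

from typing import TypedDict

from langgraph.graph import END, START, StateGraph


class State(TypedDict):
    done: bool


def choose_tool(state: State) -> State:
    # stand-in for the real tool-choice node
    return {"done": False}


def call_tool(state: State) -> State:
    # stand-in for the real tool-call node
    return {"done": True}


def should_continue(state: State) -> str:
    # returned strings must match node keys exactly, or routing breaks
    return END if state["done"] else "call_tool"


graph = StateGraph(State)
graph.add_node("choose_tool", choose_tool)
graph.add_node("call_tool", call_tool)
graph.add_edge(START, "choose_tool")
graph.add_conditional_edges("choose_tool", should_continue, ["call_tool", END])
graph.add_edge("call_tool", END)
compiled = graph.compile()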

View File

@@ -31,14 +31,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_CHECK
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -87,11 +85,9 @@ def check_sub_answer(
agent_error: AgentErrorLog | None = None
response: BaseMessage | None = None
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBANSWER_CHECK,
fast_llm.invoke,
response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK,
)
quality_str: str = cast(str, response.content)
@@ -100,7 +96,7 @@ def check_sub_answer(
)
log_result = f"Answer quality: {quality_str}"
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
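This hunk shows the timeout refactor applied throughout the agent nodes in this compare: the external run_with_timeout watchdog around llm.invoke is dropped in favor of the client-side timeout_override, so only LLMTimeoutError needs handling instead of (LLMTimeoutError, TimeoutError). A self-contained sketch of the two shapes; FakeLLM and the timeout values are illustrative, LLMTimeoutError and timeout_override mirror the diff, and the old shape is sketched with concurrent.futures rather than the repo's run_with_timeout helper:

import time
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import TimeoutError as FuturesTimeoutError


class LLMTimeoutError(Exception):
    pass


class FakeLLM:
    def invoke(self, prompt: str, timeout_override: int | None = None) -> str:
        # a real client would abort the request after timeout_override seconds
        # and raise LLMTimeoutError itself
        time.sleep(0.01)
        return f"answer to: {prompt}"


llm = FakeLLM()

# New shape: trust the client's own timeout; only the client error can surface.
try:
    response = llm.invoke(prompt="q", timeout_override=8)
except LLMTimeoutError:
    response = None  # recorded as an AgentErrorLog in the real nodes

# Old shape: an external watchdog enforces a wall-clock bound on top of the
# client's connect timeout, so a generic TimeoutError must be handled too.
with ThreadPoolExecutor(max_workers=1) as pool:
    future = pool.submit(llm.invoke, prompt="q", timeout_override=2)
    try:
        response = future.result(timeout=8)
    except (LLMTimeoutError, FuturesTimeoutError):
        response = None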

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import merge_message_runs
@@ -46,13 +47,11 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import NO_RECOVERED_DOCS
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -111,14 +110,15 @@ def generate_sub_answer(
config=fast_llm.config,
)
response: list[str | list[str | dict[str, Any]]] = []
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
response: list[str] = []
def stream_sub_answer() -> list[str]:
agent_error: AgentErrorLog | None = None
try:
for message in fast_llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
@@ -142,15 +142,8 @@ def generate_sub_answer(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
return response
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION,
stream_sub_answer,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
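generate_sub_answer and the other generate_* nodes below share the same streaming loop shape: iterate the LLM stream, time each dispatch to the stream writer, and accumulate token content. A minimal runnable sketch of that loop with a stubbed stream; Chunk and fake_stream are stand-ins, and the real node dispatches an AgentAnswerPiece where the comment sits:

from collections.abc import Iterator
from datetime import datetime


class Chunk:
    def __init__(self, content: str) -> None:
        self.content = content


def fake_stream(prompt: str) -> Iterator[Chunk]:
    # stand-in for fast_llm.stream(prompt=..., timeout_override=...)
    for token in ["a ", "sub-", "answer"]:
        yield Chunk(token)


response: list[str] = []
dispatch_timings: list[float] = []
for message in fake_stream("sub-question"):
    content = message.content
    start_stream_token = datetime.now()
    # the real node writes an AgentAnswerPiece to the stream writer here
    end_stream_token = datetime.now()
    dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
    response.append(content)

print("".join(response), dispatch_timings)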

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import HumanMessage
@@ -59,15 +60,11 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
@@ -80,7 +77,6 @@ from onyx.prompts.agent_search import (
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
_llm_node_error_strings = LLMNodeErrorStrings(
@@ -234,11 +230,7 @@ def generate_initial_answer(
sub_questions = all_sub_questions # Replace the original assignment
model = (
graph_config.tooling.fast_llm
if AGENT_ANSWER_GENERATION_BY_FAST_LLM
else graph_config.tooling.primary_llm
)
model = graph_config.tooling.fast_llm
doc_context = format_docs(answer_generation_documents.context_documents)
doc_context = trim_prompt_piece(
@@ -268,16 +260,15 @@ def generate_initial_answer(
)
]
streamed_tokens: list[str] = [""]
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
def stream_initial_answer() -> list[str]:
response: list[str] = []
try:
for message in model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
@@ -301,16 +292,9 @@ def generate_initial_answer(
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
return response
streamed_tokens.append(content)
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
stream_initial_answer,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,

View File

@@ -36,10 +36,7 @@ from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
@@ -50,7 +47,6 @@ from onyx.prompts.agent_search import (
INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -135,12 +131,10 @@ def decompose_orig_question(
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
dispatch_separated,
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
),
dispatch_subquestion(0, writer),
sep_callback=dispatch_subquestion_sep(0, writer),
@@ -160,7 +154,7 @@ def decompose_orig_question(
)
write_custom_event("stream_finished", stop_event, writer)
except (LLMTimeoutError, TimeoutError) as e:
except LLMTimeoutError as e:
logger.error("LLM Timeout Error - decompose orig question")
raise e # fail loudly on this critical step
except LLMRateLimitError as e:

View File

@@ -25,7 +25,7 @@ logger = setup_logger()
def route_initial_tool_choice(
state: MainState, config: RunnableConfig
) -> Literal["call_tool", "start_agent_search", "logging_node"]:
) -> Literal["tool_call", "start_agent_search", "logging_node"]:
"""
LangGraph edge to route to agent search.
"""
@@ -38,7 +38,7 @@ def route_initial_tool_choice(
):
return "start_agent_search"
else:
return "call_tool"
return "tool_call"
else:
return "logging_node"

View File

@@ -43,14 +43,14 @@ from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.refinement.consolidate_sub_answers.graph_builder import (
answer_refined_query_graph_builder,
)
from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
prepare_tool_input,
)
from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
from onyx.utils.logger import setup_logger
@@ -77,13 +77,13 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
# Choose the initial tool
graph.add_node(
node="initial_tool_choice",
action=choose_tool,
action=llm_tool_choice,
)
# Call the tool, if required
graph.add_node(
node="call_tool",
action=call_tool,
node="tool_call",
action=tool_call,
)
# Use the tool response
@@ -168,11 +168,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
graph.add_conditional_edges(
"initial_tool_choice",
route_initial_tool_choice,
["call_tool", "start_agent_search", "logging_node"],
["tool_call", "start_agent_search", "logging_node"],
)
graph.add_edge(
start_key="call_tool",
start_key="tool_call",
end_key="basic_use_tool_response",
)
graph.add_edge(

View File

@@ -33,15 +33,13 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import RefinedAnswerImprovement
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_COMPARE_ANSWERS
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -107,14 +105,11 @@ def compare_answers(
refined_answer_improvement: bool | None = None
# no need to stream this
try:
resp = run_with_timeout(
AGENT_TIMEOUT_LLM_COMPARE_ANSWERS,
model.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS,
resp = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,

View File

@@ -44,10 +44,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
@@ -56,7 +53,6 @@ from onyx.prompts.agent_search import (
)
from onyx.tools.models import ToolCallKickoff
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -138,17 +134,15 @@ def create_refined_sub_questions(
agent_error: AgentErrorLog | None = None
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
dispatch_separated,
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
),
dispatch_subquestion(1, writer),
sep_callback=dispatch_subquestion_sep(1, writer),
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,

View File

@@ -22,17 +22,11 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.constants import NUM_EXPLORATORY_DOCS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
@@ -90,42 +84,30 @@ def extract_entities_terms(
]
fast_llm = graph_config.tooling.fast_llm
# Grader
llm_response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
cleaned_response = (
str(llm_response.content).replace("```json\n", "").replace("\n```", "")
)
first_bracket = cleaned_response.find("{")
last_bracket = cleaned_response.rfind("}")
cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
try:
llm_response = run_with_timeout(
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
fast_llm.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
entity_extraction_result = EntityExtractionResult.model_validate_json(
cleaned_response
)
cleaned_response = (
str(llm_response.content).replace("```json\n", "").replace("\n```", "")
)
first_bracket = cleaned_response.find("{")
last_bracket = cleaned_response.rfind("}")
cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
try:
entity_extraction_result = EntityExtractionResult.model_validate_json(
cleaned_response
)
except ValueError:
logger.error(
"Failed to parse LLM response as JSON in Entity-Term Extraction"
)
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
)
except (LLMTimeoutError, TimeoutError):
logger.error("LLM Timeout Error - extract entities terms")
except ValueError:
logger.error("Failed to parse LLM response as JSON in Entity-Term Extraction")
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
)
except LLMRateLimitError:
logger.error("LLM Rate Limit Error - extract entities terms")
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
retrieved_entities_relationships=EntityRelationshipTermExtraction(
entities=[],
relationships=[],
terms=[],
),
)
return EntityTermExtractionUpdate(
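The extract_entities_terms hunk above reworks the fenced-JSON cleanup: strip the markdown code fence, slice to the outermost braces, then validate with Pydantic. A self-contained sketch of that cleanup; EntityExtractionResult here is a stand-in model, not the real Onyx one:

from pydantic import BaseModel


class EntityExtractionResult(BaseModel):
    entities: list[str] = []
    relationships: list[str] = []
    terms: list[str] = []


raw = '```json\n{"entities": ["Onyx"], "relationships": [], "terms": ["search"]}\n```'

# strip the markdown fence, then keep only the outermost {...} span so stray
# prose around the JSON cannot break parsing
cleaned = raw.replace("```json\n", "").replace("\n```", "")
cleaned = cleaned[cleaned.find("{") : cleaned.rfind("}") + 1]

try:
    result = EntityExtractionResult.model_validate_json(cleaned)
except ValueError:  # pydantic's ValidationError subclasses ValueError
    result = EntityExtractionResult()  # fall back to an empty extraction
print(result)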

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import HumanMessage
@@ -65,21 +66,14 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
@@ -98,7 +92,6 @@ from onyx.prompts.agent_search import (
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -260,12 +253,7 @@ def generate_validate_refined_answer(
else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
)
model = (
graph_config.tooling.fast_llm
if AGENT_ANSWER_GENERATION_BY_FAST_LLM
else graph_config.tooling.primary_llm
)
model = graph_config.tooling.fast_llm
relevant_docs_str = format_docs(answer_generation_documents.context_documents)
relevant_docs_str = trim_prompt_piece(
model.config,
@@ -296,13 +284,13 @@ def generate_validate_refined_answer(
)
]
streamed_tokens: list[str] = [""]
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
def stream_refined_answer() -> list[str]:
try:
for message in model.stream(
msg, timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
@@ -327,15 +315,8 @@ def generate_validate_refined_answer(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
return streamed_tokens
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
stream_refined_answer,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
@@ -402,20 +383,16 @@ def generate_validate_refined_answer(
)
]
validation_model = graph_config.tooling.fast_llm
try:
validation_response = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
validation_model.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
validation_response = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
)
refined_answer_quality = binary_string_test_after_answer_separator(
text=cast(str, validation_response.content),
positive_value=AGENT_POSITIVE_VALUE_STR,
separator=AGENT_ANSWER_SEPARATOR,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
refined_answer_quality = True
logger.error("LLM Timeout Error - validate refined answer")

View File

@@ -34,16 +34,14 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
QUERY_REWRITING_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -71,7 +69,7 @@ def expand_queries(
node_start_time = datetime.now()
question = state.question
model = graph_config.tooling.fast_llm
llm = graph_config.tooling.fast_llm
sub_question_id = state.sub_question_id
if sub_question_id is None:
level, question_num = 0, 0
@@ -90,12 +88,10 @@ def expand_queries(
rewritten_queries = []
try:
llm_response_list = run_with_timeout(
AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION,
dispatch_separated,
model.stream(
llm_response_list = dispatch_separated(
llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
),
dispatch_subquery(level, question_num, writer),
)
@@ -105,7 +101,7 @@ def expand_queries(
rewritten_queries = llm_response.split("\n")
log_result = f"Number of expanded queries: {len(rewritten_queries)}"
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,

View File

@@ -55,7 +55,6 @@ def rerank_documents(
# Note that these are passed in values from the API and are overrides which are typically None
rerank_settings = graph_config.inputs.search_request.rerank_settings
allow_agent_reranking = graph_config.behavior.allow_agent_reranking
if rerank_settings is None:
with get_session_context_manager() as db_session:
@@ -63,31 +62,23 @@ def rerank_documents(
if not search_settings.disable_rerank_for_streaming:
rerank_settings = RerankingDetails.from_db_model(search_settings)
# Initial default: no reranking. Will be overwritten below if reranking is warranted
reranked_documents = verified_documents
if should_rerank(rerank_settings) and len(verified_documents) > 0:
if len(verified_documents) > 1:
if not allow_agent_reranking:
logger.info("Use of local rerank model without GPU, skipping reranking")
# No reranking, stay with verified_documents as default
else:
# Reranking is warranted, use the rerank_sections function
reranked_documents = rerank_sections(
query_str=question,
# if runnable, then rerank_settings is not None
rerank_settings=cast(RerankingDetails, rerank_settings),
sections_to_rerank=verified_documents,
)
reranked_documents = rerank_sections(
query_str=question,
# if runnable, then rerank_settings is not None
rerank_settings=cast(RerankingDetails, rerank_settings),
sections_to_rerank=verified_documents,
)
else:
logger.warning(
f"{len(verified_documents)} verified document(s) found, skipping reranking"
)
# No reranking, stay with verified_documents as default
reranked_documents = verified_documents
else:
logger.warning("No reranking settings found, using unranked documents")
# No reranking, stay with verified_documents as default
reranked_documents = verified_documents
if AGENT_RERANKING_STATS:
fit_scores = get_fit_scores(verified_documents, reranked_documents)
else:

View File

@@ -25,15 +25,13 @@ from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrin
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
DOCUMENT_VERIFICATION_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
@@ -88,11 +86,8 @@ def verify_documents(
] # default is to treat document as relevant
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION,
fast_llm.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION,
response = fast_llm.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
)
assert isinstance(response.content, str)
@@ -101,7 +96,7 @@ def verify_documents(
):
verified_documents = []
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
# In this case, we decide to continue and don't raise an error, as there is
# little harm in letting some docs through that are less relevant.
logger.error("LLM Timeout Error - verify documents")

View File

@@ -67,7 +67,6 @@ class GraphSearchConfig(BaseModel):
# Whether to allow creation of refinement questions (and entity extraction, etc.)
allow_refinement: bool = True
skip_gen_ai_answer_generation: bool = False
allow_agent_reranking: bool = False
class GraphConfig(BaseModel):

View File

@@ -25,7 +25,7 @@ logger = setup_logger()
# and a function that handles extracting the necessary fields
# from the state and config
# TODO: fan-out to multiple tool call nodes? Make this configurable?
def choose_tool(
def llm_tool_choice(
state: ToolChoiceState,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,

View File

@@ -28,7 +28,7 @@ def emit_packet(packet: AnswerPacket, writer: StreamWriter) -> None:
write_custom_event("basic_response", packet, writer)
def call_tool(
def tool_call(
state: ToolChoiceUpdate,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,

View File

@@ -43,9 +43,8 @@ from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -81,7 +80,6 @@ from onyx.tools.tool_implementations.search.search_tool import SearchResponseSum
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
logger = setup_logger()
@@ -397,13 +395,11 @@ def summarize_history(
)
try:
history_response = run_with_timeout(
AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
llm.invoke,
history_response = llm.invoke(
history_context_prompt,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
except (LLMTimeoutError, TimeoutError):
except LLMTimeoutError:
logger.error("LLM Timeout Error - summarize history")
return (
history # this is what is done at this point anyway, so we default to this

View File

@@ -94,7 +94,6 @@ from onyx.db.models import User
from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import get_redis_client
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
from onyx.utils.telemetry import optional_telemetry
@@ -108,6 +107,11 @@ from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
class BasicAuthenticationError(HTTPException):
def __init__(self, detail: str):
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
def is_user_admin(user: User | None) -> bool:
if AUTH_TYPE == AuthType.DISABLED:
return True

View File

@@ -105,7 +105,6 @@ def document_by_cc_pair_cleanup_task(
tenant_id=tenant_id,
chunk_count=chunk_count,
)
delete_documents_complete__no_commit(
db_session=db_session,
document_ids=[document_id],

View File

@@ -27,10 +27,8 @@ from onyx.file_store.utils import InMemoryChatFile
from onyx.llm.interfaces import LLM
from onyx.tools.force import ForceUseTool
from onyx.tools.tool import Tool
from onyx.tools.tool_implementations.search.search_tool import QUERY_FIELD
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.gpu_utils import gpu_status_request
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -82,26 +80,6 @@ class Answer:
and not skip_explicit_tool_calling
)
rerank_settings = search_request.rerank_settings
using_cloud_reranking = (
rerank_settings is not None
and rerank_settings.rerank_provider_type is not None
)
allow_agent_reranking = gpu_status_request() or using_cloud_reranking
# TODO: this is a hack to force the query to be used for the search tool
# this should be removed once we fully unify graph inputs (i.e.
# remove SearchQuery entirely)
if (
force_use_tool.force_use
and search_tool
and force_use_tool.args
and force_use_tool.tool_name == search_tool.name
and QUERY_FIELD in force_use_tool.args
):
search_request.query = force_use_tool.args[QUERY_FIELD]
self.graph_inputs = GraphInputs(
search_request=search_request,
prompt_builder=prompt_builder,
@@ -116,6 +94,7 @@ class Answer:
force_use_tool=force_use_tool,
using_tool_calling_llm=using_tool_calling_llm,
)
assert db_session, "db_session must be provided for agentic persistence"
self.graph_persistence = GraphPersistence(
db_session=db_session,
chat_session_id=chat_session_id,
@@ -125,7 +104,6 @@ class Answer:
use_agentic_search=use_agentic_search,
skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
allow_refinement=True,
allow_agent_reranking=allow_agent_reranking,
)
self.graph_config = GraphConfig(
inputs=self.graph_inputs,

View File

@@ -7,7 +7,7 @@ from typing import cast
from sqlalchemy.orm import Session
from onyx.agents.agent_search.orchestration.nodes.call_tool import ToolCallException
from onyx.agents.agent_search.orchestration.nodes.tool_call import ToolCallException
from onyx.chat.answer import Answer
from onyx.chat.chat_utils import create_chat_chain
from onyx.chat.chat_utils import create_temporary_persona

View File

@@ -31,9 +31,22 @@ AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
)
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = 30 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = 10 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = 4 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = 1 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = 3 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = 12 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = 6 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = 8 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8 # in seconds
AGENT_RETRIEVAL_STATS = (
not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
@@ -165,172 +178,80 @@ AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
) # 2000
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = 10 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION
)
AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = 30 # in seconds
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION
)
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION
) # 25
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
) # 3
AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = 4 # in seconds
AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION
) # 30
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = 5 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION = 30 # in seconds
AGENT_TIMEOUT_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION
) # 8
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION = 5 # in seconds
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
) # 12
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = 3 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION = 30 # in seconds
AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION
) # 25
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = 5 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = 25 # in seconds
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
) # 25
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = 5 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = 30 # in seconds
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
) # 8
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
)
AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK = 8 # in seconds
AGENT_TIMEOUT_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK
)
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION
) # 6
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = 3 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = 8 # in seconds
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION
) # 1
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = 1 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = 3 # in seconds
AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION
) # 4
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION
)
AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = 5 # in seconds
AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
) # 8
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
)
AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS = 8 # in seconds
AGENT_TIMEOUT_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS
)
AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = 2 # in seconds
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = int(
os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION")
or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION
)
AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = 8 # in seconds
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = int(
os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION")
or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION
)
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
) # 8
GRAPH_VERSION_NAME: str = "a"
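Every timeout in this file follows the same override pattern: int(os.environ.get(NAME) or DEFAULT). Using `or` rather than a get() default means a variable that is set but empty still falls back to the default. A minimal sketch with one of the settings above:

import os

AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8  # in seconds

AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
)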

View File

@@ -628,7 +628,7 @@ def create_new_chat_message(
commit: bool = True,
reserved_message_id: int | None = None,
overridden_model: str | None = None,
refined_answer_improvement: bool | None = None,
refined_answer_improvement: bool = True,
) -> ChatMessage:
if reserved_message_id is not None:
# Edit existing message

View File

@@ -409,6 +409,10 @@ class DefaultMultiLLM(LLM):
self._record_call(processed_prompt)
try:
print(
"model is",
f"{self.config.model_provider}/{self.config.deployment_name or self.config.model_name}",
)
return litellm.completion(
mock_response=MOCK_LLM_RESPONSE,
# model choice

View File

@@ -61,10 +61,10 @@ def _create_indexable_chunks(
doc_updated_at=None,
primary_owners=[],
secondary_owners=[],
chunk_count=preprocessed_doc["chunk_ind"] + 1,
chunk_count=1,
)
ids_to_documents[document.id] = document
if preprocessed_doc["chunk_ind"] == 0:
ids_to_documents[document.id] = document
chunk = DocMetadataAwareIndexChunk(
chunk_id=preprocessed_doc["chunk_ind"],
@@ -92,7 +92,6 @@ def _create_indexable_chunks(
boost=DEFAULT_BOOST,
large_chunk_id=None,
)
chunks.append(chunk)
return list(ids_to_documents.values()), chunks
@@ -193,7 +192,6 @@ def seed_initial_documents(
last_successful_index_time=last_index_time,
seeding_flow=True,
)
cc_pair_id = cast(int, result.data)
processed_docs = fetch_versioned_implementation(
"onyx.seeding.load_docs",
@@ -251,5 +249,4 @@ def seed_initial_documents(
.values(chunk_count=doc.chunk_count)
)
db_session.commit()
kv_store.store(KV_DOCUMENTS_SEEDED_KEY, True)

View File

@@ -213,6 +213,8 @@ def get_chat_session(
# we need the tool call objs anyways, so just fetch them in a single call
prefetch_tool_calls=True,
)
for message in session_messages:
translate_db_message_to_chat_message_detail(message)
return ChatSessionDetailResponse(
chat_session_id=session_id,

View File

@@ -58,7 +58,6 @@ SEARCH_RESPONSE_SUMMARY_ID = "search_response_summary"
SEARCH_DOC_CONTENT_ID = "search_doc_content"
SECTION_RELEVANCE_LIST_ID = "section_relevance_list"
SEARCH_EVALUATION_ID = "llm_doc_eval"
QUERY_FIELD = "query"
class SearchResponseSummary(SearchQueryInfo):
@@ -180,12 +179,12 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
"parameters": {
"type": "object",
"properties": {
QUERY_FIELD: {
"query": {
"type": "string",
"description": "What to search for",
},
},
"required": [QUERY_FIELD],
"required": ["query"],
},
},
}
@@ -224,7 +223,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
rephrased_query = history_based_query_rephrase(
query=query, history=history, llm=llm
)
return {QUERY_FIELD: rephrased_query}
return {"query": rephrased_query}
"""Actual tool execution"""
@@ -280,7 +279,7 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):
def run(
self, override_kwargs: SearchToolOverrideKwargs | None = None, **llm_kwargs: Any
) -> Generator[ToolResponse, None, None]:
query = cast(str, llm_kwargs[QUERY_FIELD])
query = cast(str, llm_kwargs["query"])
force_no_rerank = False
alternate_db_session = None
retrieved_sections_callback = None
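The SearchTool hunk above inlines the literal "query" where the QUERY_FIELD constant used to be. For context, the dict it builds is an OpenAI-style function schema; a sketch of its shape, where the outer function wrapper and name are illustrative and the parameters block mirrors the diff:

search_tool_definition = {
    "type": "function",
    "function": {
        "name": "run_search",  # illustrative name
        "description": "Search the connected document sources.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "What to search for",
                },
            },
            "required": ["query"],
        },
    },
}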

View File

@@ -1,4 +1,3 @@
import threading
import uuid
from collections.abc import Callable
from concurrent.futures import as_completed
@@ -14,10 +13,6 @@ logger = setup_logger()
R = TypeVar("R")
# WARNING: it is not currently well understood whether we lose access to contextvars when functions are
# executed through this wrapper. Do NOT try to acquire a db session in a function run through this unless
# you have heavily tested that multi-tenancy is respected. If/when we know for sure that it is or
# is not safe, update this comment.
def run_functions_tuples_in_parallel(
functions_with_args: list[tuple[Callable, tuple]],
allow_failures: bool = False,
@@ -83,10 +78,6 @@ class FunctionCall(Generic[R]):
return self.func(*self.args, **self.kwargs)
# WARNING: it is not currently well understood whether we lose access to contextvars when functions are
# executed through this wrapper. Do NOT try to acquire a db session in a function run through this unless
# you have heavily tested that multi-tenancy is respected. If/when we know for sure that it is or
# is not safe, update this comment.
def run_functions_in_parallel(
function_calls: list[FunctionCall],
allow_failures: bool = False,
@@ -118,49 +109,3 @@ def run_functions_in_parallel(
raise
return results
class TimeoutThread(threading.Thread):
def __init__(
self, timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any
):
super().__init__()
self.timeout = timeout
self.func = func
self.args = args
self.kwargs = kwargs
self.exception: Exception | None = None
def run(self) -> None:
try:
self.result = self.func(*self.args, **self.kwargs)
except Exception as e:
self.exception = e
def end(self) -> None:
raise TimeoutError(
f"Function {self.func.__name__} timed out after {self.timeout} seconds"
)
# WARNING: it is not currently well understood whether we lose access to contextvars when functions are
# executed through this wrapper. Do NOT try to acquire a db session in a function run through this unless
# you have heavily tested that multi-tenancy is respected. If/when we know for sure that it is or
# is not safe, update this comment.
def run_with_timeout(
timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any
) -> R:
"""
Executes a function with a timeout. If the function doesn't complete within the specified
timeout, raises TimeoutError.
"""
task = TimeoutThread(timeout, func, *args, **kwargs)
task.start()
task.join(timeout)
if task.exception is not None:
raise task.exception
if task.is_alive():
task.end()
return task.result
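One caveat the removed TimeoutThread helper carries: Thread.join(timeout) only stops waiting, it does not stop the worker, so the wrapped function keeps running in the background after TimeoutError reaches the caller. A small runnable sketch of that behavior using plain threading, not the removed helper itself:

import threading
import time


def slow() -> None:
    time.sleep(1.0)
    print("worker still ran to completion in the background")


t = threading.Thread(target=slow)
t.start()
t.join(timeout=0.1)  # gives up waiting; the thread itself is not cancelled
print("timed out, worker alive:", t.is_alive())
t.join()  # the interpreter still has to wait for the thread eventually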

View File

@@ -11,7 +11,6 @@ from langchain_core.messages import HumanMessage
from langchain_core.messages import SystemMessage
from langchain_core.messages import ToolCall
from langchain_core.messages import ToolCallChunk
from pytest_mock import MockerFixture
from sqlalchemy.orm import Session
from onyx.chat.answer import Answer
@@ -26,7 +25,6 @@ from onyx.chat.models import StreamStopReason
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message
from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message
from onyx.context.search.models import RerankingDetails
from onyx.context.search.models import SearchRequest
from onyx.llm.interfaces import LLM
from onyx.tools.force import ForceUseTool
@@ -37,7 +35,6 @@ from onyx.tools.tool_implementations.search.search_tool import SEARCH_DOC_CONTEN
from onyx.tools.tool_implementations.search_like_tool_utils import (
FINAL_CONTEXT_DOCUMENTS_ID,
)
from shared_configs.enums import RerankerProvider
from tests.unit.onyx.chat.conftest import DEFAULT_SEARCH_ARGS
from tests.unit.onyx.chat.conftest import QUERY
@@ -47,20 +44,6 @@ def answer_instance(
mock_llm: LLM,
answer_style_config: AnswerStyleConfig,
prompt_config: PromptConfig,
mocker: MockerFixture,
) -> Answer:
mocker.patch(
"onyx.chat.answer.gpu_status_request",
return_value=True,
)
return _answer_fixture_impl(mock_llm, answer_style_config, prompt_config)
def _answer_fixture_impl(
mock_llm: LLM,
answer_style_config: AnswerStyleConfig,
prompt_config: PromptConfig,
rerank_settings: RerankingDetails | None = None,
) -> Answer:
return Answer(
prompt_builder=AnswerPromptBuilder(
@@ -81,13 +64,13 @@ def _answer_fixture_impl(
llm=mock_llm,
fast_llm=mock_llm,
force_use_tool=ForceUseTool(force_use=False, tool_name="", args=None),
search_request=SearchRequest(query=QUERY, rerank_settings=rerank_settings),
search_request=SearchRequest(query=QUERY),
chat_session_id=UUID("123e4567-e89b-12d3-a456-426614174000"),
current_agent_message_id=0,
)
def test_basic_answer(answer_instance: Answer, mocker: MockerFixture) -> None:
def test_basic_answer(answer_instance: Answer) -> None:
mock_llm = cast(Mock, answer_instance.graph_config.tooling.primary_llm)
mock_llm.stream.return_value = [
AIMessageChunk(content="This is a "),
@@ -380,49 +363,3 @@ def test_is_cancelled(answer_instance: Answer) -> None:
# Verify LLM calls
mock_llm.stream.assert_called_once()
@pytest.mark.parametrize(
"gpu_enabled,is_local_model",
[
(True, False),
(False, True),
(True, True),
(False, False),
],
)
def test_no_slow_reranking(
gpu_enabled: bool,
is_local_model: bool,
mock_llm: LLM,
answer_style_config: AnswerStyleConfig,
prompt_config: PromptConfig,
mocker: MockerFixture,
) -> None:
mocker.patch(
"onyx.chat.answer.gpu_status_request",
return_value=gpu_enabled,
)
rerank_settings = (
None
if is_local_model
else RerankingDetails(
rerank_model_name="test_model",
rerank_api_url="test_url",
rerank_api_key="test_key",
num_rerank=10,
rerank_provider_type=RerankerProvider.COHERE,
)
)
answer_instance = _answer_fixture_impl(
mock_llm, answer_style_config, prompt_config, rerank_settings=rerank_settings
)
assert (
answer_instance.graph_config.inputs.search_request.rerank_settings
== rerank_settings
)
assert (
answer_instance.graph_config.behavior.allow_agent_reranking == gpu_enabled
or not is_local_model
)

View File

@@ -36,12 +36,7 @@ def test_skip_gen_ai_answer_generation_flag(
mock_search_tool: SearchTool,
answer_style_config: AnswerStyleConfig,
prompt_config: PromptConfig,
mocker: MockerFixture,
) -> None:
mocker.patch(
"onyx.chat.answer.gpu_status_request",
return_value=True,
)
question = config["question"]
skip_gen_ai_answer_generation = config["skip_gen_ai_answer_generation"]

View File

@@ -1,61 +0,0 @@
import time
import pytest
from onyx.utils.threadpool_concurrency import run_with_timeout
def test_run_with_timeout_completes() -> None:
"""Test that a function that completes within timeout works correctly"""
def quick_function(x: int) -> int:
return x * 2
result = run_with_timeout(1.0, quick_function, x=21)
assert result == 42
@pytest.mark.parametrize("slow,timeout", [(1, 0.1), (0.3, 0.2)])
def test_run_with_timeout_raises_on_timeout(slow: float, timeout: float) -> None:
"""Test that a function that exceeds timeout raises TimeoutError"""
def slow_function() -> None:
time.sleep(slow)  # sleep for `slow` seconds
with pytest.raises(TimeoutError) as exc_info:
start = time.time()
run_with_timeout(timeout, slow_function)  # timeout shorter than the sleep
end = time.time()
assert end - start >= timeout
assert end - start < (slow + timeout) / 2
assert f"timed out after {timeout} seconds" in str(exc_info.value)
@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning")
def test_run_with_timeout_propagates_exceptions() -> None:
"""Test that other exceptions from the function are propagated properly"""
def error_function() -> None:
raise ValueError("Test error")
with pytest.raises(ValueError) as exc_info:
run_with_timeout(1.0, error_function)
assert "Test error" in str(exc_info.value)
def test_run_with_timeout_with_args_and_kwargs() -> None:
"""Test that args and kwargs are properly passed to the function"""
def complex_function(x: int, y: int, multiply: bool = False) -> int:
if multiply:
return x * y
return x + y
# Test with just positional args
result1 = run_with_timeout(1.0, complex_function, x=5, y=3)
assert result1 == 8
# Test with positional and keyword args
result2 = run_with_timeout(1.0, complex_function, x=5, y=3, multiply=True)
assert result2 == 15

View File

@@ -1,11 +1,6 @@
"use client";
import {
redirect,
usePathname,
useRouter,
useSearchParams,
} from "next/navigation";
import { redirect, useRouter, useSearchParams } from "next/navigation";
import {
BackendChatSession,
BackendMessage,
@@ -135,7 +130,6 @@ import {
} from "@/lib/browserUtilities";
import { Button } from "@/components/ui/button";
import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal";
import { MessageChannel } from "node:worker_threads";
const TEMP_USER_MESSAGE_ID = -1;
const TEMP_ASSISTANT_MESSAGE_ID = -2;
@@ -1151,7 +1145,6 @@ export function ChatPage({
regenerationRequest?: RegenerationRequest | null;
overrideFileDescriptors?: FileDescriptor[];
} = {}) => {
navigatingAway.current = false;
let frozenSessionId = currentSessionId();
updateCanContinue(false, frozenSessionId);
@@ -1274,6 +1267,7 @@ export function ChatPage({
let stackTrace: string | null = null;
let sub_questions: SubQuestionDetail[] = [];
let second_level_sub_questions: SubQuestionDetail[] = [];
let is_generating: boolean = false;
let second_level_generating: boolean = false;
let finalMessage: BackendMessage | null = null;
@@ -1297,7 +1291,7 @@ export function ChatPage({
const stack = new CurrentMessageFIFO();
updateCurrentMessageFIFO(stack, {
signal: controller.signal,
signal: controller.signal, // Add this line
message: currMessage,
alternateAssistantId: currentAssistantId,
fileDescriptors: overrideFileDescriptors || currentMessageFiles,
@@ -1718,10 +1712,7 @@ export function ChatPage({
const newUrl = buildChatUrl(searchParams, currChatSessionId, null);
// newUrl is like /chat?chatId=10
// current page is like /chat
if (pathname == "/chat" && !navigatingAway.current) {
router.push(newUrl, { scroll: false });
}
router.push(newUrl, { scroll: false });
}
}
if (
@@ -2095,31 +2086,6 @@ export function ChatPage({
llmOverrideManager.updateImageFilesPresent(imageFileInMessageHistory);
}, [imageFileInMessageHistory]);
const pathname = usePathname();
useEffect(() => {
return () => {
// Cleanup which only runs when the component unmounts (i.e. when you navigate away).
const currentSession = currentSessionId();
const controller = abortControllersRef.current.get(currentSession);
if (controller) {
controller.abort();
navigatingAway.current = true;
setAbortControllers((prev) => {
const newControllers = new Map(prev);
newControllers.delete(currentSession);
return newControllers;
});
}
};
}, [pathname]);
const navigatingAway = useRef(false);
// Keep a ref to abortControllers to ensure we always have the latest value
const abortControllersRef = useRef(abortControllers);
useEffect(() => {
abortControllersRef.current = abortControllers;
}, [abortControllers]);
useSidebarShortcut(router, toggleSidebar);
const [sharedChatSession, setSharedChatSession] =
@@ -2334,7 +2300,7 @@ export function ChatPage({
fixed
left-0
z-40
bg-neutral-200
bg-background-100
h-screen
transition-all
bg-opacity-80
@@ -2591,21 +2557,12 @@ export function ChatPage({
) {
return <></>;
}
const nextMessage =
messageHistory.length > i + 1
? messageHistory[i + 1]
: null;
return (
<div
id={`message-${message.messageId}`}
key={messageReactComponentKey}
>
<HumanMessage
disableSwitchingForStreaming={
(nextMessage &&
nextMessage.is_generating) ||
false
}
stopGenerating={stopGenerating}
content={message.message}
files={message.files}
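
For reference, the abort-on-navigate wiring that this compare strips out of ChatPage, reassembled from the hunks above into one fragment. This is a sketch, not the authoritative source: it sits inside the component body, and `abortControllers`, `setAbortControllers`, and `currentSessionId` are assumed to exist exactly as they appear in the diff.

// Sketch reassembled from the removed hunks; component-body fragment.
const navigatingAway = useRef(false);
const pathname = usePathname();

// Mirror the latest abortControllers map into a ref so the unmount cleanup
// below never closes over a stale value.
const abortControllersRef = useRef(abortControllers);
useEffect(() => {
  abortControllersRef.current = abortControllers;
}, [abortControllers]);

useEffect(() => {
  return () => {
    // Cleanup that runs only on unmount (i.e. when navigating away): abort
    // the in-flight stream for the current session and drop its controller.
    const currentSession = currentSessionId();
    const controller = abortControllersRef.current.get(currentSession);
    if (controller) {
      controller.abort();
      navigatingAway.current = true;
      setAbortControllers((prev) => {
        const newControllers = new Map(prev);
        newControllers.delete(currentSession);
        return newControllers;
      });
    }
  };
}, [pathname]);

With this gone, the `pathname == "/chat" && !navigatingAway.current` guard above is also removed, so `router.push(newUrl, { scroll: false })` runs unconditionally after a session is named.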

View File

@@ -94,7 +94,7 @@ export function AgenticToggle({
Agent Search (BETA)
</h3>
</div>
<p className="text-xs text-neutral-600 dark:text-neutral-700 mb-2">
<p className="text-xs text-neutarl-600 dark:text-neutral-700 mb-2">
Use AI agents to break down questions and run deep iterative
research through promising pathways. Gives more thorough and
accurate responses but takes slightly longer.

View File

@@ -113,7 +113,7 @@ export default function LLMPopover({
<Popover open={isOpen} onOpenChange={setIsOpen}>
<PopoverTrigger asChild>
<button
className="dark:text-[#fff] text-[#000] focus:outline-none"
className="focus:outline-none"
data-testid="llm-popover-trigger"
>
<ChatInputOption

View File

@@ -250,7 +250,7 @@ export async function* sendMessage({
throw new Error(`HTTP error! status: ${response.status}`);
}
yield* handleSSEStream<PacketType>(response, signal);
yield* handleSSEStream<PacketType>(response);
}
export async function nameChatSession(chatSessionId: string) {
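
Caller side of the same change, sketched for orientation: before the revert, an AbortController created per chat session fed its signal through `sendMessage` into `handleSSEStream` (see the next file). The option names below follow the ChatPage hunk above; treat this as hypothetical usage, not the exact call site.

// Hypothetical pre-revert usage: cancelling the controller stops the SSE stream.
const controller = new AbortController();

for await (const packet of sendMessage({
  signal: controller.signal,
  message: currMessage,
  // ...remaining options as in the ChatPage hunk above
})) {
  // handle each streamed packet
}

// later, e.g. when the user navigates away:
// controller.abort();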

View File

@@ -9,12 +9,6 @@ import React, {
useMemo,
useState,
} from "react";
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import ReactMarkdown from "react-markdown";
import { OnyxDocument, FilteredOnyxDocument } from "@/lib/search/interfaces";
import remarkGfm from "remark-gfm";
@@ -314,7 +308,7 @@ export const AgenticMessage = ({
const renderedAlternativeMarkdown = useMemo(() => {
return (
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-base"
className="prose max-w-full text-base"
components={{
...markdownComponents,
code: ({ node, className, children }: any) => {
@@ -341,7 +335,7 @@ export const AgenticMessage = ({
const renderedMarkdown = useMemo(() => {
return (
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-base"
className="prose max-w-full text-base"
components={markdownComponents}
remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[[rehypePrism, { ignoreMissing: true }], rehypeKatex]}
@@ -536,7 +530,6 @@ export const AgenticMessage = ({
{includeMessageSwitcher && (
<div className="-mx-1 mr-auto">
<MessageSwitcher
disableForStreaming={!isComplete}
currentPage={currentMessageInd + 1}
totalPages={otherMessagesCanSwitchTo.length}
handlePrevious={() => {
@@ -623,7 +616,6 @@ export const AgenticMessage = ({
{includeMessageSwitcher && (
<div className="-mx-1 mr-auto">
<MessageSwitcher
disableForStreaming={!isComplete}
currentPage={currentMessageInd + 1}
totalPages={otherMessagesCanSwitchTo.length}
handlePrevious={() => {
@@ -702,52 +694,27 @@ function MessageSwitcher({
totalPages,
handlePrevious,
handleNext,
disableForStreaming,
}: {
currentPage: number;
totalPages: number;
handlePrevious: () => void;
handleNext: () => void;
disableForStreaming?: boolean;
}) {
return (
<div className="flex items-center text-sm space-x-0.5">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<Hoverable
icon={FiChevronLeft}
onClick={currentPage === 1 ? undefined : handlePrevious}
/>
</div>
</TooltipTrigger>
<TooltipContent>
{disableForStreaming ? "Disabled" : "Previous"}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Hoverable
icon={FiChevronLeft}
onClick={currentPage === 1 ? undefined : handlePrevious}
/>
<span className="text-text-darker select-none">
{currentPage} / {totalPages}
{disableForStreaming ? "Complete" : "Generating"}
</span>
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<Hoverable
icon={FiChevronRight}
onClick={currentPage === totalPages ? undefined : handleNext}
/>
</div>
</TooltipTrigger>
<TooltipContent>
{disableForStreaming ? "Disabled" : "Next"}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Hoverable
icon={FiChevronRight}
onClick={currentPage === totalPages ? undefined : handleNext}
/>
</div>
);
}

View File

@@ -383,7 +383,7 @@ export const AIMessage = ({
dangerouslySetInnerHTML={{ __html: htmlContent }}
/>
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-base"
className="prose max-w-full text-base"
components={markdownComponents}
remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[[rehypePrism, { ignoreMissing: true }], rehypeKatex]}
@@ -495,10 +495,7 @@ export const AIMessage = ({
{docs && docs.length > 0 && (
<div
className={`mobile:hidden ${
(query ||
toolCall?.tool_name ===
INTERNET_SEARCH_TOOL_NAME) &&
"mt-2"
query && "mt-2"
} -mx-8 w-full mb-4 flex relative`}
>
<div className="w-full">
@@ -798,67 +795,27 @@ function MessageSwitcher({
totalPages,
handlePrevious,
handleNext,
disableForStreaming,
}: {
currentPage: number;
totalPages: number;
handlePrevious: () => void;
handleNext: () => void;
disableForStreaming?: boolean;
}) {
return (
<div className="flex items-center text-sm space-x-0.5">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<Hoverable
icon={FiChevronLeft}
onClick={
disableForStreaming
? () => null
: currentPage === 1
? undefined
: handlePrevious
}
/>
</div>
</TooltipTrigger>
<TooltipContent>
{disableForStreaming
? "Wait for agent message to complete"
: "Previous"}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Hoverable
icon={FiChevronLeft}
onClick={currentPage === 1 ? undefined : handlePrevious}
/>
<span className="text-text-darker select-none">
{currentPage} / {totalPages}
</span>
<TooltipProvider>
<Tooltip>
<TooltipTrigger>
<div>
<Hoverable
icon={FiChevronRight}
onClick={
disableForStreaming
? () => null
: currentPage === totalPages
? undefined
: handleNext
}
/>
</div>
</TooltipTrigger>
<TooltipContent>
{disableForStreaming
? "Wait for agent message to complete"
: "Next"}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Hoverable
icon={FiChevronRight}
onClick={currentPage === totalPages ? undefined : handleNext}
/>
</div>
);
}
@@ -872,7 +829,6 @@ export const HumanMessage = ({
onMessageSelection,
shared,
stopGenerating = () => null,
disableSwitchingForStreaming = false,
}: {
shared?: boolean;
content: string;
@@ -882,7 +838,6 @@ export const HumanMessage = ({
onEdit?: (editedContent: string) => void;
onMessageSelection?: (messageId: number) => void;
stopGenerating?: () => void;
disableSwitchingForStreaming?: boolean;
}) => {
const textareaRef = useRef<HTMLTextAreaElement>(null);
@@ -1112,7 +1067,6 @@ export const HumanMessage = ({
otherMessagesCanSwitchTo.length > 1 && (
<div className="ml-auto mr-3">
<MessageSwitcher
disableForStreaming={disableSwitchingForStreaming}
currentPage={currentMessageInd + 1}
totalPages={otherMessagesCanSwitchTo.length}
handlePrevious={() => {

View File

@@ -294,7 +294,7 @@ const SubQuestionDisplay: React.FC<{
const renderedMarkdown = useMemo(() => {
return (
<ReactMarkdown
className="prose dark:prose-invert max-w-full text-base"
className="prose max-w-full text-base"
components={markdownComponents}
remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[rehypeKatex]}
@@ -340,7 +340,7 @@ const SubQuestionDisplay: React.FC<{
{subQuestion?.question || temporaryDisplay?.question}
</div>
<ChevronDown
className={`mt-0.5 flex-none text-text-darker transition-transform duration-500 ease-in-out ${
className={`mt-0.5 text-text-darker transition-transform duration-500 ease-in-out ${
toggled ? "" : "-rotate-90"
}`}
size={20}
@@ -632,7 +632,9 @@ const SubQuestionsDisplay: React.FC<SubQuestionsDisplayProps> = ({
}
`}</style>
<div className="relative">
{/* {subQuestions.map((subQuestion, index) => ( */}
{memoizedSubQuestions.map((subQuestion, index) => (
// {dynamicSubQuestions.map((subQuestion, index) => (
<SubQuestionDisplay
currentlyOpen={
currentlyOpenQuestion?.level === subQuestion.level &&

View File

@@ -131,7 +131,7 @@ const StandardAnswersTableRow = ({
/>,
<ReactMarkdown
key={`answer-${standardAnswer.id}`}
className="prose dark:prose-invert"
className="prose"
remarkPlugins={[remarkGfm]}
>
{standardAnswer.answer}

View File

@@ -562,7 +562,6 @@ body {
.prose :where(pre):not(:where([class~="not-prose"], [class~="not-prose"] *)) {
background-color: theme("colors.code-bg");
font-size: theme("fontSize.code-sm");
color: #fff;
}
pre[class*="language-"],
@@ -656,3 +655,16 @@ ul > li > p {
display: inline;
/* Make paragraphs inline to reduce vertical space */
}
.dark strong {
color: white;
}
.prose.dark li,
.prose.dark h1,
.prose.dark h2,
.prose.dark h3,
.prose.dark h4,
.prose.dark h5 {
color: #e5e5e5;
}

View File

@@ -17,7 +17,7 @@ export const Hoverable: React.FC<{
<div className="flex items-center">
<Icon
size={size}
className="dark:text-[#B4B4B4] text-neutral-600 rounded h-fit cursor-pointer"
className="hover:bg-background-chat-hover dark:text-[#B4B4B4] text-neutral-600 rounded h-fit cursor-pointer"
/>
{hoverText && (
<div className="max-w-0 leading-none whitespace-nowrap overflow-hidden transition-all duration-300 ease-in-out group-hover:max-w-xs group-hover:ml-2">

View File

@@ -50,7 +50,7 @@ export function SearchResultIcon({ url }: { url: string }) {
return <SourceIcon sourceType={ValidSources.Web} iconSize={18} />;
}
if (url.includes("docs.onyx.app")) {
return <OnyxIcon size={18} className="dark:text-[#fff] text-[#000]" />;
return <OnyxIcon size={18} />;
}
return (

View File

@@ -23,7 +23,7 @@ export function WebResultIcon({
return (
<>
{hostname == "docs.onyx.app" ? (
<OnyxIcon size={size} className="dark:text-[#fff] text-[#000]" />
<OnyxIcon size={size} />
) : !error ? (
<img
className="my-0 rounded-full py-0"

View File

@@ -432,10 +432,7 @@ export const MarkdownFormField = ({
</div>
{isPreviewOpen ? (
<div className="p-4 border-t border-background-300">
<ReactMarkdown
className="prose dark:prose-invert"
remarkPlugins={[remarkGfm]}
>
<ReactMarkdown className="prose" remarkPlugins={[remarkGfm]}>
{field.value}
</ReactMarkdown>
</div>

View File

@@ -9,7 +9,7 @@ export default function BlurBackground({
<div
onClick={onClick}
className={`
desktop:hidden w-full h-full fixed inset-0 bg-neutral-700 bg-opacity-50 backdrop-blur-sm z-30 transition-opacity duration-300 ease-in-out ${
desktop:hidden w-full h-full fixed inset-0 bg-black bg-opacity-50 backdrop-blur-sm z-30 transition-opacity duration-300 ease-in-out ${
visible
? "opacity-100 pointer-events-auto"
: "opacity-0 pointer-events-none"

View File

@@ -35,7 +35,7 @@ export const MinimalMarkdown: React.FC<MinimalMarkdownProps> = ({
return (
<ReactMarkdown
className={`w-full text-wrap break-word prose dark:prose-invert ${className}`}
className={`w-full text-wrap break-word ${className}`}
components={markdownComponents}
remarkPlugins={[remarkGfm]}
>

View File

@@ -78,7 +78,7 @@ export function getUniqueIcons(docs: OnyxDocument[]): JSX.Element[] {
for (const doc of docs) {
// If it's a web source, we check domain uniqueness
if ((doc.is_internet || doc.source_type === ValidSources.Web) && doc.link) {
if (doc.source_type === ValidSources.Web && doc.link) {
const domain = getDomainFromUrl(doc.link);
if (domain && !seenDomains.has(domain)) {
seenDomains.add(domain);

View File

@@ -47,7 +47,7 @@ export default function LogoWithText({
className="flex gap-x-2 items-center ml-0 cursor-pointer desktop:hidden "
>
{!toggled ? (
<Logo className="desktop:hidden" height={24} width={24} />
<Logo className="desktop:hidden -my-2" height={24} width={24} />
) : (
<LogoComponent
show={toggled}

View File

@@ -23,11 +23,8 @@ import { AllUsersResponse } from "./types";
import { Credential } from "./connectors/credentials";
import { SettingsContext } from "@/components/settings/SettingsProvider";
import { Persona, PersonaLabel } from "@/app/admin/assistants/interfaces";
import {
isAnthropic,
LLMProviderDescriptor,
} from "@/app/admin/configuration/llm/interfaces";
import { LLMProviderDescriptor } from "@/app/admin/configuration/llm/interfaces";
import { isAnthropic } from "@/app/admin/configuration/llm/interfaces";
import { getSourceMetadata } from "./sources";
import { AuthType, NEXT_PUBLIC_CLOUD_ENABLED } from "./constants";
import { useUser } from "@/components/user/UserProvider";

View File

@@ -79,18 +79,12 @@ export async function* handleStream<T extends NonEmptyObject>(
}
export async function* handleSSEStream<T extends PacketType>(
streamingResponse: Response,
signal?: AbortSignal
streamingResponse: Response
): AsyncGenerator<T, void, unknown> {
const reader = streamingResponse.body?.getReader();
const decoder = new TextDecoder();
let buffer = "";
if (signal) {
signal.addEventListener("abort", () => {
console.log("aborting");
reader?.cancel();
});
}
while (true) {
const rawChunk = await reader?.read();
if (!rawChunk) {
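
For comparison, the pre-revert shape of `handleSSEStream` assembled from this hunk. A sketch only: the loop body is elided here exactly as it is in the diff.

// Pre-revert sketch: an optional AbortSignal cancels the reader, which makes
// pending read() calls resolve as done and ends the generator early.
export async function* handleSSEStream<T extends PacketType>(
  streamingResponse: Response,
  signal?: AbortSignal
): AsyncGenerator<T, void, unknown> {
  const reader = streamingResponse.body?.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  if (signal) {
    signal.addEventListener("abort", () => {
      reader?.cancel();
    });
  }

  while (true) {
    const rawChunk = await reader?.read();
    if (!rawChunk) {
      break;
    }
    // ...decode into `buffer` with `decoder` and yield parsed packets
    // (elided; unchanged by this hunk)
  }
}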

View File

@@ -21,6 +21,7 @@ module.exports = {
transitionProperty: {
spacing: "margin, padding",
},
keyframes: {
"subtle-pulse": {
"0%, 100%": { opacity: 0.9 },
@@ -147,6 +148,7 @@ module.exports = {
"text-mobile-sidebar": "var(--text-800)",
"background-search-filter": "var(--neutral-100-border-light)",
"background-search-filter-dropdown": "var(--neutral-100-border-light)",
"tw-prose-bold": "var(--text-800)",
"user-bubble": "var(--off-white)",