Compare commits

...

1 Commit

Author SHA1 Message Date
Jamison Lahman
1e69f66705 fix(chat): improve LLM_SOCKET_READ_TIMEOUT user experience 2026-04-17 12:35:02 -07:00
4 changed files with 34 additions and 3 deletions

View File

@@ -93,6 +93,7 @@ from onyx.llm.factory import get_llm_for_persona
from onyx.llm.factory import get_llm_token_counter
from onyx.llm.interfaces import LLM
from onyx.llm.interfaces import LLMUserIdentity
from onyx.llm.multi_llm import LLMTimeoutError
from onyx.llm.override_models import LLMOverride
from onyx.llm.request_context import reset_llm_mock_response
from onyx.llm.request_context import set_llm_mock_response
@@ -1166,6 +1167,32 @@ def _run_models(
else:
if item is _MODEL_DONE:
models_remaining -= 1
elif isinstance(item, LLMTimeoutError):
model_llm = setup.llms[model_idx]
error_msg = (
"The LLM took too long to respond. "
"If you're running a local model, try increasing the "
"LLM_SOCKET_READ_TIMEOUT environment variable "
"(current default: 120 seconds)."
)
stack_trace = "".join(
traceback.format_exception(type(item), item, item.__traceback__)
)
if model_llm.config.api_key and len(model_llm.config.api_key) > 2:
stack_trace = stack_trace.replace(
model_llm.config.api_key, "[REDACTED_API_KEY]"
)
yield StreamingError(
error=error_msg,
stack_trace=stack_trace,
error_code="CONNECTION_ERROR",
is_retryable=True,
details={
"model": model_llm.config.model_name,
"provider": model_llm.config.model_provider,
"model_index": model_idx,
},
)
elif isinstance(item, Exception):
# Yield a tagged error for this model but keep the other models running.
# Do NOT decrement models_remaining — _run_model's finally always posts

View File

@@ -290,7 +290,11 @@ def litellm_exception_to_error_msg(
error_code = "BUDGET_EXCEEDED"
is_retryable = False
elif isinstance(core_exception, Timeout):
error_msg = "Request timed out: The operation took too long to complete. Please try again."
error_msg = (
"The LLM took too long to respond. "
"If you're running a local model, try increasing the "
"LLM_SOCKET_READ_TIMEOUT environment variable (current default: 120 seconds)."
)
error_code = "CONNECTION_ERROR"
is_retryable = True
elif isinstance(core_exception, APIError):

View File

@@ -172,7 +172,7 @@ LOG_ONYX_MODEL_INTERACTIONS=False
## Gen AI Settings
# GEN_AI_MAX_TOKENS=
# LLM_SOCKET_READ_TIMEOUT=
LLM_SOCKET_READ_TIMEOUT=120
# MAX_CHUNKS_FED_TO_CHAT=
# DISABLE_LITELLM_STREAMING=
# LITELLM_EXTRA_HEADERS=

View File

@@ -1262,7 +1262,7 @@ configMap:
S3_FILE_STORE_BUCKET_NAME: ""
# Gen AI Settings
GEN_AI_MAX_TOKENS: ""
LLM_SOCKET_READ_TIMEOUT: "60"
LLM_SOCKET_READ_TIMEOUT: "120"
MAX_CHUNKS_FED_TO_CHAT: ""
# Query Options
DOC_TIME_DECAY: ""