Compare commits

...

6 Commits

3a47e3b240 · Evan Lohn · 2025-08-11 12:16:00 -07:00
fix: max tokens param (#5174)
* max tokens param
* fix unit test
* fix unit test

cfe8cdb136 · Wenxi · 2025-08-11 12:16:00 -07:00
fix: sanitize slack payload before logging (#5167)
* sanitize slack payload before logging
* nit

773acc818a · Wenxi Onyx · 2025-08-11 12:16:00 -07:00
mask llm api key from logs

1ca3fa9b83 · Wenxi Onyx · 2025-08-08 18:13:04 -07:00
Merge branch 'release/v1.2' into release/v1.3

b6df06e451 · Wenxi · 2025-08-08 17:51:49 -07:00
add gpt 5 display names (#5175)
(cherry picked from commit 297c2957b4)

037d0cae00 · Wenxi · 2025-08-08 17:51:39 -07:00
feat: support gpt5 models (#5169)
* support gpt5 models
* gpt5mini visible
(cherry picked from commit cf193dee29)
5 changed files with 55 additions and 10 deletions

View File

@@ -24,6 +24,7 @@ from langchain_core.messages import SystemMessageChunk
 from langchain_core.messages.tool import ToolCallChunk
 from langchain_core.messages.tool import ToolMessage
 from langchain_core.prompt_values import PromptValue
+from litellm.utils import get_supported_openai_params
 from onyx.configs.app_configs import LOG_DANSWER_MODEL_INTERACTIONS
 from onyx.configs.app_configs import MOCK_LLM_RESPONSE
@@ -52,6 +53,8 @@ litellm.telemetry = False
 _LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"
 VERTEX_CREDENTIALS_FILE_KWARG = "vertex_credentials"
 VERTEX_LOCATION_KWARG = "vertex_location"
+LEGACY_MAX_TOKENS_KWARG = "max_tokens"
+STANDARD_MAX_TOKENS_KWARG = "max_completion_tokens"

 class LLMTimeoutError(Exception):
@@ -313,14 +316,22 @@ class DefaultMultiLLM(LLM):
         self._model_kwargs = model_kwargs

-    def log_model_configs(self) -> None:
-        logger.debug(f"Config: {self.config}")
+        self._max_token_param = LEGACY_MAX_TOKENS_KWARG
+        try:
+            params = get_supported_openai_params(model_name, model_provider)
+            if STANDARD_MAX_TOKENS_KWARG in (params or []):
+                self._max_token_param = STANDARD_MAX_TOKENS_KWARG
+        except Exception as e:
+            logger.warning(f"Error getting supported openai params: {e}")
+
+    def _safe_model_config(self) -> dict:
+        dump = self.config.model_dump()
+        dump["api_key"] = mask_string(dump.get("api_key", ""))
+        return dump
+
+    def log_model_configs(self) -> None:
+        logger.debug(f"Config: {self._safe_model_config()}")

     def _record_call(self, prompt: LanguageModelInput) -> None:
         if self._long_term_logger:
             self._long_term_logger.record(
@@ -393,11 +404,14 @@ class DefaultMultiLLM(LLM):
                 messages=processed_prompt,
                 tools=tools,
                 tool_choice=tool_choice if tools else None,
-                max_tokens=max_tokens,
                 # streaming choice
                 stream=stream,
                 # model params
-                temperature=self._temperature,
+                temperature=(
+                    1
+                    if self.config.model_name in ["gpt-5", "gpt-5-mini", "gpt-5-nano"]
+                    else self._temperature
+                ),
                 timeout=timeout_override or self._timeout,
                 # For now, we don't support parallel tool calls
                 # NOTE: we can't pass this in if tools are not specified
@@ -422,6 +436,7 @@ class DefaultMultiLLM(LLM):
                     if structured_response_format
                     else {}
                 ),
+                **({self._max_token_param: max_tokens} if max_tokens else {}),
                 **self._model_kwargs,
             )
         except Exception as e:
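
Taken together, these hunks cover three of the commits above: the constructor now asks litellm which token-limit kwarg the model accepts and prefers max_completion_tokens over the legacy max_tokens; temperature is pinned to 1 for the gpt-5 family (which, like other OpenAI reasoning models, presumably only accepts the default); and log_model_configs logs a masked copy of the config rather than the raw API key. A minimal runnable sketch of the token-param selection, with pick_max_token_param and mask_string as hypothetical stand-ins for logic that lives inline in DefaultMultiLLM:

    from litellm.utils import get_supported_openai_params

    LEGACY_MAX_TOKENS_KWARG = "max_tokens"
    STANDARD_MAX_TOKENS_KWARG = "max_completion_tokens"


    def pick_max_token_param(model_name: str, model_provider: str) -> str:
        """Prefer max_completion_tokens when litellm reports the model supports it."""
        try:
            params = get_supported_openai_params(model_name, model_provider)
            if STANDARD_MAX_TOKENS_KWARG in (params or []):
                return STANDARD_MAX_TOKENS_KWARG
        except Exception:
            pass  # on any lookup failure, fall back to the legacy kwarg
        return LEGACY_MAX_TOKENS_KWARG


    def mask_string(value: str) -> str:
        """Hypothetical stand-in for onyx's masking helper."""
        return "****" + value[-4:] if value else ""


    # The chosen kwarg is splatted into litellm.completion only when a limit
    # is set, so completion never sees an explicit max_tokens=None:
    max_tokens = 1024
    kwargs = {pick_max_token_param("gpt-5", "openai"): max_tokens} if max_tokens else {}
    # -> {"max_completion_tokens": 1024} for models that support the standard kwarg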

View File

@@ -47,6 +47,9 @@ class WellKnownLLMProviderDescriptor(BaseModel):
 OPENAI_PROVIDER_NAME = "openai"
 OPEN_AI_MODEL_NAMES = [
+    "gpt-5",
+    "gpt-5-mini",
+    "gpt-5-nano",
     "o4-mini",
     "o3-mini",
     "o1-mini",
@@ -73,7 +76,14 @@ OPEN_AI_MODEL_NAMES = [
     "gpt-3.5-turbo-16k-0613",
     "gpt-3.5-turbo-0301",
 ]
-OPEN_AI_VISIBLE_MODEL_NAMES = ["o1", "o3-mini", "gpt-4o", "gpt-4o-mini"]
+OPEN_AI_VISIBLE_MODEL_NAMES = [
+    "gpt-5",
+    "gpt-5-mini",
+    "o1",
+    "o3-mini",
+    "gpt-4o",
+    "gpt-4o-mini",
+]

 BEDROCK_PROVIDER_NAME = "bedrock"
 # need to remove all the weird "bedrock/eu-central-1/anthropic.claude-v1" named
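
OPEN_AI_MODEL_NAMES is the full list offered for the openai provider, while OPEN_AI_VISIBLE_MODEL_NAMES presumably controls which of them are surfaced by default; note gpt-5-nano is added to the former but not the latter, matching the "gpt5mini visible" commit note. A hypothetical illustration of how such a pair of lists can gate default visibility:

    # Hypothetical sketch; the real descriptor wiring in onyx may differ.
    OPEN_AI_MODEL_NAMES = ["gpt-5", "gpt-5-mini", "gpt-5-nano", "o4-mini", "gpt-4o"]
    OPEN_AI_VISIBLE_MODEL_NAMES = ["gpt-5", "gpt-5-mini", "gpt-4o"]

    visibility = {name: name in OPEN_AI_VISIBLE_MODEL_NAMES for name in OPEN_AI_MODEL_NAMES}
    # {'gpt-5': True, 'gpt-5-mini': True, 'gpt-5-nano': False, 'o4-mini': False, 'gpt-4o': True}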

View File

@@ -130,6 +130,10 @@ _SLACK_GREETINGS_TO_IGNORE = {
 # This is always (currently) the user id of Slack's official slackbot
 _OFFICIAL_SLACKBOT_USER_ID = "USLACKBOT"

+# Fields to exclude from Slack payload logging
+# Intention is to not log slack message content
+_EXCLUDED_SLACK_PAYLOAD_FIELDS = {"text", "blocks"}
+

 class SlackbotHandler:
     def __init__(self) -> None:
@@ -570,6 +574,20 @@ class SlackbotHandler:
         sys.exit(0)

+
+def sanitize_slack_payload(payload: dict) -> dict:
+    """Remove message content from Slack payload for logging"""
+    sanitized = {
+        k: v for k, v in payload.items() if k not in _EXCLUDED_SLACK_PAYLOAD_FIELDS
+    }
+    if "event" in sanitized and isinstance(sanitized["event"], dict):
+        sanitized["event"] = {
+            k: v
+            for k, v in sanitized["event"].items()
+            if k not in _EXCLUDED_SLACK_PAYLOAD_FIELDS
+        }
+    return sanitized
+

 def prefilter_requests(req: SocketModeRequest, client: TenantSocketModeClient) -> bool:
     """True to keep going, False to ignore this Slack request"""
@@ -762,7 +780,10 @@ def prefilter_requests(req: SocketModeRequest, client: TenantSocketModeClient) -> bool:
     if not check_message_limit():
         return False

-    logger.debug(f"Handling Slack request: {client.bot_name=} '{req.payload=}'")
+    # Don't log Slack message content
+    logger.debug(
+        f"Handling Slack request: {client.bot_name=} '{sanitize_slack_payload(req.payload)=}'"
+    )
     return True
@@ -928,10 +949,9 @@ def process_message(
     if req.type == "events_api":
         event = cast(dict[str, Any], req.payload["event"])
         event_type = event.get("type")
-        msg = cast(str, event.get("text", ""))

         logger.info(
             f"process_message start: {tenant_id=} {req.type=} {req.envelope_id=} "
-            f"{event_type=} {msg=}"
+            f"{event_type=}"
         )
     else:
         logger.info(
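
sanitize_slack_payload drops the "text" and "blocks" fields both at the top level and one level down inside "event", which is where Slack's Events API carries message content. A quick usage sketch (the sample payload is invented) of the function exactly as added above:

    _EXCLUDED_SLACK_PAYLOAD_FIELDS = {"text", "blocks"}


    def sanitize_slack_payload(payload: dict) -> dict:
        """Remove message content from Slack payload for logging"""
        sanitized = {
            k: v for k, v in payload.items() if k not in _EXCLUDED_SLACK_PAYLOAD_FIELDS
        }
        if "event" in sanitized and isinstance(sanitized["event"], dict):
            sanitized["event"] = {
                k: v
                for k, v in sanitized["event"].items()
                if k not in _EXCLUDED_SLACK_PAYLOAD_FIELDS
            }
        return sanitized


    payload = {
        "type": "event_callback",
        "text": "top-level text",  # dropped
        "event": {
            "type": "message",
            "user": "U123",
            "channel": "C456",
            "text": "the actual message",  # dropped from the nested event too
            "blocks": [{"type": "section"}],  # dropped
        },
    }
    print(sanitize_slack_payload(payload))
    # {'type': 'event_callback', 'event': {'type': 'message', 'user': 'U123', 'channel': 'C456'}}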

View File

@@ -148,7 +148,6 @@ def test_multiple_tool_calls(default_multi_llm: DefaultMultiLLM) -> None:
         ],
         tools=tools,
         tool_choice=None,
-        max_tokens=None,
         stream=False,
         temperature=0.0,  # Default value from GEN_AI_TEMPERATURE
         timeout=30,
@@ -294,7 +293,6 @@ def test_multiple_tool_calls_streaming(default_multi_llm: DefaultMultiLLM) -> None:
         ],
         tools=tools,
         tool_choice=None,
-        max_tokens=None,
         stream=True,
         temperature=0.0,  # Default value from GEN_AI_TEMPERATURE
         timeout=30,
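
The expected calls drop max_tokens=None because the kwarg is now added only when a limit is set (see the **({...} if max_tokens else {}) splat in the first file), so litellm.completion is never invoked with an explicit None. A tiny sketch of that behavior, with build_kwargs as a hypothetical helper name:

    def build_kwargs(max_token_param: str, max_tokens: int | None) -> dict:
        # mirrors **({self._max_token_param: max_tokens} if max_tokens else {})
        return {max_token_param: max_tokens} if max_tokens else {}

    assert build_kwargs("max_completion_tokens", 256) == {"max_completion_tokens": 256}
    assert build_kwargs("max_completion_tokens", None) == {}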

View File

@@ -679,6 +679,8 @@ const MODEL_DISPLAY_NAMES: { [key: string]: string } = {
   "o1-mini": "o1 Mini",
   "o1-preview": "o1 Preview",
   o1: "o1",
+  "gpt-5": "GPT 5",
+  "gpt-5-mini": "GPT 5 Mini",
   "gpt-4.1": "GPT 4.1",
   "gpt-4": "GPT 4",
   "gpt-4o": "GPT 4o",