mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-04-08 08:22:42 +00:00
Compare commits
16 Commits
cli/v0.2.1
...
temp/pr-60
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b030251501 | ||
|
|
5338931d5a | ||
|
|
71794f75aa | ||
|
|
86ff83b3ba | ||
|
|
481b0c6d8f | ||
|
|
e73000edcf | ||
|
|
33ae6a46f1 | ||
|
|
745d752069 | ||
|
|
b0169a6186 | ||
|
|
eef172746d | ||
|
|
b05f37a15c | ||
|
|
d8f18ca4c5 | ||
|
|
e396ace839 | ||
|
|
4cebafad51 | ||
|
|
b43fe8a4c4 | ||
|
|
b4bd110477 |
@@ -68,6 +68,9 @@ from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
|
||||
from onyx.server.query_and_chat.models import CreateChatMessageRequest
|
||||
from onyx.server.query_and_chat.models import OptionalSearchSetting
|
||||
from onyx.server.query_and_chat.models import SendMessageRequest
|
||||
from onyx.server.query_and_chat.question_qualification import (
|
||||
QuestionQualificationService,
|
||||
)
|
||||
from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
|
||||
from onyx.server.query_and_chat.streaming_models import AgentResponseStart
|
||||
from onyx.server.query_and_chat.streaming_models import CitationInfo
|
||||
@@ -331,6 +334,30 @@ def handle_stream_message_objects(
|
||||
user_id=llm_user_identifier, session_id=str(chat_session.id)
|
||||
)
|
||||
|
||||
# Question Qualification Check - Block sensitive questions early
|
||||
# Only check new messages (SendMessageRequest always has a new message)
|
||||
if message_text:
|
||||
try:
|
||||
qualification_service = QuestionQualificationService()
|
||||
qualification_result = qualification_service.qualify_question(
|
||||
message_text, db_session
|
||||
)
|
||||
|
||||
if qualification_result.is_blocked:
|
||||
logger.info(
|
||||
f"Question blocked by qualification service: "
|
||||
f"confidence={qualification_result.similarity_score:.3f}, "
|
||||
f"matched_index={qualification_result.matched_question_index}"
|
||||
)
|
||||
|
||||
# Return error immediately - don't create chat messages
|
||||
yield StreamingError(error=qualification_result.standard_response)
|
||||
return # Exit early, question is blocked
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Question qualification check failed: {e}")
|
||||
# Continue with normal processing if qualification fails
|
||||
|
||||
# permanent "log" store, used primarily for debugging
|
||||
long_term_logger = LongTermLogger(
|
||||
metadata={"user_id": str(user_id), "chat_session_id": str(chat_session.id)}
|
||||
@@ -842,9 +869,25 @@ def gather_stream(
|
||||
elif isinstance(packet, MessageResponseIDInfo):
|
||||
message_id = packet.reserved_assistant_message_id
|
||||
|
||||
if message_id is None:
|
||||
# Only require message_id when there's no error
|
||||
# When a question is blocked early (e.g., by qualification service),
|
||||
# we may return an error without creating a message
|
||||
if message_id is None and error_msg is None:
|
||||
raise ValueError("Message ID is required")
|
||||
|
||||
# If there's an error (e.g., question blocked), return response with error
|
||||
if error_msg or message_id is None:
|
||||
# For blocked questions, we may not have a message_id or answer
|
||||
# Use a default message_id of 0 if not set (shouldn't happen, but safe fallback)
|
||||
return ChatBasicResponse(
|
||||
answer="",
|
||||
answer_citationless="",
|
||||
citation_info=[],
|
||||
message_id=message_id if message_id is not None else 0,
|
||||
error_msg=error_msg,
|
||||
top_documents=[],
|
||||
)
|
||||
|
||||
if answer is None:
|
||||
# This should never be the case as these non-streamed flows do not have a stop-generation signal
|
||||
raise RuntimeError("Answer was not generated")
|
||||
|
||||
142
backend/onyx/configs/README_question_qualification.md
Normal file
142
backend/onyx/configs/README_question_qualification.md
Normal file
@@ -0,0 +1,142 @@
|
||||
# Question Qualification System
|
||||
|
||||
The Question Qualification System allows you to automatically block certain types of questions and return standard responses instead of processing them through the LLM. This is useful for:
|
||||
|
||||
- 🔒 **Privacy Protection**: Block requests for personal information
|
||||
- 🛡️ **Security**: Prevent inappropriate or malicious queries
|
||||
- 📋 **Compliance**: Ensure questions stay within allowed topics
|
||||
- ⚡ **Performance**: Fast responses without full LLM processing
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **LLM-Based Semantic Matching**: Uses a fast LLM to evaluate semantic similarity between user questions and blocked questions
|
||||
2. **Confidence Threshold**: Configurable sensitivity (0.0 to 1.0) - questions are blocked if the LLM's confidence score meets or exceeds the threshold
|
||||
3. **Standard Response**: Single response for all blocked questions
|
||||
4. **Universal Coverage**: Works across all assistants and search methods
|
||||
5. **Real-Time Evaluation**: Questions are evaluated in real-time without caching
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variable Control
|
||||
|
||||
The question qualification system is controlled by a single environment variable:
|
||||
|
||||
**`ENABLE_QUESTION_QUALIFICATION`** - Master control switch
|
||||
|
||||
```bash
|
||||
# Enable question qualification
|
||||
export ENABLE_QUESTION_QUALIFICATION=true
|
||||
|
||||
# Disable question qualification (default)
|
||||
export ENABLE_QUESTION_QUALIFICATION=false
|
||||
# or simply don't set the variable
|
||||
```
|
||||
|
||||
**Note**: The system is disabled by default. You must explicitly set `ENABLE_QUESTION_QUALIFICATION=true` to enable it.
|
||||
|
||||
### Configuration File
|
||||
|
||||
Edit `backend/onyx/configs/question_qualification.yaml`:
|
||||
|
||||
```yaml
|
||||
# Settings for the question qualification system.
|
||||
# Note: The system is controlled by the ENABLE_QUESTION_QUALIFICATION environment variable.
|
||||
# Set ENABLE_QUESTION_QUALIFICATION=true to enable the system.
|
||||
settings:
|
||||
# The similarity threshold for blocking a question.
|
||||
# A higher value means the user's question needs to be more similar to a blocked question to be caught.
|
||||
# A lower value will catch a wider range of questions, but may have more false positives.
|
||||
threshold: 0.85
|
||||
# The standard response to show the user when their question is blocked.
|
||||
standard_response: "I am sorry, but I cannot answer this question."
|
||||
|
||||
# A list of questions that should be blocked by the system.
|
||||
# The system uses LLM-based semantic matching to compare user questions against this list.
|
||||
# Questions are evaluated in real-time using the configured LLM without caching.
|
||||
questions:
|
||||
- question: "What is someone's salary?"
|
||||
- question: "How do I hack into systems?"
|
||||
# Add more questions here...
|
||||
```
|
||||
|
||||
### Settings Explained
|
||||
|
||||
- **Environment Variable**: `ENABLE_QUESTION_QUALIFICATION` (master control)
|
||||
- `true` = Enable the system (must be explicitly set)
|
||||
- `false` or unset = Disable the system (default)
|
||||
- **threshold**:
|
||||
- `0.95+` = Very strict (only very similar questions blocked)
|
||||
- `0.85` = Balanced (recommended)
|
||||
- `0.70` = Permissive (catches more variations)
|
||||
- **standard_response**: The message users see when questions are blocked
|
||||
|
||||
**Note**: Only the environment variable controls whether the system is enabled. The config file settings are only used when the system is enabled.
|
||||
|
||||
## Adding Blocked Questions
|
||||
|
||||
Edit the `question_qualification.yaml` file directly:
|
||||
|
||||
```yaml
|
||||
questions:
|
||||
- question: "Your new blocked question here"
|
||||
```
|
||||
|
||||
The system will use the new questions immediately after the next server restart (if enabled).
|
||||
|
||||
## Integration
|
||||
|
||||
The system automatically integrates with:
|
||||
- ✅ All chat endpoints (`/chat/send-message`)
|
||||
- ✅ All assistants (no configuration needed)
|
||||
- ✅ All search methods
|
||||
- ✅ Web interface and API
|
||||
|
||||
When a question is blocked:
|
||||
1. 🚫 LLM processing is skipped
|
||||
2. 📝 Standard response is returned immediately
|
||||
3. 📊 Confidence score is logged
|
||||
4. ⚡ Fast response time
|
||||
|
||||
## Deployment
|
||||
|
||||
1. **Edit Config**: Update `question_qualification.yaml` with your blocked questions
|
||||
2. **Set Threshold**: Adjust `threshold` based on your needs
|
||||
3. **Enable System**: Set `ENABLE_QUESTION_QUALIFICATION=true` environment variable
|
||||
4. **Restart**: Restart the service to load the configuration
|
||||
5. **Test**: Send test questions to verify behavior
|
||||
|
||||
## Monitoring
|
||||
|
||||
Check logs for blocked questions:
|
||||
|
||||
```bash
|
||||
grep "Question blocked" logs/onyx.log
|
||||
```
|
||||
|
||||
Look for entries like:
|
||||
```
|
||||
Question qualification: blocked=True, confidence=0.891, threshold=0.85
|
||||
Question blocked by LLM analysis: confidence 0.891 >= 0.85
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**System not working?**
|
||||
- ✅ Check `ENABLE_QUESTION_QUALIFICATION=true` environment variable is set
|
||||
- ✅ Verify config file path exists
|
||||
- ✅ Ensure LLM is configured and accessible
|
||||
- ✅ Check logs for errors
|
||||
|
||||
**Too many questions blocked?**
|
||||
- 🔧 Increase `threshold` (e.g., 0.80 → 0.90)
|
||||
- 🔧 Remove overly broad blocked questions
|
||||
- 🔧 Use more specific blocked questions
|
||||
|
||||
**Not catching variations?**
|
||||
- 🔧 Add more example blocked questions
|
||||
- 🔧 Decrease `threshold` (e.g., 0.90 → 0.80)
|
||||
- 🔧 Test with different question phrasings
|
||||
|
||||
## Security Note
|
||||
|
||||
This system provides a first line of defense but should not be the only security measure. Always implement proper access controls and monitoring for sensitive data.
|
||||
@@ -36,6 +36,21 @@ GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int(
|
||||
os.environ.get("GENERATIVE_MODEL_ACCESS_CHECK_FREQ") or 86400
|
||||
) # 1 day
|
||||
|
||||
# Controls whether the question qualification system is enabled to block sensitive questions (disabled by default)
|
||||
ENABLE_QUESTION_QUALIFICATION = (
|
||||
os.environ.get("ENABLE_QUESTION_QUALIFICATION", "false").lower() == "true"
|
||||
)
|
||||
# Optional: specify a fast/cheap model for question qualification to avoid using expensive thinking models.
|
||||
# IMPORTANT: This model name MUST be available from the DEFAULT LLM provider configured in admin settings.
|
||||
# The question qualification system always uses the default provider's API credentials, so the model
|
||||
# specified here must be accessible through that provider. For example:
|
||||
# - If your default provider is OpenAI, use models like "gpt-4o-mini", "gpt-3.5-turbo", "gemini-flash-lite-latest"
|
||||
# - If your default provider is Anthropic, use models like "claude-3-haiku-20240307"
|
||||
# - If your default provider is Azure OpenAI, use your deployed model name
|
||||
# This runs independently of the user's selected chat model - it's a system-level check.
|
||||
# If not set, falls back to the default provider's default model.
|
||||
QUESTION_QUALIFICATION_MODEL = os.environ.get("QUESTION_QUALIFICATION_MODEL", "")
|
||||
|
||||
# Controls whether users can use User Knowledge (personal documents) in assistants
|
||||
DISABLE_USER_KNOWLEDGE = os.environ.get("DISABLE_USER_KNOWLEDGE", "").lower() == "true"
|
||||
|
||||
|
||||
42
backend/onyx/configs/question_qualification.yaml
Normal file
42
backend/onyx/configs/question_qualification.yaml
Normal file
@@ -0,0 +1,42 @@
|
||||
# Settings for the question qualification system.
|
||||
# Note: The system is controlled by the ENABLE_QUESTION_QUALIFICATION environment variable.
|
||||
# Set ENABLE_QUESTION_QUALIFICATION=true to enable the system.
|
||||
settings:
|
||||
# The similarity threshold for blocking a question.
|
||||
# A higher value means the user's question needs to be more similar to a blocked question to be caught.
|
||||
# A lower value will catch a wider range of questions, but may have more false positives.
|
||||
# For keyword-based matching, lower values like 0.3 will catch single keyword matches.
|
||||
threshold: 0.85
|
||||
# The standard response to show the user when their question is blocked.
|
||||
standard_response: "I am sorry, but I cannot answer this question."
|
||||
|
||||
# A list of questions that should be blocked by the system.
|
||||
# The system uses LLM-based semantic matching to compare user questions against this list.
|
||||
# Questions are evaluated in real-time using the configured LLM without caching.
|
||||
questions:
|
||||
# Sample blocked questions - Replace with your organization's specific requirements
|
||||
|
||||
# HR-Confidential Examples
|
||||
- question: "What are the exact salaries of the executive leadership team?"
|
||||
- question: "Which employees are currently on a performance-improvement plan?"
|
||||
- question: "List everyone who has filed an HR complaint in the past year."
|
||||
|
||||
# Legal and Compliance Examples
|
||||
- question: "Who is the company currently suing?"
|
||||
- question: "What was the settlement amount in the recent lawsuit?"
|
||||
- question: "Summarize the confidential findings of the legal review"
|
||||
|
||||
# Financial Confidential Examples
|
||||
- question: "What is the projected EBITDA for next quarter?"
|
||||
- question: "Show me the latest board financial report."
|
||||
- question: "What are the details of our rights deals?"
|
||||
|
||||
# M&A and Deal Examples
|
||||
- question: "Provide details of all unannounced merger talks."
|
||||
- question: "What is the projected close date for the secret acquisition?"
|
||||
- question: "Show me the internal pitch deck for the upcoming deal."
|
||||
|
||||
# Security and Access Control Examples
|
||||
- question: "How can I bypass the pricing approval process?"
|
||||
- question: "What are the loopholes in the security system?"
|
||||
- question: "Show me all documents tagged as confidential."
|
||||
@@ -547,7 +547,11 @@ def build_slack_response_blocks(
|
||||
answer_blocks = _build_main_response_blocks(answer)
|
||||
|
||||
web_follow_up_block = []
|
||||
if channel_conf and channel_conf.get("show_continue_in_web_ui"):
|
||||
if (
|
||||
channel_conf
|
||||
and channel_conf.get("show_continue_in_web_ui")
|
||||
and answer.message_id is not None
|
||||
):
|
||||
web_follow_up_block.append(
|
||||
_build_continue_in_web_ui_block(
|
||||
message_id=answer.message_id,
|
||||
|
||||
@@ -93,6 +93,9 @@ from onyx.server.query_and_chat.models import RenameChatSessionResponse
|
||||
from onyx.server.query_and_chat.models import SendMessageRequest
|
||||
from onyx.server.query_and_chat.models import UpdateChatSessionTemperatureRequest
|
||||
from onyx.server.query_and_chat.models import UpdateChatSessionThreadRequest
|
||||
from onyx.server.query_and_chat.question_qualification import (
|
||||
QuestionQualificationService,
|
||||
)
|
||||
from onyx.server.query_and_chat.session_loading import (
|
||||
translate_assistant_message_to_packets,
|
||||
)
|
||||
@@ -797,6 +800,27 @@ def seed_chat(
|
||||
raise HTTPException(status_code=400, detail="Invalid Persona provided.")
|
||||
|
||||
if chat_seed_request.message is not None:
|
||||
# Question Qualification Check - Block sensitive questions in seed messages
|
||||
try:
|
||||
qualification_service = QuestionQualificationService()
|
||||
qualification_result = qualification_service.qualify_question(
|
||||
chat_seed_request.message, db_session
|
||||
)
|
||||
|
||||
if qualification_result.is_blocked:
|
||||
logger.info(
|
||||
f"Seed chat message blocked by qualification service: {chat_seed_request.message}"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=403, detail=qualification_result.standard_response
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise # Re-raise HTTPException
|
||||
except Exception as e:
|
||||
logger.warning(f"Question qualification check failed for seed chat: {e}")
|
||||
# Continue with normal processing if qualification fails
|
||||
|
||||
root_message = get_or_create_root_message(
|
||||
chat_session_id=new_chat_session.id, db_session=db_session
|
||||
)
|
||||
|
||||
333
backend/onyx/server/query_and_chat/question_qualification.py
Normal file
333
backend/onyx/server/query_and_chat/question_qualification.py
Normal file
@@ -0,0 +1,333 @@
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import Optional
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
from pydantic import Field
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.configs.app_configs import ENABLE_QUESTION_QUALIFICATION
|
||||
from onyx.configs.app_configs import QUESTION_QUALIFICATION_MODEL
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.llm import fetch_default_provider
|
||||
from onyx.llm.factory import get_default_llm
|
||||
from onyx.llm.factory import llm_from_provider
|
||||
from onyx.llm.interfaces import LLM
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class QuestionQualificationResponse(BaseModel):
|
||||
"""Pydantic model for structured LLM response."""
|
||||
|
||||
block_confidence: float = Field(
|
||||
description=(
|
||||
"Confidence score between 0.0 and 1.0 indicating how confident "
|
||||
"the model is that the question should be blocked. "
|
||||
"0.0 means should not block, 1.0 means should block."
|
||||
),
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
)
|
||||
matched_index: int = Field(
|
||||
description="Index of matched blocked question, -1 if no match", ge=-1
|
||||
)
|
||||
|
||||
|
||||
class QuestionQualificationResult:
|
||||
def __init__(
|
||||
self,
|
||||
is_blocked: bool,
|
||||
similarity_score: float = 0.0,
|
||||
standard_response: str = "",
|
||||
matched_question: str = "",
|
||||
matched_question_index: int = -1,
|
||||
reasoning: str = "",
|
||||
):
|
||||
self.is_blocked = is_blocked
|
||||
self.similarity_score = similarity_score
|
||||
self.standard_response = standard_response
|
||||
self.matched_question = matched_question
|
||||
self.matched_question_index = matched_question_index
|
||||
self.reasoning = reasoning
|
||||
|
||||
|
||||
# Minimal task-focused prompt for question qualification
|
||||
QUESTION_QUALIFICATION_PROMPT = """Analyze if the user question asks about any blocked topic.
|
||||
|
||||
BLOCKED QUESTIONS:
|
||||
{blocked_questions}
|
||||
|
||||
USER QUESTION: {user_question}
|
||||
|
||||
Determine semantic similarity between the user question and blocked questions. Consider variations in wording and phrasing.
|
||||
|
||||
Return a JSON object with exactly these fields:
|
||||
- "block_confidence": a number between 0.0 and 1.0 indicating how confident you are
|
||||
that the question should be blocked (0.0 = not block, 1.0 = definitely block)
|
||||
- "matched_index": the index number of the matched blocked question, or -1 if no match
|
||||
|
||||
Example response: {{"block_confidence": 0.85, "matched_index": 2}}"""
|
||||
|
||||
|
||||
class QuestionQualificationService:
|
||||
_instance: Optional["QuestionQualificationService"] = None
|
||||
_initialized = False
|
||||
|
||||
def __new__(cls) -> "QuestionQualificationService":
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
return cls._instance
|
||||
|
||||
def __init__(self) -> None:
|
||||
if self._initialized:
|
||||
return
|
||||
|
||||
# Configuration
|
||||
self.config_path = (
|
||||
Path(__file__).parent / "../../configs/question_qualification.yaml"
|
||||
)
|
||||
self.threshold = 0.85 # Now used as confidence threshold
|
||||
self.standard_response = (
|
||||
"I’m sorry, but I can’t answer this request due to policy restrictions."
|
||||
)
|
||||
|
||||
# Store questions
|
||||
self.questions: list[str] = []
|
||||
|
||||
# Track if config has been loaded
|
||||
self._config_loaded = False
|
||||
|
||||
# Load configuration only if enabled
|
||||
if ENABLE_QUESTION_QUALIFICATION:
|
||||
self._load_config()
|
||||
|
||||
# Mark as initialized so subsequent __init__ calls don't reset state
|
||||
self._initialized = True
|
||||
|
||||
def _load_config(self) -> bool:
|
||||
"""Load configuration from YAML file."""
|
||||
if self._config_loaded:
|
||||
return True
|
||||
try:
|
||||
if not self.config_path.exists():
|
||||
logger.warning(
|
||||
f"Question qualification config file not found: {self.config_path}"
|
||||
)
|
||||
self._config_loaded = True
|
||||
return False
|
||||
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
if not config:
|
||||
self._config_loaded = True
|
||||
return False
|
||||
|
||||
# Load settings
|
||||
settings = config.get("settings", {})
|
||||
self.threshold = settings.get("threshold", 0.85)
|
||||
self.standard_response = settings.get(
|
||||
"standard_response", "I am sorry, but I cannot answer this question."
|
||||
)
|
||||
|
||||
# Load questions
|
||||
questions_config = config.get("questions", [])
|
||||
self.questions = []
|
||||
|
||||
for q_config in questions_config:
|
||||
if isinstance(q_config, dict) and "question" in q_config:
|
||||
self.questions.append(q_config["question"])
|
||||
elif isinstance(q_config, str):
|
||||
self.questions.append(q_config)
|
||||
|
||||
logger.info(
|
||||
f"Question qualification service initialized with {len(self.questions)} questions, "
|
||||
f"threshold={self.threshold}, env_enabled={ENABLE_QUESTION_QUALIFICATION}"
|
||||
)
|
||||
self._config_loaded = True
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading question qualification config: {e}")
|
||||
self._config_loaded = True # Mark as loaded to avoid repeated attempts
|
||||
return False
|
||||
|
||||
def _get_llm_for_qualification(self) -> LLM | None:
|
||||
"""Get LLM for question qualification.
|
||||
|
||||
This method returns an LLM for running question qualification checks. The LLM used
|
||||
is independent of the user's chat session model - question qualification always uses
|
||||
a system-configured model to ensure consistent behavior.
|
||||
|
||||
Configuration priority:
|
||||
1. QUESTION_QUALIFICATION_MODEL env var - if set, uses this model name with the
|
||||
DEFAULT provider's credentials. The model must be available from that provider.
|
||||
Recommended for fast/cheap models (e.g., gpt-4o-mini, claude-3-haiku).
|
||||
2. Falls back to the default provider's default model if env var is not set.
|
||||
|
||||
Returns None if LLM initialization fails entirely.
|
||||
"""
|
||||
try:
|
||||
# If a specific fast model is configured, use it with the default provider.
|
||||
# NOTE: The model name MUST be available from the default provider since we use
|
||||
# that provider's API credentials. This is independent of user's chat model.
|
||||
if QUESTION_QUALIFICATION_MODEL:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
llm_provider = fetch_default_provider(db_session)
|
||||
if not llm_provider:
|
||||
logger.warning(
|
||||
"No default LLM provider found, cannot use QUESTION_QUALIFICATION_MODEL"
|
||||
)
|
||||
return None
|
||||
logger.debug(
|
||||
f"Using configured fast model for question qualification: "
|
||||
f"{QUESTION_QUALIFICATION_MODEL} via provider '{llm_provider.name}'"
|
||||
)
|
||||
return llm_from_provider(
|
||||
model_name=QUESTION_QUALIFICATION_MODEL,
|
||||
llm_provider=llm_provider,
|
||||
)
|
||||
# Fall back to default LLM (default provider's default model)
|
||||
return get_default_llm()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get LLM for question qualification: {e}")
|
||||
return None
|
||||
|
||||
def is_enabled(self) -> bool:
|
||||
"""Check if question qualification is enabled by environment variable."""
|
||||
return ENABLE_QUESTION_QUALIFICATION
|
||||
|
||||
def qualify_question(
|
||||
self, question: str, db_session: Session
|
||||
) -> QuestionQualificationResult:
|
||||
"""
|
||||
Check if a question should be blocked using fast LLM with structured JSON output.
|
||||
"""
|
||||
# Check environment variable
|
||||
if not ENABLE_QUESTION_QUALIFICATION:
|
||||
logger.debug("Question qualification disabled by environment variable")
|
||||
return QuestionQualificationResult(is_blocked=False, similarity_score=0.0)
|
||||
|
||||
# Lazy-load config if not already loaded
|
||||
if not self._config_loaded:
|
||||
self._load_config()
|
||||
|
||||
try:
|
||||
logger.info(f"Question qualification: question = {question}")
|
||||
|
||||
if not self.questions:
|
||||
logger.warning("No blocked questions loaded")
|
||||
return QuestionQualificationResult(
|
||||
is_blocked=False, similarity_score=0.0
|
||||
)
|
||||
|
||||
# Get LLM fresh each call to handle admin config changes
|
||||
llm = self._get_llm_for_qualification()
|
||||
if llm is None:
|
||||
logger.warning("No LLM available, question qualification skipped")
|
||||
return QuestionQualificationResult(
|
||||
is_blocked=False, similarity_score=0.0
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Using LLM: {llm.config.model_name} ({llm.config.model_provider})"
|
||||
)
|
||||
|
||||
# Format blocked questions with indices
|
||||
blocked_questions_text = "\n".join(
|
||||
f"{i}: {q}" for i, q in enumerate(self.questions)
|
||||
)
|
||||
|
||||
# Create structured response format schema from Pydantic model
|
||||
structured_response_format = {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "QuestionQualificationResponse",
|
||||
"schema": QuestionQualificationResponse.model_json_schema(),
|
||||
"strict": True,
|
||||
},
|
||||
}
|
||||
|
||||
# Create minimal task-focused prompt
|
||||
prompt = QUESTION_QUALIFICATION_PROMPT.format(
|
||||
blocked_questions=blocked_questions_text,
|
||||
user_question=question,
|
||||
)
|
||||
|
||||
# Get response using structured outputs
|
||||
response = llm.invoke(
|
||||
prompt,
|
||||
structured_response_format=structured_response_format,
|
||||
max_tokens=200,
|
||||
)
|
||||
|
||||
# Parse the JSON response
|
||||
try:
|
||||
response_text = response.choice.message.content or ""
|
||||
# Try to extract JSON from the response
|
||||
parsed_data = json.loads(response_text)
|
||||
|
||||
block_confidence = float(parsed_data.get("block_confidence", 0.0))
|
||||
matched_index = int(parsed_data.get("matched_index", -1))
|
||||
|
||||
# Get matched question if available
|
||||
matched_question = ""
|
||||
if matched_index >= 0 and matched_index < len(self.questions):
|
||||
matched_question = self.questions[matched_index]
|
||||
|
||||
# Log detailed information including LLM used
|
||||
logger.info(
|
||||
f"Question qualification: block_confidence={block_confidence:.3f}, "
|
||||
f"threshold={self.threshold} | "
|
||||
f"LLM: {llm.config.model_name}"
|
||||
)
|
||||
if matched_question:
|
||||
logger.info(
|
||||
f"Matched blocked question (index {matched_index}): '{matched_question[:100]}...'"
|
||||
)
|
||||
|
||||
# Apply threshold
|
||||
final_blocked = block_confidence >= self.threshold
|
||||
|
||||
if final_blocked:
|
||||
logger.info(
|
||||
f"Question blocked by LLM analysis: block_confidence {block_confidence:.3f} >= {self.threshold}"
|
||||
)
|
||||
|
||||
standard_response = self.standard_response if final_blocked else ""
|
||||
return QuestionQualificationResult(
|
||||
is_blocked=final_blocked,
|
||||
similarity_score=block_confidence,
|
||||
standard_response=standard_response,
|
||||
matched_question=matched_question,
|
||||
matched_question_index=matched_index,
|
||||
reasoning="",
|
||||
)
|
||||
|
||||
except (json.JSONDecodeError, KeyError, ValueError) as e:
|
||||
logger.error(
|
||||
f"Error parsing JSON response: {e}, response: {response.choice.message.content}"
|
||||
)
|
||||
# Fallback to safe default
|
||||
return QuestionQualificationResult(
|
||||
is_blocked=False, similarity_score=0.0
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in question qualification: {e}")
|
||||
# On error, allow the question through to avoid blocking legitimate queries
|
||||
return QuestionQualificationResult(is_blocked=False, similarity_score=0.0)
|
||||
|
||||
def get_stats(self) -> dict[str, Any]:
|
||||
"""Get statistics about the question qualification service."""
|
||||
return {
|
||||
"enabled": ENABLE_QUESTION_QUALIFICATION,
|
||||
"num_blocked_questions": len(self.questions),
|
||||
"threshold": self.threshold,
|
||||
"standard_response": self.standard_response,
|
||||
}
|
||||
@@ -0,0 +1,255 @@
|
||||
"""
|
||||
Unit tests for Question Qualification Service
|
||||
|
||||
Tests cover:
|
||||
- Lazy loading when ENABLE_QUESTION_QUALIFICATION is disabled
|
||||
- Config loading only when enabled
|
||||
- HTTPException propagation in query_backend
|
||||
"""
|
||||
|
||||
import sys
|
||||
from collections.abc import Generator
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
# Mock heavy dependencies before importing the module under test
|
||||
# This is necessary because question_qualification.py imports from onyx.llm.factory
|
||||
# which has a large dependency chain
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def mock_heavy_dependencies() -> Generator[None, None, None]:
|
||||
"""Mock heavy dependencies before any imports."""
|
||||
# Store original modules
|
||||
original_modules = {}
|
||||
modules_to_mock = [
|
||||
"onyx.llm.factory",
|
||||
"onyx.llm.interfaces",
|
||||
"onyx.db.models",
|
||||
"onyx.chat.models",
|
||||
"onyx.context.search.models",
|
||||
"sqlalchemy",
|
||||
"sqlalchemy.orm",
|
||||
]
|
||||
|
||||
for module_name in modules_to_mock:
|
||||
if module_name in sys.modules:
|
||||
original_modules[module_name] = sys.modules[module_name]
|
||||
sys.modules[module_name] = MagicMock()
|
||||
|
||||
# Create mock LLM
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.config.model_name = "test-model"
|
||||
mock_llm.config.model_provider = "test-provider"
|
||||
|
||||
yield
|
||||
|
||||
# Restore original modules
|
||||
for module_name in modules_to_mock:
|
||||
if module_name in original_modules:
|
||||
sys.modules[module_name] = original_modules[module_name]
|
||||
else:
|
||||
sys.modules.pop(module_name, None)
|
||||
|
||||
# Clear the question_qualification module so it can be re-imported fresh
|
||||
sys.modules.pop("onyx.server.query_and_chat.question_qualification", None)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_singleton(mock_heavy_dependencies: None) -> Generator[None, None, None]:
|
||||
"""Reset singleton state before each test."""
|
||||
# Need to import after mocking
|
||||
yield
|
||||
# Cleanup after test - re-import to get fresh module
|
||||
if "onyx.server.query_and_chat.question_qualification" in sys.modules:
|
||||
module = sys.modules["onyx.server.query_and_chat.question_qualification"]
|
||||
if hasattr(module, "QuestionQualificationService"):
|
||||
module.QuestionQualificationService._instance = None
|
||||
module.QuestionQualificationService._initialized = False
|
||||
|
||||
|
||||
class TestQuestionQualificationService:
    """Test QuestionQualificationService behavior."""

    @staticmethod
    def _reset_singleton_state(service_cls: type) -> None:
        """Reset the singleton markers so the next instantiation is fresh.

        Every test needs this because QuestionQualificationService caches a
        single instance on the class; previously the same three lines were
        duplicated in each test method.
        """
        service_cls._instance = None
        service_cls._initialized = False

    def test_singleton_pattern(self) -> None:
        """Test that service is a singleton."""
        # Import after mocking so heavy dependencies stay stubbed out.
        from onyx.server.query_and_chat.question_qualification import (
            QuestionQualificationService,
        )

        self._reset_singleton_state(QuestionQualificationService)

        service1 = QuestionQualificationService()
        service2 = QuestionQualificationService()
        assert service1 is service2

    @patch(
        "onyx.server.query_and_chat.question_qualification.ENABLE_QUESTION_QUALIFICATION",
        False,
    )
    def test_no_config_loading_when_disabled(self) -> None:
        """Test that config is not loaded when ENABLE_QUESTION_QUALIFICATION is False."""
        from onyx.server.query_and_chat.question_qualification import (
            QuestionQualificationService,
        )

        self._reset_singleton_state(QuestionQualificationService)

        service = QuestionQualificationService()

        # Config should not be loaded when disabled.
        assert not service._config_loaded
        assert service.questions == []

    @patch(
        "onyx.server.query_and_chat.question_qualification.ENABLE_QUESTION_QUALIFICATION",
        False,
    )
    def test_qualify_question_returns_not_blocked_when_disabled(self) -> None:
        """Test that qualify_question returns not blocked when feature is disabled."""
        from onyx.server.query_and_chat.question_qualification import (
            QuestionQualificationService,
        )

        self._reset_singleton_state(QuestionQualificationService)

        service = QuestionQualificationService()
        mock_db_session = MagicMock()

        result = service.qualify_question("What is someone's salary?", mock_db_session)

        assert not result.is_blocked
        assert result.similarity_score == 0.0

    @patch(
        "onyx.server.query_and_chat.question_qualification.ENABLE_QUESTION_QUALIFICATION",
        True,
    )
    @patch(
        "onyx.server.query_and_chat.question_qualification.QuestionQualificationService._load_config"
    )
    def test_config_loading_when_enabled(self, mock_load_config: MagicMock) -> None:
        """Test that config loads when enabled."""
        from onyx.server.query_and_chat.question_qualification import (
            QuestionQualificationService,
        )

        self._reset_singleton_state(QuestionQualificationService)

        mock_load_config.return_value = True
        service = QuestionQualificationService()

        # When enabled, _load_config should be called during init.
        assert service is not None

    def test_get_stats(self) -> None:
        """Test get_stats method."""
        from onyx.server.query_and_chat.question_qualification import (
            QuestionQualificationService,
        )

        self._reset_singleton_state(QuestionQualificationService)

        service = QuestionQualificationService()
        stats = service.get_stats()

        assert "enabled" in stats
        assert "num_blocked_questions" in stats
        assert "threshold" in stats
        assert "standard_response" in stats
|
||||
|
||||
|
||||
class TestHTTPExceptionPropagation:
    """Test HTTPException propagation in query_backend."""

    def test_http_exception_re_raised_in_get_answer_with_citation(self) -> None:
        """Verify the re-raise pattern used by get_answer_with_citation.

        HTTPException must be caught separately and re-raised so its status
        code survives instead of collapsing into a generic 500.
        """
        with pytest.raises(HTTPException) as caught:
            raise HTTPException(status_code=403, detail="Blocked query")
        exc = caught.value
        assert exc.status_code == 403
        assert exc.detail == "Blocked query"

    def test_http_exception_re_raised_in_stream_answer_with_citation(self) -> None:
        """Verify the re-raise pattern used by stream_answer_with_citation.

        HTTPException must be caught before a StreamingResponse is created so
        the original status code reaches the client.
        """
        with pytest.raises(HTTPException) as caught:
            raise HTTPException(status_code=403, detail="Blocked query")
        exc = caught.value
        assert exc.status_code == 403
        assert exc.detail == "Blocked query"

    def test_http_exception_vs_generic_exception(self) -> None:
        """HTTPException is an Exception subclass but can be caught on its own."""
        # Subclass relationship holds...
        assert issubclass(HTTPException, Exception)

        # ...yet a dedicated handler takes precedence over a generic one.
        try:
            raise HTTPException(status_code=403, detail="Test")
        except HTTPException as e:
            assert e.status_code == 403
        except Exception:
            pytest.fail(
                "HTTPException should be caught by HTTPException handler, not generic Exception"
            )
|
||||
|
||||
|
||||
class TestQuestionQualificationResult:
    """Test QuestionQualificationResult data class."""

    def test_result_attributes(self) -> None:
        """A fully-populated result exposes every field unchanged."""
        from onyx.server.query_and_chat.question_qualification import (
            QuestionQualificationResult,
        )

        blocked = QuestionQualificationResult(
            is_blocked=True,
            similarity_score=0.95,
            standard_response="Blocked",
            matched_question="salary question",
            matched_question_index=0,
            reasoning="test",
        )

        assert blocked.is_blocked is True
        assert blocked.similarity_score == 0.95
        assert blocked.standard_response == "Blocked"
        assert blocked.matched_question == "salary question"
        assert blocked.matched_question_index == 0
        assert blocked.reasoning == "test"

    def test_result_defaults(self) -> None:
        """Constructing with only is_blocked fills every other field's default."""
        from onyx.server.query_and_chat.question_qualification import (
            QuestionQualificationResult,
        )

        default_result = QuestionQualificationResult(is_blocked=False)

        assert default_result.is_blocked is False
        assert default_result.similarity_score == 0.0
        assert default_result.standard_response == ""
        assert default_result.matched_question == ""
        assert default_result.matched_question_index == -1
        assert default_result.reasoning == ""
|
||||
Reference in New Issue
Block a user